From 98e9cf6e0b40ee69755ea728aa4f13ee094199bc Mon Sep 17 00:00:00 2001
From: wqymi <wangqiying@xiaomi.com>
Date: Mon, 22 Jun 2026 15:47:30 +0800
Subject: [PATCH 1/5] fix(provider): pin prompt-cache breakpoints to rolling
 head

The applyCaching message-level strategy placed breakpoints at a drifting
midpoint and before-last-user, positions that shift every turn and never
cache the request tail. Since prompt caching is longest-common-prefix
based, only a marker pinned to the end grows the cached prefix each turn,
so the old markers spent the budget without improving hit rate (and often
landed on assistant/tool messages dropped by openai-compatible proxies).

Replace them with a stable two-breakpoint scheme: last system message +
last message (rolling head), mirroring upstream cc. Add a multi-turn test
asserting only those two positions are marked.
---
 packages/opencode/src/provider/transform.ts   | 24 +++++++--------
 .../opencode/test/provider/transform.test.ts  | 30 +++++++++++++++++++
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 009b8fbaf..02dfdcb59 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -283,24 +283,24 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage
     },
   }
 
-  // Strategy: place cache breakpoints at stable prefix boundaries (max 4 allowed by Anthropic)
-  // 1. Last system message — system prompt never changes
-  // 2. Midpoint of conversation history — long prefix second-level cache
-  // 3. Message before the last user message — stable history boundary
+  // Strategy: prefix cache is longest-common-prefix based, so the only marker
+  // that grows the cached prefix every turn is one pinned to the *end* of the
+  // request. We place two stable breakpoints (max 4 allowed by Anthropic):
+  // 1. Last system message — the immutable prompt prefix.
+  // 2. The last message — "rolling head". Writing the cache up to the current
+  //    tail makes the entire history a cache *read* on the next turn.
+  //
+  // We deliberately do NOT place markers at a drifting midpoint or at
+  // before-last-user: those indices shift every turn, land on assistant/tool
+  // messages (silently dropped by openai-compatible proxies), and spend the
+  // breakpoint budget without ever caching the tail. See internal/docs/cache-policy.md.
   const targets: ModelMessage[] = []
 
   const systemMsgs = msgs.filter((msg) => msg.role === "system")
   if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1])
 
   const nonSystem = msgs.filter((msg) => msg.role !== "system")
-  const lastUserIdx = nonSystem.findLastIndex((msg) => msg.role === "user")
-  if (lastUserIdx >= 1) {
-    targets.push(nonSystem[lastUserIdx - 1])
-    const midpoint = Math.floor(lastUserIdx / 2)
-    if (midpoint > 0 && midpoint < lastUserIdx - 1) targets.push(nonSystem[midpoint])
-  } else if (lastUserIdx === 0) {
-    targets.push(nonSystem[0])
-  }
+  if (nonSystem.length > 0) targets.push(nonSystem[nonSystem.length - 1])
 
   for (const msg of unique(targets)) {
     const useMessageLevelOptions =
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index 89231f27f..d7319f754 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -2172,6 +2172,36 @@ describe("ProviderTransform.message - cache control on gateway", () => {
 
     expect(result[0].providerOptions).toBeUndefined()
   })
+
+  test("multi-turn anthropic pins breakpoints to last system + last message only", () => {
+    const model = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+    })
+    const msgs = [
+      { role: "system", content: "You are a helpful assistant" },
+      { role: "user", content: "first question" },
+      { role: "assistant", content: "first answer" },
+      { role: "user", content: "second question" },
+      { role: "assistant", content: "second answer" },
+      { role: "user", content: "third question" },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, model, {}) as any[]
+
+    // Only the last system message and the last message carry a breakpoint.
+    const marked = result
+      .map((msg, index) => ({ index, role: msg.role, hasCache: !!msg.providerOptions?.anthropic?.cacheControl }))
+      .filter((m) => m.hasCache)
+
+    expect(marked).toEqual([
+      { index: 0, role: "system", hasCache: true },
+      { index: 5, role: "user", hasCache: true },
+    ])
+    // No drifting midpoint / before-last-user markers on intermediate assistant turns.
+    expect(result[2].providerOptions?.anthropic).toBeUndefined()
+    expect(result[4].providerOptions?.anthropic).toBeUndefined()
+  })
 })
 
 describe("ProviderTransform.variants", () => {

From 66cba56cdfb9a7b76352b0219adbdacbd00f9b3e Mon Sep 17 00:00:00 2001
From: wqymi <wangqiying@xiaomi.com>
Date: Mon, 22 Jun 2026 22:22:49 +0800
Subject: [PATCH 2/5] fix(provider): double-marker rolling tail +
 tool-definition cache breakpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A single tail breakpoint only hits cache when a turn appends fewer than 20
content blocks (Anthropic's backward lookback window). Agentic turns with
multiple tool calls routinely exceed that, pushing the prior write out of the
window — and on openai-compatible proxies a lone marker landing on an assistant
message is silently dropped. Both break the message cache.

Mark the last TWO non-system messages instead (rolling double buffer): the
prior turn's tail marker survives as the read point while the new tail is the
next write, and the second marker also survives single-step tool-call retries
and user-initiated session forks. This mirrors openclacky's double-marker
strategy for proxied Claude.

Also add ProviderTransform.tools(): the cache hierarchy is tools -> system ->
messages, so marking the last tool caches the whole tool-schema block as a
stable prefix. Tools bypass message()'s providerID->SDK-key remap, so tools()
writes the marker under the SDK key (anthropic cacheControl / bedrock
cachePoint) itself; llm.ts calls it on the tools object it passes to
streamText.

Update the multi-turn test to assert the two tail markers and add coverage for
the tools breakpoint (anthropic/bedrock shapes, 1h TTL, unsupported providers).
---
 packages/opencode/src/provider/transform.ts   | 53 +++++++++---
 packages/opencode/src/session/llm.ts          |  2 +-
 .../opencode/test/provider/transform.test.ts  | 81 ++++++++++++++++++-
 3 files changed, 120 insertions(+), 16 deletions(-)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 02dfdcb59..1dc2379c9 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -283,24 +283,31 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage
     },
   }
 
-  // Strategy: prefix cache is longest-common-prefix based, so the only marker
-  // that grows the cached prefix every turn is one pinned to the *end* of the
-  // request. We place two stable breakpoints (max 4 allowed by Anthropic):
+  // Strategy: prefix caching is longest-common-prefix based with a backward
+  // lookback window (Anthropic walks back ~20 blocks from a breakpoint to find
+  // a prior write). The markers that grow the cached prefix are pinned to the
+  // *tail* of the request. We place up to three stable breakpoints (Anthropic
+  // allows max 4):
   // 1. Last system message — the immutable prompt prefix.
-  // 2. The last message — "rolling head". Writing the cache up to the current
-  //    tail makes the entire history a cache *read* on the next turn.
-  //
-  // We deliberately do NOT place markers at a drifting midpoint or at
-  // before-last-user: those indices shift every turn, land on assistant/tool
-  // messages (silently dropped by openai-compatible proxies), and spend the
-  // breakpoint budget without ever caching the tail. See internal/docs/cache-policy.md.
+  // 2+3. The last TWO messages — a "rolling double buffer". Each turn marks
+  //      messages[-2] and messages[-1]; next turn the old [-1] is now [-2] and
+  //      still carries its marker, so the lookback gets a cache READ hit, while
+  //      the new [-1] is the WRITE for the turn after. One marker alone breaks
+  //      when an agentic turn appends >20 content blocks (tool spam pushes the
+  //      prior write outside the lookback window) or when a tool-call retry /
+  //      interrupt discards the last message — the second marker survives both.
+  //      A third tail marker would write a segment never read independently, so
+  //      two is the minimum that covers the old-tail/new-tail boundary.
+  // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user
+  // INDEX: those shift every turn without tracking the tail. See
+  // internal/docs/cache-policy.md.
   const targets: ModelMessage[] = []
 
   const systemMsgs = msgs.filter((msg) => msg.role === "system")
   if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1])
 
   const nonSystem = msgs.filter((msg) => msg.role !== "system")
-  if (nonSystem.length > 0) targets.push(nonSystem[nonSystem.length - 1])
+  for (const msg of nonSystem.slice(-2)) targets.push(msg)
 
   for (const msg of unique(targets)) {
     const useMessageLevelOptions =
@@ -450,6 +457,30 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re
   return msgs
 }
 
+// Place a cache breakpoint on the tool definitions. The cache hierarchy is
+// `tools` → `system` → `messages`, so marking the LAST tool caches the entire
+// tool-schema block (often several KB) as a stable prefix that sits in front of
+// the system + message caches. Tools are passed to the SDK separately from
+// `message()` and never go through its providerID→SDK-key remap, so we write
+// the marker under the SDK key directly. Tool registration order is stable
+// (insertion order of the tools record), so "last tool" is deterministic.
+export function tools<T extends Record<string, any>>(tools: T, model: Provider.Model): T {
+  if (!supportsCacheMarkers(model)) return tools
+  const names = Object.keys(tools)
+  if (names.length === 0) return tools
+
+  const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {}
+  const marker = iife(() => {
+    if (model.api.npm === "@ai-sdk/amazon-bedrock") return { bedrock: { cachePoint: { type: "default" } } }
+    const key = sdkKey(model.api.npm) ?? model.providerID
+    return { [key]: { cacheControl: { type: "ephemeral", ...ttl } } }
+  })
+
+  const last = tools[names[names.length - 1]]
+  last.providerOptions = mergeDeep(last.providerOptions ?? {}, marker)
+  return tools
+}
+
 export function temperature(model: Provider.Model) {
   const id = model.id.toLowerCase()
   if (id.includes("qwen")) return 0.55
diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
index 856ba1c7a..98f20dcb5 100644
--- a/packages/opencode/src/session/llm.ts
+++ b/packages/opencode/src/session/llm.ts
@@ -583,7 +583,7 @@ const live: Layer.Layer<
         topK: params.topK,
         providerOptions: ProviderTransform.providerOptions(input.model, params.options),
         activeTools: Object.keys(tools).filter((x) => x !== "invalid"),
-        tools,
+        tools: ProviderTransform.tools(tools, input.model),
         toolChoice: input.toolChoice,
         maxOutputTokens: params.maxOutputTokens,
         abortSignal: input.abort,
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index d7319f754..2d3a061c8 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -2173,7 +2173,7 @@ describe("ProviderTransform.message - cache control on gateway", () => {
     expect(result[0].providerOptions).toBeUndefined()
   })
 
-  test("multi-turn anthropic pins breakpoints to last system + last message only", () => {
+  test("multi-turn anthropic pins breakpoints to last system + last two messages", () => {
     const model = createModel({
       providerID: "anthropic",
       api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
@@ -2189,18 +2189,91 @@ describe("ProviderTransform.message - cache control on gateway", () => {
 
     const result = ProviderTransform.message(msgs, model, {}) as any[]
 
-    // Only the last system message and the last message carry a breakpoint.
+    // The last system message plus the last TWO messages carry a breakpoint
+    // (rolling double buffer): the prior turn's tail marker survives as the
+    // read point while the new tail marker is the next write.
     const marked = result
       .map((msg, index) => ({ index, role: msg.role, hasCache: !!msg.providerOptions?.anthropic?.cacheControl }))
       .filter((m) => m.hasCache)
 
     expect(marked).toEqual([
       { index: 0, role: "system", hasCache: true },
+      { index: 4, role: "assistant", hasCache: true },
       { index: 5, role: "user", hasCache: true },
     ])
-    // No drifting midpoint / before-last-user markers on intermediate assistant turns.
+    // No drifting midpoint marker on earlier turns.
     expect(result[2].providerOptions?.anthropic).toBeUndefined()
-    expect(result[4].providerOptions?.anthropic).toBeUndefined()
+    expect(result[3].providerOptions?.anthropic).toBeUndefined()
+  })
+})
+
+describe("ProviderTransform.tools", () => {
+  const createModel = (overrides: Partial<any> = {}): any => ({
+    id: "test/test-model",
+    providerID: "test",
+    api: { id: "test-model", url: "https://api.test.com", npm: "@ai-sdk/openai" },
+    name: "Test Model",
+    ...overrides,
+  })
+
+  test("marks the last tool for anthropic", () => {
+    const model = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+    })
+    const tools = { read: {}, write: {}, bash: {} } as Record<string, any>
+
+    const result = ProviderTransform.tools(tools, model)
+
+    expect(result.read.providerOptions).toBeUndefined()
+    expect(result.write.providerOptions).toBeUndefined()
+    expect(result.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral" } } })
+  })
+
+  test("threads cachePromptTTL 1h into the tool marker", () => {
+    const model = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+      cachePromptTTL: "1h",
+    })
+    const tools = { read: {}, bash: {} } as Record<string, any>
+
+    const result = ProviderTransform.tools(tools, model)
+
+    expect(result.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } })
+  })
+
+  test("uses cachePoint shape for bedrock", () => {
+    const model = createModel({
+      providerID: "amazon-bedrock",
+      api: { id: "anthropic.claude-sonnet-4", url: "https://api.test.com", npm: "@ai-sdk/amazon-bedrock" },
+    })
+    const tools = { read: {}, bash: {} } as Record<string, any>
+
+    const result = ProviderTransform.tools(tools, model)
+
+    expect(result.bash.providerOptions).toEqual({ bedrock: { cachePoint: { type: "default" } } })
+  })
+
+  test("no marker for providers that do not support cache markers", () => {
+    const model = createModel({
+      providerID: "openai",
+      api: { id: "gpt-4", url: "https://api.openai.com", npm: "@ai-sdk/openai" },
+    })
+    const tools = { read: {}, bash: {} } as Record<string, any>
+
+    const result = ProviderTransform.tools(tools, model)
+
+    expect(result.read.providerOptions).toBeUndefined()
+    expect(result.bash.providerOptions).toBeUndefined()
+  })
+
+  test("no-op on empty tools", () => {
+    const model = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+    })
+    expect(ProviderTransform.tools({}, model)).toEqual({})
   })
 })
 

From 4f86c40846a671868b8b9d43afc0914b7ad4ece7 Mon Sep 17 00:00:00 2001
From: wqymi <wangqiying@xiaomi.com>
Date: Mon, 22 Jun 2026 22:35:22 +0800
Subject: [PATCH 3/5] fix(provider): skip assistant turns when selecting cache
 markers on proxy path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The double rolling-tail selection used a blind last-2 (nonSystem.slice(-2)).
On openai-compatible / content-level providers, cache_control on an assistant
message is silently dropped, so when the tail is [..., user, assistant] one of
the two markers evaporates and the double buffer collapses to a single marker —
re-triggering the original low-hit-rate bug.

Select the last 2 cacheable messages by role: Anthropic/Bedrock honor
message-level assistant markers so take the last 2 outright; for everyone else
skip assistant turns and take the last 2 user/tool messages so both markers
survive. Tool results are role:"tool" after conversion and remain cacheable.

Add a content-level (OpenRouter) test asserting only the user turns are marked.
---
 packages/opencode/src/provider/transform.ts   | 20 +++++++++++--
 .../opencode/test/provider/transform.test.ts  | 30 +++++++++++++++++++
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 1dc2379c9..9f3d53af2 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -298,16 +298,30 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage
   //      interrupt discards the last message — the second marker survives both.
   //      A third tail marker would write a segment never read independently, so
   //      two is the minimum that covers the old-tail/new-tail boundary.
+  //
+  //      Role matters on the openai-compatible proxy path: those proxies
+  //      silently DROP cache_control on assistant messages (see
+  //      internal/docs/cache-policy.md — 1170 assistant markers, 0 cached). A
+  //      blind last-2 would often land a marker on an assistant turn and lose
+  //      it, collapsing the double buffer back to a single (or zero) effective
+  //      marker. For providers that take message-level markers (Anthropic,
+  //      Bedrock) assistant markers are honored, so we take the last 2 outright.
+  //      For everyone else we take the last 2 *cacheable* messages (user/tool),
+  //      skipping assistant turns so both markers survive.
   // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user
-  // INDEX: those shift every turn without tracking the tail. See
-  // internal/docs/cache-policy.md.
+  // INDEX: those shift every turn without tracking the tail.
   const targets: ModelMessage[] = []
 
   const systemMsgs = msgs.filter((msg) => msg.role === "system")
   if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1])
 
+  const assistantMarkersHonored =
+    model.providerID === "anthropic" ||
+    model.providerID.includes("bedrock") ||
+    model.api.npm === "@ai-sdk/amazon-bedrock"
   const nonSystem = msgs.filter((msg) => msg.role !== "system")
-  for (const msg of nonSystem.slice(-2)) targets.push(msg)
+  const cacheable = assistantMarkersHonored ? nonSystem : nonSystem.filter((msg) => msg.role !== "assistant")
+  for (const msg of cacheable.slice(-2)) targets.push(msg)
 
   for (const msg of unique(targets)) {
     const useMessageLevelOptions =
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index 2d3a061c8..402458c91 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -2205,6 +2205,36 @@ describe("ProviderTransform.message - cache control on gateway", () => {
     expect(result[2].providerOptions?.anthropic).toBeUndefined()
     expect(result[3].providerOptions?.anthropic).toBeUndefined()
   })
+
+  test("non-anthropic content-level path skips assistant turns so both markers survive", () => {
+    // Providers that place markers at content level (e.g. OpenRouter / proxies)
+    // can have cache_control on assistant messages silently dropped, so a blind
+    // last-2 would lose a marker when the tail is [..., user, assistant]. The
+    // selector must pick the last 2 cacheable (user/tool) messages instead.
+    const model = createModel({
+      providerID: "openrouter",
+      api: { id: "anthropic/claude-sonnet-4", url: "https://openrouter.ai/api", npm: "@openrouter/ai-sdk-provider" },
+    })
+    const msgs = [
+      { role: "system", content: [{ type: "text", text: "sys" }] },
+      { role: "user", content: [{ type: "text", text: "first question" }] },
+      { role: "assistant", content: [{ type: "text", text: "first answer" }] },
+      { role: "user", content: [{ type: "text", text: "second question" }] },
+      { role: "assistant", content: [{ type: "text", text: "second answer" }] },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, model, {}) as any[]
+
+    const hasMarker = (msg: any) =>
+      !!msg.providerOptions?.openrouter ||
+      msg.content?.some?.((c: any) => c.providerOptions?.openrouter)
+
+    // The two user turns (index 1, 3) are marked; the assistant turns are not.
+    expect(hasMarker(result[1])).toBe(true)
+    expect(hasMarker(result[3])).toBe(true)
+    expect(hasMarker(result[2])).toBe(false)
+    expect(hasMarker(result[4])).toBe(false)
+  })
 })
 
 describe("ProviderTransform.tools", () => {

From dbb520dd0de322faf162864bf0801ab6f6ae485f Mon Sep 17 00:00:00 2001
From: wqymi <wangqiying@xiaomi.com>
Date: Tue, 23 Jun 2026 14:02:39 +0800
Subject: [PATCH 4/5] revert(provider): drop role-based skip in cache-marker
 selection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous commit skipped assistant turns when selecting the two tail
breakpoints, on the theory that openai-compatible proxies silently drop
cache_control on assistant messages. That premise doesn't hold:

- providerOptions is namespaced per provider; a provider only reads its own
  namespace, so for protocols that don't consume cache_control (plain OpenAI),
  user AND assistant markers are equally absent from the payload — not an
  assistant-specific drop.
- supportsCacheMarkers already returns false for @ai-sdk/openai and
  @ai-sdk/openai-compatible, so those never reach applyCaching at all (the
  protocol whitelist already exists, mirroring upstream RESPECTS_INLINE_HINTS).
- The providers that do reach applyCaching (anthropic, bedrock, openrouter/
  copilot/alibaba on Claude) all honor message-level assistant markers, so
  skipping assistant just discards a valid breakpoint.

Select the last two messages by position again. Reframe the "why two" comment
around the real win: when the last message is removed (retry, Ctrl-C, user
edit/delete), the next-to-last marker is a still-present write the lookback
can land on, bounding eviction to the removed message rather than the whole
history. Cost is ~equal to a single marker (adjacent writes, no rewrite on hit).
---
 packages/opencode/src/provider/transform.ts   | 37 ++++++++-----------
 .../opencode/test/provider/transform.test.ts  | 15 ++++----
 2 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 9f3d53af2..109e9b536 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -292,22 +292,22 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage
   // 2+3. The last TWO messages — a "rolling double buffer". Each turn marks
   //      messages[-2] and messages[-1]; next turn the old [-1] is now [-2] and
   //      still carries its marker, so the lookback gets a cache READ hit, while
-  //      the new [-1] is the WRITE for the turn after. One marker alone breaks
-  //      when an agentic turn appends >20 content blocks (tool spam pushes the
-  //      prior write outside the lookback window) or when a tool-call retry /
-  //      interrupt discards the last message — the second marker survives both.
-  //      A third tail marker would write a segment never read independently, so
-  //      two is the minimum that covers the old-tail/new-tail boundary.
+  //      the new [-1] is the WRITE for the turn after.
   //
-  //      Role matters on the openai-compatible proxy path: those proxies
-  //      silently DROP cache_control on assistant messages (see
-  //      internal/docs/cache-policy.md — 1170 assistant markers, 0 cached). A
-  //      blind last-2 would often land a marker on an assistant turn and lose
-  //      it, collapsing the double buffer back to a single (or zero) effective
-  //      marker. For providers that take message-level markers (Anthropic,
-  //      Bedrock) assistant markers are honored, so we take the last 2 outright.
-  //      For everyone else we take the last 2 *cacheable* messages (user/tool),
-  //      skipping assistant turns so both markers survive.
+  //      Why two and not one: the second (next-to-last) marker is the safety
+  //      net for the tail boundary. When the last message is removed — a
+  //      tool-call retry, a Ctrl-C, or the user editing/deleting their latest
+  //      message — a lone tail marker disappears with it, and how much of the
+  //      surrounding prefix the provider then evicts depends on the upstream
+  //      (Anthropic) KV-cache implementation. The next-to-last marker is a
+  //      still-present, further-back write the next lookback can land on, so the
+  //      worst case degrades to "recompute only the removed message" instead of
+  //      "recompute the whole history". It also covers turns that append >20
+  //      blocks (tool spam pushes the prior write outside the lookback window).
+  //      Cost is ~equal to a single marker: the two adjacent breakpoints write
+  //      roughly the same incremental bytes as one, split in two, and a hit
+  //      never rewrites. A third marker would write a segment never read
+  //      independently, so two is the minimum that covers the boundary.
   // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user
   // INDEX: those shift every turn without tracking the tail.
   const targets: ModelMessage[] = []
@@ -315,13 +315,8 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage
   const systemMsgs = msgs.filter((msg) => msg.role === "system")
   if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1])
 
-  const assistantMarkersHonored =
-    model.providerID === "anthropic" ||
-    model.providerID.includes("bedrock") ||
-    model.api.npm === "@ai-sdk/amazon-bedrock"
   const nonSystem = msgs.filter((msg) => msg.role !== "system")
-  const cacheable = assistantMarkersHonored ? nonSystem : nonSystem.filter((msg) => msg.role !== "assistant")
-  for (const msg of cacheable.slice(-2)) targets.push(msg)
+  for (const msg of nonSystem.slice(-2)) targets.push(msg)
 
   for (const msg of unique(targets)) {
     const useMessageLevelOptions =
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index 402458c91..d0e9d3bcd 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -2206,11 +2206,9 @@ describe("ProviderTransform.message - cache control on gateway", () => {
     expect(result[3].providerOptions?.anthropic).toBeUndefined()
   })
 
-  test("non-anthropic content-level path skips assistant turns so both markers survive", () => {
-    // Providers that place markers at content level (e.g. OpenRouter / proxies)
-    // can have cache_control on assistant messages silently dropped, so a blind
-    // last-2 would lose a marker when the tail is [..., user, assistant]. The
-    // selector must pick the last 2 cacheable (user/tool) messages instead.
+  test("content-level provider marks the last two messages regardless of role", () => {
+    // Providers that reach applyCaching honor message-level markers (incl.
+    // assistant), so the double-tail marks the last two messages by position.
     const model = createModel({
       providerID: "openrouter",
       api: { id: "anthropic/claude-sonnet-4", url: "https://openrouter.ai/api", npm: "@openrouter/ai-sdk-provider" },
@@ -2229,11 +2227,12 @@ describe("ProviderTransform.message - cache control on gateway", () => {
       !!msg.providerOptions?.openrouter ||
       msg.content?.some?.((c: any) => c.providerOptions?.openrouter)
 
-    // The two user turns (index 1, 3) are marked; the assistant turns are not.
-    expect(hasMarker(result[1])).toBe(true)
+    // The last two messages (index 3 user, 4 assistant) are both marked.
     expect(hasMarker(result[3])).toBe(true)
+    expect(hasMarker(result[4])).toBe(true)
+    // Earlier turns are not.
+    expect(hasMarker(result[1])).toBe(false)
     expect(hasMarker(result[2])).toBe(false)
-    expect(hasMarker(result[4])).toBe(false)
   })
 })
 

From 2588478e6a680eb7a437797e1bc463df4029c3b2 Mon Sep 17 00:00:00 2001
From: wqymi <wangqiying@xiaomi.com>
Date: Tue, 23 Jun 2026 14:21:35 +0800
Subject: [PATCH 5/5] refactor(provider): unify cache-marker shape source for
 messages and tools

applyCaching and tools() each built the per-provider cache-control marker
independently, and tools() diverged: it wrote { copilot: { cacheControl } }
where the copilot SDK actually reads copilot_cache_control, so the tool
breakpoint was silently ineffective on github-copilot.

Extract cacheMarkerOptions() as the single source of truth for the per-provider
marker shapes (anthropic/openrouter/alibaba cacheControl, bedrock cachePoint,
openaiCompatible cache_control, copilot copilot_cache_control). message()
keeps attaching the full object and remapping; tools() resolves one SDK-keyed
namespace via cacheMarkerFor(). Add a copilot tools test locking in the shape.
---
 packages/opencode/src/provider/transform.ts   | 78 +++++++++++--------
 .../opencode/test/provider/transform.test.ts  | 12 +++
 2 files changed, 58 insertions(+), 32 deletions(-)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 109e9b536..f8837d964 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -257,31 +257,50 @@ function supportsCacheMarkers(model: Provider.Model): boolean {
   return false
 }
 
-function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
-  // Only Anthropic and OpenRouter expose a cache-control TTL in their AI SDK;
-  // the other providers ignore an unknown `ttl` field, so we only thread it
-  // into those two branches. Default (unset) stays the provider 5m default.
+// The cache-control marker shape differs per provider/SDK. This is the single
+// source of truth, keyed by the SDK provider-options namespace. `applyCaching`
+// attaches the whole object (keyed by stored providerID) and lets `message()`
+// remap the active provider's namespace to its SDK key; `tools()` (which
+// bypasses that remap) resolves a single namespace up front via `cacheMarkerFor`.
+// Only Anthropic and OpenRouter expose a cache TTL in their AI SDK; the other
+// providers ignore an unknown `ttl`, so it is threaded into those two only.
+function cacheMarkerOptions(model: Provider.Model) {
   const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {}
-  const providerOptions = {
-    anthropic: {
-      cacheControl: { type: "ephemeral", ...ttl },
-    },
-    openrouter: {
-      cacheControl: { type: "ephemeral", ...ttl },
-    },
-    bedrock: {
-      cachePoint: { type: "default" },
-    },
-    openaiCompatible: {
-      cache_control: { type: "ephemeral" },
-    },
-    copilot: {
-      copilot_cache_control: { type: "ephemeral" },
-    },
-    alibaba: {
-      cacheControl: { type: "ephemeral" },
-    },
+  return {
+    anthropic: { cacheControl: { type: "ephemeral", ...ttl } },
+    openrouter: { cacheControl: { type: "ephemeral", ...ttl } },
+    bedrock: { cachePoint: { type: "default" } },
+    openaiCompatible: { cache_control: { type: "ephemeral" } },
+    copilot: { copilot_cache_control: { type: "ephemeral" } },
+    alibaba: { cacheControl: { type: "ephemeral" } },
   }
+}
+
+// Resolve the marker for a single model, already keyed under the SDK namespace
+// the AI SDK expects: the remap `message()` performs for messages, done up
+// front for `tools()`, whose tools never pass through `message()`. Returns
+// undefined when `model.api.npm` matches no SDK below (`openaiCompatible` is
+// never selected); callers gate on `supportsCacheMarkers`, so it is a fallback.
+function cacheMarkerFor(model: Provider.Model): Record<string, unknown> | undefined {
+  const shapes = cacheMarkerOptions(model)
+  const ns: keyof typeof shapes | undefined =
+    model.api.npm === "@ai-sdk/anthropic" || model.api.npm === "@ai-sdk/google-vertex/anthropic"
+      ? "anthropic"
+      : model.api.npm === "@openrouter/ai-sdk-provider"
+        ? "openrouter"
+        : model.api.npm === "@ai-sdk/amazon-bedrock"
+          ? "bedrock"
+          : model.api.npm === "@ai-sdk/github-copilot"
+            ? "copilot"
+            : model.api.npm === "@ai-sdk/alibaba"
+              ? "alibaba"
+              : undefined
+  if (!ns) return undefined
+  return { [ns]: shapes[ns] }
+}
+
+function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
+  const providerOptions = cacheMarkerOptions(model)
 
   // Strategy: prefix caching is longest-common-prefix based with a backward
   // lookback window (Anthropic walks back ~20 blocks from a breakpoint to find
@@ -470,21 +489,16 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re
 // `tools` → `system` → `messages`, so marking the LAST tool caches the entire
 // tool-schema block (often several KB) as a stable prefix that sits in front of
 // the system + message caches. Tools are passed to the SDK separately from
-// `message()` and never go through its providerID→SDK-key remap, so we write
-// the marker under the SDK key directly. Tool registration order is stable
+// `message()` and never go through its providerID→SDK-key remap, so we resolve
+// the SDK-keyed marker via `cacheMarkerFor`. Tool registration order is stable
 // (insertion order of the tools record), so "last tool" is deterministic.
 export function tools<T extends Record<string, any>>(tools: T, model: Provider.Model): T {
   if (!supportsCacheMarkers(model)) return tools
+  const marker = cacheMarkerFor(model)
+  if (!marker) return tools
   const names = Object.keys(tools)
   if (names.length === 0) return tools
 
-  const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {}
-  const marker = iife(() => {
-    if (model.api.npm === "@ai-sdk/amazon-bedrock") return { bedrock: { cachePoint: { type: "default" } } }
-    const key = sdkKey(model.api.npm) ?? model.providerID
-    return { [key]: { cacheControl: { type: "ephemeral", ...ttl } } }
-  })
-
   const last = tools[names[names.length - 1]]
   last.providerOptions = mergeDeep(last.providerOptions ?? {}, marker)
   return tools
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index d0e9d3bcd..9b249af21 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -2284,6 +2284,18 @@ describe("ProviderTransform.tools", () => {
     expect(result.bash.providerOptions).toEqual({ bedrock: { cachePoint: { type: "default" } } })
   })
 
+  test("uses copilot_cache_control shape for github-copilot", () => {
+    const model = createModel({
+      providerID: "github-copilot",
+      api: { id: "claude-sonnet-4", url: "https://api.githubcopilot.com", npm: "@ai-sdk/github-copilot" },
+    })
+    const tools = { read: {}, bash: {} } as Record<string, any>
+
+    const result = ProviderTransform.tools(tools, model)
+
+    expect(result.bash.providerOptions).toEqual({ copilot: { copilot_cache_control: { type: "ephemeral" } } })
+  })
+
   test("no marker for providers that do not support cache markers", () => {
     const model = createModel({
       providerID: "openai",