XiaomiMiMo · wqymi · Jun 23, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
@@ -257,50 +257,85 @@ function supportsCacheMarkers(model: Provider.Model): boolean {
   return false
 }
 
-function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
-  // Only Anthropic and OpenRouter expose a cache-control TTL in their AI SDK;
-  // the other providers ignore an unknown `ttl` field, so we only thread it
-  // into those two branches. Default (unset) stays the provider 5m default.
+// The cache-control marker shape differs per provider/SDK. This is the single
+// source of truth, keyed by the SDK provider-options namespace. `applyCaching`
+// attaches the whole object (keyed by stored providerID) and lets `message()`
+// remap the active provider's namespace to its SDK key; `tools()` (which
+// bypasses that remap) resolves a single namespace up front via `cacheMarkerFor`.
+// Only Anthropic and OpenRouter expose a TTL in their AI SDK (the others ignore
+// an unknown `ttl`), so we set it only there, and only when cachePromptTTL is "1h".
+function cacheMarkerOptions(model: Provider.Model) {
   const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {}
-  const providerOptions = {
-    anthropic: {
-      cacheControl: { type: "ephemeral", ...ttl },
-    },
-    openrouter: {
-      cacheControl: { type: "ephemeral", ...ttl },
-    },
-    bedrock: {
-      cachePoint: { type: "default" },
-    },
-    openaiCompatible: {
-      cache_control: { type: "ephemeral" },
-    },
-    copilot: {
-      copilot_cache_control: { type: "ephemeral" },
-    },
-    alibaba: {
-      cacheControl: { type: "ephemeral" },
-    },
+  return {
+    anthropic: { cacheControl: { type: "ephemeral", ...ttl } },
+    openrouter: { cacheControl: { type: "ephemeral", ...ttl } },
+    bedrock: { cachePoint: { type: "default" } },
+    openaiCompatible: { cache_control: { type: "ephemeral" } },
+    copilot: { copilot_cache_control: { type: "ephemeral" } },
+    alibaba: { cacheControl: { type: "ephemeral" } },
   }
+}
+
+// Build the provider-options marker for one model, keyed under the single SDK
+// namespace chosen from `model.api.npm`. `tools()` needs this because its tools
+// do not go through `message()`, which does the equivalent remap for messages.
+// Yields undefined when the package has no namespace mapping below; callers
+// check `supportsCacheMarkers` first, so that path is a type-safety fallback.
+function cacheMarkerFor(model: Provider.Model): Record<string, unknown> | undefined {
+  const markers = cacheMarkerOptions(model)
+  // SDK package name -> provider-options namespace. A Map (not an object
+  // literal) keeps lookups from ever matching inherited property names.
+  const namespaceByPackage = new Map<string, keyof typeof markers>([
+    ["@ai-sdk/anthropic", "anthropic"],
+    ["@ai-sdk/google-vertex/anthropic", "anthropic"],
+    ["@openrouter/ai-sdk-provider", "openrouter"],
+    ["@ai-sdk/amazon-bedrock", "bedrock"],
+    ["@ai-sdk/github-copilot", "copilot"],
+    ["@ai-sdk/alibaba", "alibaba"],
+  ])
+
+  const namespace = namespaceByPackage.get(model.api.npm)
+  if (namespace === undefined) return undefined
+  return { [namespace]: markers[namespace] }
+}
 
-  // Strategy: place cache breakpoints at stable prefix boundaries (max 4 allowed by Anthropic)
-  // 1. Last system message — system prompt never changes
-  // 2. Midpoint of conversation history — long prefix second-level cache
-  // 3. Message before the last user message — stable history boundary
+function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
+  const providerOptions = cacheMarkerOptions(model)
+
+  // Strategy: prefix caching is longest-common-prefix based with a backward
+  // lookback window (Anthropic walks back ~20 blocks from a breakpoint to find
+  // a prior write). The markers that grow the cached prefix are pinned to the
+  // *tail* of the request. We place up to three stable breakpoints here; with
+  // the one `tools()` adds, that reaches Anthropic's maximum of 4:
+  // 1. Last system message — the immutable prompt prefix.
+  // 2+3. The last TWO messages — a "rolling double buffer". Each turn marks
+  //      messages[-2] and messages[-1]; next turn the old [-1] is now [-2] and
+  //      still carries its marker, so the lookback gets a cache READ hit, while
+  //      the new [-1] is the WRITE for the turn after.
+  //
+  //      Why two and not one: the second (next-to-last) marker is the safety
+  //      net for the tail boundary. When the last message is removed — a
+  //      tool-call retry, a Ctrl-C, or the user editing/deleting their latest
+  //      message — a lone tail marker disappears with it, and how much of the
+  //      surrounding prefix the provider then evicts depends on the upstream
+  //      (Anthropic) KV-cache implementation. The next-to-last marker is a
+  //      still-present, further-back write the next lookback can land on, so the
+  //      worst case degrades to "recompute only the removed message" instead of
+  //      "recompute the whole history". It also covers turns that append >20
+  //      blocks (tool spam pushes the prior write outside the lookback window).
+  //      Cost is ~equal to a single marker: the two adjacent breakpoints write
+  //      roughly the same incremental bytes as one, split in two, and a hit
+  //      never rewrites. A third marker would write a segment never read
+  //      independently, so two is the minimum that covers the boundary.
+  // We deliberately do NOT mark a drifting midpoint or a fixed before-last-user
+  // INDEX: those shift every turn without tracking the tail.
   const targets: ModelMessage[] = []
 
   const systemMsgs = msgs.filter((msg) => msg.role === "system")
   if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1])
 
   const nonSystem = msgs.filter((msg) => msg.role !== "system")
-  const lastUserIdx = nonSystem.findLastIndex((msg) => msg.role === "user")
-  if (lastUserIdx >= 1) {
-    targets.push(nonSystem[lastUserIdx - 1])
-    const midpoint = Math.floor(lastUserIdx / 2)
-    if (midpoint > 0 && midpoint < lastUserIdx - 1) targets.push(nonSystem[midpoint])
-  } else if (lastUserIdx === 0) {
-    targets.push(nonSystem[0])
-  }
+  for (const msg of nonSystem.slice(-2)) targets.push(msg)
 
   for (const msg of unique(targets)) {
     const useMessageLevelOptions =
@@ -450,6 +485,25 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re
   return msgs
 }
 
+// Place a cache breakpoint on the tool definitions. The cache hierarchy is
+// `tools` → `system` → `messages`, so marking the LAST tool caches the entire
+// tool-schema block as a stable prefix in front of the system + message caches.
+// Tools never go through `message()`'s providerID→SDK-key remap, so the marker
+// comes SDK-keyed from `cacheMarkerFor`. "Last" is the final `Object.keys`
+// entry. Returns `tools` untouched when the model takes no markers or there are
+// no tools; otherwise returns a shallow copy and never mutates the input.
+export function tools<T extends Record<string, any>>(tools: T, model: Provider.Model): T {
+  if (!supportsCacheMarkers(model)) return tools
+  const marker = cacheMarkerFor(model)
+  const names = Object.keys(tools)
+  if (!marker || names.length === 0) return tools
+  // Copy rather than assign in place: a caller that reuses its tool objects
+  // would otherwise keep a stale marker on a tool that is no longer last.
+  const lastName = names[names.length - 1]
+  const last = tools[lastName]
+  return { ...tools, [lastName]: { ...last, providerOptions: mergeDeep(last.providerOptions ?? {}, marker) } }
+}
+
 export function temperature(model: Provider.Model) {
   const id = model.id.toLowerCase()
   if (id.includes("qwen")) return 0.55

diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts
@@ -583,7 +583,7 @@ const live: Layer.Layer<
         topK: params.topK,
         providerOptions: ProviderTransform.providerOptions(input.model, params.options),
         activeTools: Object.keys(tools).filter((x) => x !== "invalid"),
-        tools,
+        tools: ProviderTransform.tools(tools, input.model),
         toolChoice: input.toolChoice,
         maxOutputTokens: params.maxOutputTokens,
         abortSignal: input.abort,

diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
@@ -2172,6 +2172,150 @@ describe("ProviderTransform.message - cache control on gateway", () => {
 
     expect(result[0].providerOptions).toBeUndefined()
   })
+
+  test("multi-turn anthropic pins breakpoints to last system + last two messages", () => {
+    const anthropic = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+    })
+    const conversation = [
+      { role: "system", content: "You are a helpful assistant" },
+      { role: "user", content: "first question" },
+      { role: "assistant", content: "first answer" },
+      { role: "user", content: "second question" },
+      { role: "assistant", content: "second answer" },
+      { role: "user", content: "third question" },
+    ] as any[]
+
+    const transformed = ProviderTransform.message(conversation, anthropic, {}) as any[]
+
+    // Expected breakpoints: the final system message and the final two
+    // messages. Collect every message that carries an anthropic cacheControl
+    // so the assertion also proves no other message was marked.
+    const marked = transformed.flatMap((msg, index) =>
+      msg.providerOptions?.anthropic?.cacheControl ? [{ index, role: msg.role, hasCache: true }] : [],
+    )
+
+    expect(marked).toEqual([
+      { index: 0, role: "system", hasCache: true },
+      { index: 4, role: "assistant", hasCache: true },
+      { index: 5, role: "user", hasCache: true },
+    ])
+    // Messages from earlier turns carry no anthropic options at all.
+    expect(transformed[2].providerOptions?.anthropic).toBeUndefined()
+    expect(transformed[3].providerOptions?.anthropic).toBeUndefined()
+  })
+
+  test("content-level provider marks the last two messages regardless of role", () => {
+    // Providers that reach applyCaching honor message-level markers (incl.
+    // assistant), so the double-tail marks the last two messages by position.
+    const model = createModel({
+      providerID: "openrouter",
+      api: { id: "anthropic/claude-sonnet-4", url: "https://openrouter.ai/api", npm: "@openrouter/ai-sdk-provider" },
+    })
+    const msgs = [
+      { role: "system", content: [{ type: "text", text: "sys" }] },
+      { role: "user", content: [{ type: "text", text: "first question" }] },
+      { role: "assistant", content: [{ type: "text", text: "first answer" }] },
+      { role: "user", content: [{ type: "text", text: "second question" }] },
+      { role: "assistant", content: [{ type: "text", text: "second answer" }] },
+    ] as any[]
+
+    const result = ProviderTransform.message(msgs, model, {}) as any[]
+    // Always a boolean: with string content a bare `?.some?.()` is undefined, not false.
+    const hasMarker = (msg: any): boolean =>
+      !!msg.providerOptions?.openrouter ||
+      (Array.isArray(msg.content) && msg.content.some((c: any) => !!c.providerOptions?.openrouter))
+
+    // The last two messages (index 3 user, 4 assistant) are both marked.
+    expect(hasMarker(result[3])).toBe(true)
+    expect(hasMarker(result[4])).toBe(true)
+    // Earlier turns are not.
+    expect(hasMarker(result[1])).toBe(false)
+    expect(hasMarker(result[2])).toBe(false)
+  })
+})
+
+describe("ProviderTransform.tools", () => {
+  const baseModel = {
+    id: "test/test-model",
+    providerID: "test",
+    api: { id: "test-model", url: "https://api.test.com", npm: "@ai-sdk/openai" },
+    name: "Test Model",
+  }
+  const createModel = (overrides: Partial<any> = {}): any => ({ ...baseModel, ...overrides })
+
+  test("marks the last tool for anthropic", () => {
+    const anthropic = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+    })
+    const toolset = { read: {}, write: {}, bash: {} } as Record<string, any>
+
+    const marked = ProviderTransform.tools(toolset, anthropic)
+
+    // Only the final key ("bash") receives the breakpoint.
+    for (const name of ["read", "write"]) expect(marked[name].providerOptions).toBeUndefined()
+    expect(marked.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral" } } })
+  })
+
+  test("threads cachePromptTTL 1h into the tool marker", () => {
+    const hourlyModel = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+      cachePromptTTL: "1h",
+    })
+    const toolset = { read: {}, bash: {} } as Record<string, any>
+
+    const { bash } = ProviderTransform.tools(toolset, hourlyModel)
+
+    expect(bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } })
+  })
+
+  test("uses cachePoint shape for bedrock", () => {
+    const bedrock = createModel({
+      providerID: "amazon-bedrock",
+      api: { id: "anthropic.claude-sonnet-4", url: "https://api.test.com", npm: "@ai-sdk/amazon-bedrock" },
+    })
+    const toolset = { read: {}, bash: {} } as Record<string, any>
+
+    const { bash } = ProviderTransform.tools(toolset, bedrock)
+
+    expect(bash.providerOptions).toEqual({ bedrock: { cachePoint: { type: "default" } } })
+  })
+
+  test("uses copilot_cache_control shape for github-copilot", () => {
+    const copilot = createModel({
+      providerID: "github-copilot",
+      api: { id: "claude-sonnet-4", url: "https://api.githubcopilot.com", npm: "@ai-sdk/github-copilot" },
+    })
+    const toolset = { read: {}, bash: {} } as Record<string, any>
+
+    const { bash } = ProviderTransform.tools(toolset, copilot)
+
+    expect(bash.providerOptions).toEqual({ copilot: { copilot_cache_control: { type: "ephemeral" } } })
+  })
+
+  test("no marker for providers that do not support cache markers", () => {
+    const openai = createModel({
+      providerID: "openai",
+      api: { id: "gpt-4", url: "https://api.openai.com", npm: "@ai-sdk/openai" },
+    })
+    const toolset = { read: {}, bash: {} } as Record<string, any>
+
+    const untouched = ProviderTransform.tools(toolset, openai)
+
+    expect(untouched.read.providerOptions).toBeUndefined()
+    expect(untouched.bash.providerOptions).toBeUndefined()
+  })
+
+  test("no-op on empty tools", () => {
+    const anthropic = createModel({
+      providerID: "anthropic",
+      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
+    })
+    expect(ProviderTransform.tools({}, anthropic)).toEqual({})
+  })
 })
 
 describe("ProviderTransform.variants", () => {