Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 89 additions & 35 deletions packages/opencode/src/provider/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -257,50 +257,85 @@ function supportsCacheMarkers(model: Provider.Model): boolean {
return false
}

function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
// Only Anthropic and OpenRouter expose a cache-control TTL in their AI SDK;
// the other providers ignore an unknown `ttl` field, so we only thread it
// into those two branches. Default (unset) stays the provider 5m default.
// Cache-control marker shapes, keyed by the SDK provider-options namespace.
//
// The marker shape differs per provider/SDK, and this function is the single
// source of truth for it. `applyCaching` attaches the whole object (keyed by
// stored providerID) and lets `message()` remap the active provider's namespace
// to its SDK key; `tools()` (which bypasses that remap) resolves a single
// namespace up front via `cacheMarkerFor`.
//
// Only Anthropic and OpenRouter expose a TTL in their AI SDK — the others ignore
// an unknown `ttl`, so it is threaded only into those two entries. When
// `model.cachePromptTTL` is anything other than "1h" no `ttl` key is emitted at
// all, leaving the provider's own default in effect.
//
// Returns a freshly built object on every call, so callers may merge it into
// their own structures without sharing state between requests.
function cacheMarkerOptions(model: Provider.Model) {
  // Spread of `{}` adds no key, so the default shape stays `{ type: "ephemeral" }`.
  const ttl = model.cachePromptTTL === "1h" ? { ttl: "1h" as const } : {}
  return {
    anthropic: { cacheControl: { type: "ephemeral", ...ttl } },
    openrouter: { cacheControl: { type: "ephemeral", ...ttl } },
    bedrock: { cachePoint: { type: "default" } },
    openaiCompatible: { cache_control: { type: "ephemeral" } },
    copilot: { copilot_cache_control: { type: "ephemeral" } },
    alibaba: { cacheControl: { type: "ephemeral" } },
  }
}

// Build the cache marker for one model, already wrapped under the SDK
// provider-options namespace the AI SDK expects. This is the same remap that
// `message()` applies to messages, performed ahead of time for `tools()`, whose
// tool definitions never pass through `message()`.
//
// The namespace is picked from `model.api.npm`. Packages with no entry below
// yield `undefined`; callers gate on `supportsCacheMarkers` first, so that
// branch is a type-safety fallback.
function cacheMarkerFor(model: Provider.Model): Record<string, unknown> | undefined {
  const markers = cacheMarkerOptions(model)

  let namespace: keyof typeof markers | undefined
  switch (model.api.npm) {
    case "@ai-sdk/anthropic":
    case "@ai-sdk/google-vertex/anthropic":
      namespace = "anthropic"
      break
    case "@openrouter/ai-sdk-provider":
      namespace = "openrouter"
      break
    case "@ai-sdk/amazon-bedrock":
      namespace = "bedrock"
      break
    case "@ai-sdk/github-copilot":
      namespace = "copilot"
      break
    case "@ai-sdk/alibaba":
      namespace = "alibaba"
      break
    default:
      namespace = undefined
  }

  if (namespace === undefined) return undefined
  return { [namespace]: markers[namespace] }
}

// Strategy: place cache breakpoints at stable prefix boundaries (max 4 allowed by Anthropic)
// 1. Last system message — system prompt never changes
// 2. Midpoint of conversation history — long prefix second-level cache
// 3. Message before the last user message — stable history boundary
function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
const providerOptions = cacheMarkerOptions(model)

// Strategy: prefix caching is longest-common-prefix based with a backward
// lookback window (Anthropic walks back ~20 blocks from a breakpoint to find
// a prior write). The markers that grow the cached prefix are pinned to the
// *tail* of the request. We place up to three stable breakpoints (Anthropic
// allows max 4):
// 1. Last system message — the immutable prompt prefix.
// 2+3. The last TWO messages — a "rolling double buffer". Each turn marks
// messages[-2] and messages[-1]; next turn the old [-1] is now [-2] and
// still carries its marker, so the lookback gets a cache READ hit, while
// the new [-1] is the WRITE for the turn after.
//
// Why two and not one: the second (next-to-last) marker is the safety
// net for the tail boundary. When the last message is removed — a
// tool-call retry, a Ctrl-C, or the user editing/deleting their latest
// message — a lone tail marker disappears with it, and how much of the
// surrounding prefix the provider then evicts depends on the upstream
// (Anthropic) KV-cache implementation. The next-to-last marker is a
// still-present, further-back write the next lookback can land on, so the
// worst case degrades to "recompute only the removed message" instead of
// "recompute the whole history". It also covers turns that append >20
// blocks (tool spam pushes the prior write outside the lookback window).
// Cost is ~equal to a single marker: the two adjacent breakpoints write
// roughly the same incremental bytes as one, split in two, and a hit
// never rewrites. A third marker would write a segment never read
// independently, so two is the minimum that covers the boundary.
// We deliberately do NOT mark a drifting midpoint or a fixed before-last-user
// INDEX: those shift every turn without tracking the tail.
const targets: ModelMessage[] = []

const systemMsgs = msgs.filter((msg) => msg.role === "system")
if (systemMsgs.length > 0) targets.push(systemMsgs[systemMsgs.length - 1])

const nonSystem = msgs.filter((msg) => msg.role !== "system")
const lastUserIdx = nonSystem.findLastIndex((msg) => msg.role === "user")
if (lastUserIdx >= 1) {
targets.push(nonSystem[lastUserIdx - 1])
const midpoint = Math.floor(lastUserIdx / 2)
if (midpoint > 0 && midpoint < lastUserIdx - 1) targets.push(nonSystem[midpoint])
} else if (lastUserIdx === 0) {
targets.push(nonSystem[0])
}
for (const msg of nonSystem.slice(-2)) targets.push(msg)

for (const msg of unique(targets)) {
const useMessageLevelOptions =
Expand Down Expand Up @@ -450,6 +485,25 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re
return msgs
}

// Place a cache breakpoint on the tool definitions. The cache hierarchy is
// `tools` → `system` → `messages`, so marking the LAST tool caches the entire
// tool-schema block (often several KB) as a stable prefix that sits in front of
// the system + message caches. Tools are passed to the SDK separately from
// `message()` and never go through its providerID→SDK-key remap, so we resolve
// the SDK-keyed marker via `cacheMarkerFor`. Tool registration order is stable
// (insertion order of the tools record), so "last tool" is deterministic.
//
// The input record and its tool objects are never mutated: the result is a
// shallow copy in which only the last entry is replaced by a copy carrying the
// merged `providerOptions`. Writing the marker onto the caller's object would
// let markers from earlier calls (another provider namespace, or a stale
// `ttl`) leak into later requests that reuse the same tool objects.
//
// Returns `tools` itself, untouched, when the model takes no cache markers,
// when no marker shape is known for its SDK package, when the record is empty,
// or when the last entry is null/undefined.
//
// NOTE(review): the marker goes on the last key of the record regardless of
// which tools the caller later activates — confirm the last key is never a
// tool that gets filtered out before the request is built.
export function tools<T extends Record<string, any>>(tools: T, model: Provider.Model): T {
  if (!supportsCacheMarkers(model)) return tools
  const marker = cacheMarkerFor(model)
  if (!marker) return tools

  const names = Object.keys(tools)
  if (names.length === 0) return tools

  const lastName = names[names.length - 1]
  const last = tools[lastName]
  // Nothing to attach a marker to; avoid dereferencing a missing tool.
  if (last == null) return tools

  // Re-assigning an existing key in a spread keeps its original position, so
  // the marked tool is still the last one in iteration order.
  return {
    ...tools,
    [lastName]: { ...last, providerOptions: mergeDeep(last.providerOptions ?? {}, marker) },
  } as T
}

export function temperature(model: Provider.Model) {
const id = model.id.toLowerCase()
if (id.includes("qwen")) return 0.55
Expand Down
2 changes: 1 addition & 1 deletion packages/opencode/src/session/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ const live: Layer.Layer<
topK: params.topK,
providerOptions: ProviderTransform.providerOptions(input.model, params.options),
activeTools: Object.keys(tools).filter((x) => x !== "invalid"),
tools,
tools: ProviderTransform.tools(tools, input.model),
toolChoice: input.toolChoice,
maxOutputTokens: params.maxOutputTokens,
abortSignal: input.abort,
Expand Down
144 changes: 144 additions & 0 deletions packages/opencode/test/provider/transform.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2172,6 +2172,150 @@ describe("ProviderTransform.message - cache control on gateway", () => {

expect(result[0].providerOptions).toBeUndefined()
})

test("multi-turn anthropic pins breakpoints to last system + last two messages", () => {
  const anthropic = createModel({
    providerID: "anthropic",
    api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
  })
  // One system prompt followed by two and a half user/assistant turns.
  const conversation = [
    { role: "system", content: "You are a helpful assistant" },
    { role: "user", content: "first question" },
    { role: "assistant", content: "first answer" },
    { role: "user", content: "second question" },
    { role: "assistant", content: "second answer" },
    { role: "user", content: "third question" },
  ] as any[]

  const transformed = ProviderTransform.message(conversation, anthropic, {}) as any[]

  // Collect every message that carries an anthropic cacheControl marker.
  // Expected: the last system message plus the last TWO messages (rolling
  // double buffer) — the previous turn's tail marker is the read point, the
  // new tail marker is the next write.
  const breakpoints = transformed.flatMap((msg, index) =>
    msg.providerOptions?.anthropic?.cacheControl ? [{ index, role: msg.role, hasCache: true }] : [],
  )

  expect(breakpoints).toEqual([
    { index: 0, role: "system", hasCache: true },
    { index: 4, role: "assistant", hasCache: true },
    { index: 5, role: "user", hasCache: true },
  ])

  // Earlier turns must not pick up a drifting midpoint marker.
  for (const earlier of [transformed[2], transformed[3]]) {
    expect(earlier.providerOptions?.anthropic).toBeUndefined()
  }
})

test("content-level provider marks the last two messages regardless of role", () => {
  // Providers that reach applyCaching honor message-level markers (assistant
  // included), so the double-tail is chosen purely by position.
  const openrouter = createModel({
    providerID: "openrouter",
    api: { id: "anthropic/claude-sonnet-4", url: "https://openrouter.ai/api", npm: "@openrouter/ai-sdk-provider" },
  })
  const textPart = (text: string) => [{ type: "text", text }]
  const conversation = [
    { role: "system", content: textPart("sys") },
    { role: "user", content: textPart("first question") },
    { role: "assistant", content: textPart("first answer") },
    { role: "user", content: textPart("second question") },
    { role: "assistant", content: textPart("second answer") },
  ] as any[]

  const transformed = ProviderTransform.message(conversation, openrouter, {}) as any[]

  // A marker counts whether it sits on the message or on one of its parts.
  const isMarked = (msg: any) =>
    !!msg.providerOptions?.openrouter ||
    msg.content?.some?.((part: any) => part.providerOptions?.openrouter)

  // Tail pair: index 3 (user) and index 4 (assistant).
  expect(isMarked(transformed[3])).toBe(true)
  expect(isMarked(transformed[4])).toBe(true)
  // Everything before the tail pair stays unmarked.
  expect(isMarked(transformed[1])).toBe(false)
  expect(isMarked(transformed[2])).toBe(false)
})
})

describe("ProviderTransform.tools", () => {
  // Minimal model stub; fields passed in `overrides` replace the defaults wholesale.
  const createModel = (overrides: Partial<any> = {}): any => {
    const defaults = {
      id: "test/test-model",
      providerID: "test",
      api: { id: "test-model", url: "https://api.test.com", npm: "@ai-sdk/openai" },
      name: "Test Model",
    }
    return { ...defaults, ...overrides }
  }

  // Anthropic-backed stub shared by several cases below.
  const anthropicModel = (extra: Partial<any> = {}): any =>
    createModel({
      providerID: "anthropic",
      api: { id: "claude-sonnet-4", url: "https://api.anthropic.com", npm: "@ai-sdk/anthropic" },
      ...extra,
    })

  // Fresh two-tool record per test so cases never share tool objects.
  const readAndBash = () => ({ read: {}, bash: {} }) as Record<string, any>

  test("marks the last tool for anthropic", () => {
    const toolset = { read: {}, write: {}, bash: {} } as Record<string, any>

    const marked = ProviderTransform.tools(toolset, anthropicModel())

    // Only the final entry (insertion order) carries the breakpoint.
    expect(marked.read.providerOptions).toBeUndefined()
    expect(marked.write.providerOptions).toBeUndefined()
    expect(marked.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral" } } })
  })

  test("threads cachePromptTTL 1h into the tool marker", () => {
    const marked = ProviderTransform.tools(readAndBash(), anthropicModel({ cachePromptTTL: "1h" }))

    expect(marked.bash.providerOptions).toEqual({ anthropic: { cacheControl: { type: "ephemeral", ttl: "1h" } } })
  })

  test("uses cachePoint shape for bedrock", () => {
    const bedrock = createModel({
      providerID: "amazon-bedrock",
      api: { id: "anthropic.claude-sonnet-4", url: "https://api.test.com", npm: "@ai-sdk/amazon-bedrock" },
    })

    const marked = ProviderTransform.tools(readAndBash(), bedrock)

    expect(marked.bash.providerOptions).toEqual({ bedrock: { cachePoint: { type: "default" } } })
  })

  test("uses copilot_cache_control shape for github-copilot", () => {
    const copilot = createModel({
      providerID: "github-copilot",
      api: { id: "claude-sonnet-4", url: "https://api.githubcopilot.com", npm: "@ai-sdk/github-copilot" },
    })

    const marked = ProviderTransform.tools(readAndBash(), copilot)

    expect(marked.bash.providerOptions).toEqual({ copilot: { copilot_cache_control: { type: "ephemeral" } } })
  })

  test("no marker for providers that do not support cache markers", () => {
    const openai = createModel({
      providerID: "openai",
      api: { id: "gpt-4", url: "https://api.openai.com", npm: "@ai-sdk/openai" },
    })

    const untouched = ProviderTransform.tools(readAndBash(), openai)

    expect(untouched.read.providerOptions).toBeUndefined()
    expect(untouched.bash.providerOptions).toBeUndefined()
  })

  test("no-op on empty tools", () => {
    // An empty record has no "last tool" to mark.
    expect(ProviderTransform.tools({}, anthropicModel())).toEqual({})
  })
})

describe("ProviderTransform.variants", () => {
Expand Down
Loading