Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions extensions/copilot/src/extension/intents/node/agentIntent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,21 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
));
}

const lastMessage = result.messages.at(-1);
if (lastMessage?.role === Raw.ChatRole.User) {
const currentTurn = promptContext.conversation?.getLatestTurn();
if (currentTurn && !currentTurn.getMetadata(RenderedUserMessageMetadata)) {
currentTurn.setMetadata(new RenderedUserMessageMetadata(lastMessage.content));
}
}

addCacheBreakpoints(result.messages);

// Post-render: kick off background compaction at ≥ 80% if idle.
// This must run AFTER addCacheBreakpoints so that the messages
// forwarded to the background summarizer include cache breakpoints,
// making the prompt prefix byte-identical to the main agent fetch
// and enabling prompt cache hits on the summarization call.
if (summarizationEnabled && backgroundSummarizer && !didSummarizeThisIteration) {
const postRenderRatio = baseBudget > 0
? (result.tokenCount + toolTokens) / baseBudget
Expand All @@ -682,16 +696,6 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
}
}

const lastMessage = result.messages.at(-1);
if (lastMessage?.role === Raw.ChatRole.User) {
const currentTurn = promptContext.conversation?.getLatestTurn();
if (currentTurn && !currentTurn.getMetadata(RenderedUserMessageMetadata)) {
currentTurn.setMetadata(new RenderedUserMessageMetadata(lastMessage.content));
}
}

addCacheBreakpoints(result.messages);

if (this.request.command === 'error') {
// Should trigger a 400
result.messages.push({
Expand Down Expand Up @@ -805,9 +809,14 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
try {
if (useInlineSummarization) {
// Inline mode: fork the exact messages from the main render
// and append a summary user message. The prompt prefix is
// byte-identical to the main agent loop for cache hits.
const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages);
// and append a summary user message. The prompt prefix must
// be byte-identical to the main agent fetch for cache hits.
// Apply the same post-processing as the tool calling loop
// (strip internal IDs + validate/filter orphaned tool messages)
// so the message arrays match exactly.
const strippedMainMessages = ToolCallingLoop.validateToolMessagesCore(
ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages),
).messages;
Comment on lines +817 to +819
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the inline background summarization path, `validateToolMessagesCore` is called without the `stripOrphanedToolCalls` option. The main agent fetch path applies `applyMessagePostProcessing(..., { stripOrphanedToolCalls: isGeminiFamily(endpoint) })`, so for Gemini endpoints the background summarizer can keep orphaned `toolCalls` on assistant messages. That can (a) make the message prefix diverge again (hurting cache parity) and (b) cause Gemini 400s due to missing 1:1 tool_call ↔ tool_result pairing. Consider passing `{ stripOrphanedToolCalls: isGeminiFamily(this.endpoint) }` here (and importing `isGeminiFamily`) to match the main loop’s post-processing exactly.

Copilot uses AI. Check for mistakes.
const summaryMsgResult = await renderPromptElement(
this.instantiationService,
this.endpoint,
Expand Down
Loading