diff --git a/extensions/copilot/src/extension/intents/node/agentIntent.ts b/extensions/copilot/src/extension/intents/node/agentIntent.ts index a2cdae7fa7de8..0b0c434982068 100644 --- a/extensions/copilot/src/extension/intents/node/agentIntent.ts +++ b/extensions/copilot/src/extension/intents/node/agentIntent.ts @@ -658,7 +658,21 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I )); } + const lastMessage = result.messages.at(-1); + if (lastMessage?.role === Raw.ChatRole.User) { + const currentTurn = promptContext.conversation?.getLatestTurn(); + if (currentTurn && !currentTurn.getMetadata(RenderedUserMessageMetadata)) { + currentTurn.setMetadata(new RenderedUserMessageMetadata(lastMessage.content)); + } + } + + addCacheBreakpoints(result.messages); + // Post-render: kick off background compaction at ≥ 80% if idle. + // This must run AFTER addCacheBreakpoints so that the messages + // forwarded to the background summarizer include cache breakpoints, + // making the prompt prefix byte-identical to the main agent fetch + // and enabling prompt cache hits on the summarization call. if (summarizationEnabled && backgroundSummarizer && !didSummarizeThisIteration) { const postRenderRatio = baseBudget > 0 ? (result.tokenCount + toolTokens) / baseBudget @@ -682,16 +696,6 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I } } - const lastMessage = result.messages.at(-1); - if (lastMessage?.role === Raw.ChatRole.User) { - const currentTurn = promptContext.conversation?.getLatestTurn(); - if (currentTurn && !currentTurn.getMetadata(RenderedUserMessageMetadata)) { - currentTurn.setMetadata(new RenderedUserMessageMetadata(lastMessage.content)); - } - } - - addCacheBreakpoints(result.messages); - if (this.request.command === 'error') { // Should trigger a 400 result.messages.push({ @@ -805,9 +809,14 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I try { if (useInlineSummarization) { // Inline mode: fork the exact messages from the main render - // and append a summary user message. The prompt prefix is - // byte-identical to the main agent loop for cache hits. - const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages); + // and append a summary user message. The prompt prefix must + // be byte-identical to the main agent fetch for cache hits. + // Apply the same post-processing as the tool calling loop + // (strip internal IDs + validate/filter orphaned tool messages) + // so the message arrays match exactly. + const strippedMainMessages = ToolCallingLoop.validateToolMessagesCore( + ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages), + ).messages; const summaryMsgResult = await renderPromptElement( this.instantiationService, this.endpoint,