KingDEV95 · pull · May 31, 2026 · May 30, 2026 · May 30, 2026 · May 30, 2026
diff --git a/extensions/copilot/src/extension/prompt/node/chatMLFetcherTelemetry.ts b/extensions/copilot/src/extension/prompt/node/chatMLFetcherTelemetry.ts
@@ -149,6 +149,8 @@ export class ChatMLFetcherTelemetrySender {
 				"clientPromptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, locally counted", "isMeasurement": true },
 				"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },
 				"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },
+				"promptCacheCreation1hTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Cache-creation input tokens written with the 1h (extended) TTL, billed at 2x base rate. Only populated when Anthropic reports the cache_creation breakdown.", "isMeasurement": true },
+				"promptCacheCreation5mTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Cache-creation input tokens written with the default 5m TTL, billed at 1.25x base rate. Only populated when Anthropic reports the cache_creation breakdown.", "isMeasurement": true },
 				"tokenCountMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum generated tokens", "isMeasurement": true },
 				"tokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true },
 				"reasoningTokens": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of reasoning tokens", "isMeasurement": true },
@@ -227,6 +229,8 @@ export class ChatMLFetcherTelemetrySender {
 			tokenCountMax: maxResponseTokens,
 			promptTokenCount: chatCompletion.usage?.prompt_tokens,
 			promptCacheTokenCount: chatCompletion.usage?.prompt_tokens_details?.cached_tokens,
+			promptCacheCreation1hTokenCount: chatCompletion.usage?.prompt_tokens_details?.anthropic_cache_creation?.ephemeral_1h_input_tokens,
+			promptCacheCreation5mTokenCount: chatCompletion.usage?.prompt_tokens_details?.anthropic_cache_creation?.ephemeral_5m_input_tokens,
 			clientPromptTokenCount: promptTokenCount,
 			tokenCount: chatCompletion.usage?.total_tokens,
 			reasoningTokens: chatCompletion.usage?.completion_tokens_details?.reasoning_tokens,

diff --git a/extensions/copilot/src/platform/endpoint/node/messagesApi.ts b/extensions/copilot/src/platform/endpoint/node/messagesApi.ts
@@ -66,6 +66,10 @@ interface AnthropicStreamEvent {
 			output_tokens: number;
 			cache_creation_input_tokens?: number;
 			cache_read_input_tokens?: number;
+			cache_creation?: {
+				ephemeral_1h_input_tokens?: number;
+				ephemeral_5m_input_tokens?: number;
+			};
 		};
 	};
 	index?: number;
@@ -92,6 +96,10 @@ interface AnthropicStreamEvent {
 		input_tokens?: number;
 		cache_creation_input_tokens?: number;
 		cache_read_input_tokens?: number;
+		cache_creation?: {
+			ephemeral_1h_input_tokens?: number;
+			ephemeral_5m_input_tokens?: number;
+		};
 	};
 	copilot_usage?: {
 		total_nano_aiu: number;
@@ -666,6 +674,8 @@ interface AnthropicCompletionState {
 	readonly inputTokens: number;
 	readonly outputTokens: number;
 	readonly cacheCreationTokens: number;
+	readonly cacheCreation1hTokens: number | undefined;
+	readonly cacheCreation5mTokens: number | undefined;
 	readonly cacheReadTokens: number;
 	readonly requestId: string;
 	readonly ghRequestId: string;
@@ -724,6 +734,14 @@ function buildAnthropicCompletion(state: AnthropicCompletionState, logService: I
 			prompt_tokens_details: {
 				cached_tokens: state.cacheReadTokens,
 				cache_creation_input_tokens: state.cacheCreationTokens,
+				...(state.cacheCreation1hTokens !== undefined || state.cacheCreation5mTokens !== undefined
+					? {
+						anthropic_cache_creation: {
+							...(state.cacheCreation1hTokens !== undefined ? { ephemeral_1h_input_tokens: state.cacheCreation1hTokens } : {}),
+							...(state.cacheCreation5mTokens !== undefined ? { ephemeral_5m_input_tokens: state.cacheCreation5mTokens } : {}),
+						},
+					}
+					: {}),
 			},
 			completion_tokens_details: {
 				reasoning_tokens: 0,
@@ -776,6 +794,10 @@ type AnthropicNonStreamingResponse =
 			output_tokens: number;
 			cache_creation_input_tokens?: number;
 			cache_read_input_tokens?: number;
+			cache_creation?: {
+				ephemeral_1h_input_tokens?: number;
+				ephemeral_5m_input_tokens?: number;
+			};
 		};
 	}
 	| {
@@ -908,6 +930,8 @@ export async function processNonStreamingResponseFromMessagesEndpoint(
 			inputTokens: usage?.input_tokens ?? 0,
 			outputTokens: usage?.output_tokens ?? 0,
 			cacheCreationTokens: usage?.cache_creation_input_tokens ?? 0,
+			cacheCreation1hTokens: usage?.cache_creation?.ephemeral_1h_input_tokens,
+			cacheCreation5mTokens: usage?.cache_creation?.ephemeral_5m_input_tokens,
 			cacheReadTokens: usage?.cache_read_input_tokens ?? 0,
 			requestId,
 			ghRequestId,
@@ -956,6 +980,8 @@ export class AnthropicMessagesProcessor {
 	private inputTokens: number = 0;
 	private outputTokens: number = 0;
 	private cacheCreationTokens: number = 0;
+	private cacheCreation1hTokens: number | undefined;
+	private cacheCreation5mTokens: number | undefined;
 	private cacheReadTokens: number = 0;
 	private copilotUsage?: { total_nano_aiu: number };
 	private contextManagementResponse?: ContextManagementResponse;
@@ -1036,6 +1062,8 @@ export class AnthropicMessagesProcessor {
 					this.inputTokens = chunk.message.usage.input_tokens ?? 0;
 					this.outputTokens = chunk.message.usage.output_tokens ?? 0;
 					this.cacheCreationTokens = chunk.message.usage.cache_creation_input_tokens ?? 0;
+					this.cacheCreation1hTokens = chunk.message.usage.cache_creation?.ephemeral_1h_input_tokens ?? this.cacheCreation1hTokens;
+					this.cacheCreation5mTokens = chunk.message.usage.cache_creation?.ephemeral_5m_input_tokens ?? this.cacheCreation5mTokens;
 					this.cacheReadTokens = chunk.message.usage.cache_read_input_tokens ?? 0;
 				}
 				return;
@@ -1146,6 +1174,8 @@ export class AnthropicMessagesProcessor {
 					this.outputTokens = chunk.usage.output_tokens;
 					this.inputTokens = chunk.usage.input_tokens ?? this.inputTokens;
 					this.cacheCreationTokens = chunk.usage.cache_creation_input_tokens ?? this.cacheCreationTokens;
+					this.cacheCreation1hTokens = chunk.usage.cache_creation?.ephemeral_1h_input_tokens ?? this.cacheCreation1hTokens;
+					this.cacheCreation5mTokens = chunk.usage.cache_creation?.ephemeral_5m_input_tokens ?? this.cacheCreation5mTokens;
 					this.cacheReadTokens = chunk.usage.cache_read_input_tokens ?? this.cacheReadTokens;
 				}
 				if (chunk.copilot_usage && typeof chunk.copilot_usage.total_nano_aiu === 'number') {
@@ -1239,6 +1269,8 @@ export class AnthropicMessagesProcessor {
 					inputTokens: this.inputTokens,
 					outputTokens: this.outputTokens,
 					cacheCreationTokens: this.cacheCreationTokens,
+					cacheCreation1hTokens: this.cacheCreation1hTokens,
+					cacheCreation5mTokens: this.cacheCreation5mTokens,
 					cacheReadTokens: this.cacheReadTokens,
 					requestId: this.requestId,
 					ghRequestId: this.ghRequestId,

diff --git a/extensions/copilot/src/platform/endpoint/test/node/messagesApi.spec.ts b/extensions/copilot/src/platform/endpoint/test/node/messagesApi.spec.ts
@@ -13,7 +13,7 @@ import { AnthropicMessagesTool, CUSTOM_TOOL_SEARCH_NAME, isExtendedCacheTtlEnabl
 import { IChatEndpoint, ICreateEndpointBodyOptions } from '../../../networking/common/networking';
 import { IToolDeferralService } from '../../../networking/common/toolDeferralService';
 import { createPlatformServices } from '../../../test/node/services';
-import { addMessagesApiCacheControl, addToolsAndSystemCacheControl, buildToolInputSchema, clearAllCacheControl, createMessagesRequestBody, processNonStreamingResponseFromMessagesEndpoint, processResponseFromMessagesEndpoint, rawMessagesToMessagesAPI } from '../../node/messagesApi';
+import { addMessagesApiCacheControl, addToolsAndSystemCacheControl, AnthropicMessagesProcessor, buildToolInputSchema, clearAllCacheControl, createMessagesRequestBody, processNonStreamingResponseFromMessagesEndpoint, processResponseFromMessagesEndpoint, rawMessagesToMessagesAPI } from '../../node/messagesApi';
 import { HeadersImpl, Response } from '../../../networking/common/fetcherService';
 import { TelemetryData } from '../../../telemetry/common/telemetryData';
 import { TestLogService } from '../../../testing/common/testLogService';
@@ -1404,6 +1404,81 @@ suite('processNonStreamingResponseFromMessagesEndpoint', () => {
 		expect(results[0].usage?.prompt_tokens_details?.cached_tokens).toBe(30);
 	});
 
+	test('surfaces 1h/5m cache_creation split when present', async () => { // non-streaming path: payload carries usage.cache_creation
+		const response = createNonStreamingResponse({
+			id: 'msg_cache_ttl',
+			type: 'message',
+			role: 'assistant',
+			content: [{ type: 'text', text: 'cached' }],
+			model: 'claude-sonnet-4-20250514',
+			stop_reason: 'end_turn',
+			usage: {
+				input_tokens: 50,
+				output_tokens: 10,
+				cache_creation_input_tokens: 25, // aggregate write count; equals the 17 + 8 split below
+				cache_read_input_tokens: 0,
+				cache_creation: {
+					ephemeral_1h_input_tokens: 17,
+					ephemeral_5m_input_tokens: 8,
+				},
+			},
+		});
+
+		const telemetryData = TelemetryData.createAndMarkAsIssued();
+		const completions = await processNonStreamingResponseFromMessagesEndpoint(
+			new NullTelemetryService(),
+			new TestLogService(),
+			response,
+			async () => undefined,
+			telemetryData,
+		);
+
+		const results = []; // drain the async iterable so the first completion can be inspected
+		for await (const c of completions) {
+			results.push(c);
+		}
+
+		const details = results[0].usage?.prompt_tokens_details;
+		expect(details?.cache_creation_input_tokens).toBe(25); // aggregate is still reported alongside the split
+		expect(details?.anthropic_cache_creation?.ephemeral_1h_input_tokens).toBe(17); // input field `cache_creation` is read back as `anthropic_cache_creation`
+		expect(details?.anthropic_cache_creation?.ephemeral_5m_input_tokens).toBe(8);
+	});
+
+	test('omits 1h/5m split fields when Anthropic does not report them', async () => { // non-streaming path: payload has no usage.cache_creation
+		const response = createNonStreamingResponse({
+			id: 'msg_cache_no_split',
+			type: 'message',
+			role: 'assistant',
+			content: [{ type: 'text', text: 'cached' }],
+			model: 'claude-sonnet-4-20250514',
+			stop_reason: 'end_turn',
+			usage: {
+				input_tokens: 50,
+				output_tokens: 10,
+				cache_creation_input_tokens: 20,
+				cache_read_input_tokens: 30, // last usage field: this payload deliberately has no cache_creation object
+			},
+		});
+
+		const telemetryData = TelemetryData.createAndMarkAsIssued();
+		const completions = await processNonStreamingResponseFromMessagesEndpoint(
+			new NullTelemetryService(),
+			new TestLogService(),
+			response,
+			async () => undefined,
+			telemetryData,
+		);
+
+		const results = []; // drain the async iterable so the first completion can be inspected
+		for await (const c of completions) {
+			results.push(c);
+		}
+
+		const details = results[0].usage?.prompt_tokens_details;
+		expect(details?.cache_creation_input_tokens).toBe(20); // aggregate count is unaffected by the missing breakdown
+		expect(details?.anthropic_cache_creation).toBeUndefined(); // no breakdown object is reported when the input has none
+	});
+
 	test('rejects on malformed JSON', async () => {
 		const response = Response.fromText(200, 'OK', createNonStreamingHeaders(), 'not json at all', 'node-fetch');
 		const telemetryData = TelemetryData.createAndMarkAsIssued();
@@ -1555,3 +1630,121 @@ suite('processResponseFromMessagesEndpoint routing', () => {
 		expect(results[0].message.content).toHaveLength(1);
 	});
 });
+
+suite('AnthropicMessagesProcessor streaming cache_creation', () => {
+	function makeProcessor(): AnthropicMessagesProcessor { // each test calls this to get its own processor instance
+		return new AnthropicMessagesProcessor(
+			TelemetryData.createAndMarkAsIssued(),
+			'req-1', // presumably the request id — constructor signature is not visible here, confirm
+			'gh-req-1', // presumably the GitHub request id — confirm against the constructor
+			'',
+			new TestLogService(),
+			new NullTelemetryService(),
+		);
+	}
+
+	test('message_start cache_creation survives a message_delta that omits the breakdown', () => {
+		// Production happy path: Anthropic only emits the cache_creation breakdown
+		// in message_start. message_delta updates other usage fields but typically
+		// has no cache_creation. The ?? fallback in the processor must preserve
+		// the values seen in message_start — including 0 (a common control-arm
+		// value) which would be wiped out by a `||` regression.
+		const processor = makeProcessor();
+		const noop = async () => undefined; // second argument to push(); does nothing in these tests
+
+		processor.push({
+			type: 'message_start',
+			message: {
+				id: 'msg_stream',
+				type: 'message',
+				role: 'assistant',
+				content: [],
+				model: 'claude-sonnet-4-20250514',
+				stop_reason: null,
+				stop_sequence: null,
+				usage: {
+					input_tokens: 5,
+					output_tokens: 0,
+					cache_creation_input_tokens: 12336, // equals the 0 + 12336 split below
+					cache_read_input_tokens: 391352,
+					cache_creation: {
+						ephemeral_1h_input_tokens: 0, // an explicit zero, not a missing field: it must still be reported
+						ephemeral_5m_input_tokens: 12336,
+					},
+				},
+			},
+		}, noop);
+
+		// message_delta with usage but no cache_creation breakdown — mirrors
+		// what every observed backend (Anthropic 1P, Bedrock, Vertex) emits in
+		// the final delta of a stream.
+		processor.push({
+			type: 'message_delta',
+			delta: { type: 'message_delta', stop_reason: 'end_turn' },
+			usage: {
+				output_tokens: 42,
+				input_tokens: 5,
+				cache_creation_input_tokens: 12336,
+				cache_read_input_tokens: 391352, // last usage field: no cache_creation object in this event
+			},
+		}, noop);
+
+		const completion = processor.push({ type: 'message_stop' }, noop); // the completion is taken from the message_stop push() result
+		expect(completion).toBeDefined();
+
+		const details = completion!.usage?.prompt_tokens_details;
+		expect(details?.anthropic_cache_creation?.ephemeral_1h_input_tokens).toBe(0); // toBe(0) fails if the zero was dropped to undefined
+		expect(details?.anthropic_cache_creation?.ephemeral_5m_input_tokens).toBe(12336);
+	});
+
+	test('message_delta cache_creation overrides message_start values', () => {
+		// Defensive: if a backend ever did emit the breakdown in message_delta,
+		// the later values should win (matches the existing overwrite pattern
+		// for cache_creation_input_tokens / cache_read_input_tokens).
+		const processor = makeProcessor();
+		const noop = async () => undefined; // second argument to push(); does nothing in these tests
+
+		processor.push({
+			type: 'message_start',
+			message: {
+				id: 'msg_stream_override',
+				type: 'message',
+				role: 'assistant',
+				content: [],
+				model: 'claude-sonnet-4-20250514',
+				stop_reason: null,
+				stop_sequence: null,
+				usage: {
+					input_tokens: 5,
+					output_tokens: 0,
+					cache_creation_input_tokens: 10000,
+					cache_read_input_tokens: 0,
+					cache_creation: {
+						ephemeral_1h_input_tokens: 0, // initial value that the later message_delta replaces
+						ephemeral_5m_input_tokens: 10000,
+					},
+				},
+			},
+		}, noop);
+
+		processor.push({
+			type: 'message_delta',
+			delta: { type: 'message_delta', stop_reason: 'end_turn' },
+			usage: {
+				output_tokens: 10,
+				input_tokens: 5,
+				cache_creation_input_tokens: 15000, // 5000 + 10000, consistent with the breakdown below
+				cache_read_input_tokens: 0,
+				cache_creation: {
+					ephemeral_1h_input_tokens: 5000, // differs from message_start's 0, so the override is observable
+					ephemeral_5m_input_tokens: 10000, // same as message_start, so only the 1h assertion detects the override
+				},
+			},
+		}, noop);
+
+		const completion = processor.push({ type: 'message_stop' }, noop); // the completion is taken from the message_stop push() result
+		const details = completion!.usage?.prompt_tokens_details;
+		expect(details?.anthropic_cache_creation?.ephemeral_1h_input_tokens).toBe(5000); // later (message_delta) value wins
+		expect(details?.anthropic_cache_creation?.ephemeral_5m_input_tokens).toBe(10000);
+	});
+});
diff --git a/extensions/copilot/src/platform/networking/common/openai.ts b/extensions/copilot/src/platform/networking/common/openai.ts
@@ -43,6 +43,19 @@ export interface APIUsage {
 	prompt_tokens_details?: {
 		cached_tokens: number;
 		cache_creation_input_tokens?: number;
+		/**
+		 * Anthropic-specific: per-TTL breakdown of cache-creation (write) input
+		 * tokens. Mirrors Anthropic's `usage.cache_creation` object verbatim.
+		 * Only populated for Anthropic Messages API responses where the server
+		 * reports the split; absent for all other providers and for older
+		 * Anthropic responses that don't include the breakdown.
+		 */
+		anthropic_cache_creation?: {
+			/** Cache-creation tokens written with the 1h (extended) TTL — billed at 2x base input rate. */
+			ephemeral_1h_input_tokens?: number;
+			/** Cache-creation tokens written with the default 5m TTL — billed at 1.25x base input rate. */
+			ephemeral_5m_input_tokens?: number;
+		};
 	};
 	/**
 	 * Breakdown of tokens used in a completion.

diff --git a/extensions/copilot/src/platform/review/vscode/reviewServiceImpl.ts b/extensions/copilot/src/platform/review/vscode/reviewServiceImpl.ts
@@ -28,7 +28,7 @@ export class ReviewServiceImpl implements IReviewService {
 	private readonly _repositoryDisposables = new DisposableStore();
 	private _reviewDiffReposString: string | undefined;
 	private _diagnosticCollection: vscode.DiagnosticCollection | undefined;
-	private _commentController = vscode.comments.createCommentController('github-copilot-review', 'Code Review');
+	private _commentController = this._disposables.add(vscode.comments.createCommentController('github-copilot-review', 'Code Review'));
 	private _comments: InternalComment[] = [];
 	private _monitorActiveThread: any | undefined;
 	private _activeThread: vscode.CommentThread | undefined;

diff --git a/extensions/copilot/src/platform/survey/vscode/surveyServiceImpl.ts b/extensions/copilot/src/platform/survey/vscode/surveyServiceImpl.ts
@@ -72,7 +72,7 @@ export class SurveyService implements ISurveyService {
 			this.lastLanguageId = languageId;
 		}
 
-		if (!this.debounceTimeout) {
+		if (this.debounceTimeout === undefined) {
 			this.debounceTimeout = setTimeout(async () => {
 				const eligible = await this.checkEligibility();
 				if (eligible) {

diff --git a/extensions/php-language-features/src/features/validationProvider.ts b/extensions/php-language-features/src/features/validationProvider.ts
@@ -125,6 +125,12 @@ export default class PHPValidationProvider {
 			this.documentListener.dispose();
 			this.documentListener = null;
 		}
+		if (this.delayers) {
+			for (const key in this.delayers) {
+				this.delayers[key].cancel();
+			}
+			this.delayers = undefined;
+		}
 	}
 
 	private async loadConfiguration(): Promise<void> {

diff --git a/src/vs/base/node/zip.ts b/src/vs/base/node/zip.ts
@@ -230,6 +230,7 @@ export function extract(zipPath: string, targetPath: string, options: IExtractOp
 function read(zipPath: string, filePath: string): Promise<Readable> {
 	return openZip(zipPath).then(zipfile => {
 		return new Promise<Readable>((c, e) => {
+			zipfile.once('error', err => e(toExtractError(err)));
 			zipfile.on('entry', (entry: Entry) => {
 				if (entry.fileName === filePath) {
 					openZipStream(zipfile, entry).then(stream => c(stream), err => e(err));