Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ export class ChatMLFetcherTelemetrySender {
"clientPromptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, locally counted", "isMeasurement": true },
"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens, server side counted", "isMeasurement": true },
"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of prompt tokens hitting cache as reported by server", "isMeasurement": true },
"promptCacheCreation1hTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Cache-creation input tokens written with the 1h (extended) TTL, billed at 2x base rate. Only populated when Anthropic reports the cache_creation breakdown.", "isMeasurement": true },
"promptCacheCreation5mTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Cache-creation input tokens written with the default 5m TTL, billed at 1.25x base rate. Only populated when Anthropic reports the cache_creation breakdown.", "isMeasurement": true },
"tokenCountMax": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Maximum generated tokens", "isMeasurement": true },
"tokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of generated tokens", "isMeasurement": true },
"reasoningTokens": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "comment": "Number of reasoning tokens", "isMeasurement": true },
Expand Down Expand Up @@ -227,6 +229,8 @@ export class ChatMLFetcherTelemetrySender {
tokenCountMax: maxResponseTokens,
promptTokenCount: chatCompletion.usage?.prompt_tokens,
promptCacheTokenCount: chatCompletion.usage?.prompt_tokens_details?.cached_tokens,
promptCacheCreation1hTokenCount: chatCompletion.usage?.prompt_tokens_details?.anthropic_cache_creation?.ephemeral_1h_input_tokens,
promptCacheCreation5mTokenCount: chatCompletion.usage?.prompt_tokens_details?.anthropic_cache_creation?.ephemeral_5m_input_tokens,
clientPromptTokenCount: promptTokenCount,
tokenCount: chatCompletion.usage?.total_tokens,
reasoningTokens: chatCompletion.usage?.completion_tokens_details?.reasoning_tokens,
Expand Down
32 changes: 32 additions & 0 deletions extensions/copilot/src/platform/endpoint/node/messagesApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ interface AnthropicStreamEvent {
output_tokens: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
cache_creation?: {
ephemeral_1h_input_tokens?: number;
ephemeral_5m_input_tokens?: number;
};
};
};
index?: number;
Expand All @@ -92,6 +96,10 @@ interface AnthropicStreamEvent {
input_tokens?: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
cache_creation?: {
ephemeral_1h_input_tokens?: number;
ephemeral_5m_input_tokens?: number;
};
};
copilot_usage?: {
total_nano_aiu: number;
Expand Down Expand Up @@ -666,6 +674,8 @@ interface AnthropicCompletionState {
readonly inputTokens: number;
readonly outputTokens: number;
readonly cacheCreationTokens: number;
readonly cacheCreation1hTokens: number | undefined;
readonly cacheCreation5mTokens: number | undefined;
readonly cacheReadTokens: number;
readonly requestId: string;
readonly ghRequestId: string;
Expand Down Expand Up @@ -724,6 +734,14 @@ function buildAnthropicCompletion(state: AnthropicCompletionState, logService: I
prompt_tokens_details: {
cached_tokens: state.cacheReadTokens,
cache_creation_input_tokens: state.cacheCreationTokens,
...(state.cacheCreation1hTokens !== undefined || state.cacheCreation5mTokens !== undefined
? {
anthropic_cache_creation: {
...(state.cacheCreation1hTokens !== undefined ? { ephemeral_1h_input_tokens: state.cacheCreation1hTokens } : {}),
...(state.cacheCreation5mTokens !== undefined ? { ephemeral_5m_input_tokens: state.cacheCreation5mTokens } : {}),
},
}
: {}),
},
completion_tokens_details: {
reasoning_tokens: 0,
Expand Down Expand Up @@ -776,6 +794,10 @@ type AnthropicNonStreamingResponse =
output_tokens: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
cache_creation?: {
ephemeral_1h_input_tokens?: number;
ephemeral_5m_input_tokens?: number;
};
};
}
| {
Expand Down Expand Up @@ -908,6 +930,8 @@ export async function processNonStreamingResponseFromMessagesEndpoint(
inputTokens: usage?.input_tokens ?? 0,
outputTokens: usage?.output_tokens ?? 0,
cacheCreationTokens: usage?.cache_creation_input_tokens ?? 0,
cacheCreation1hTokens: usage?.cache_creation?.ephemeral_1h_input_tokens,
cacheCreation5mTokens: usage?.cache_creation?.ephemeral_5m_input_tokens,
cacheReadTokens: usage?.cache_read_input_tokens ?? 0,
requestId,
ghRequestId,
Expand Down Expand Up @@ -956,6 +980,8 @@ export class AnthropicMessagesProcessor {
private inputTokens: number = 0;
private outputTokens: number = 0;
private cacheCreationTokens: number = 0;
private cacheCreation1hTokens: number | undefined;
private cacheCreation5mTokens: number | undefined;
private cacheReadTokens: number = 0;
private copilotUsage?: { total_nano_aiu: number };
private contextManagementResponse?: ContextManagementResponse;
Expand Down Expand Up @@ -1036,6 +1062,8 @@ export class AnthropicMessagesProcessor {
this.inputTokens = chunk.message.usage.input_tokens ?? 0;
this.outputTokens = chunk.message.usage.output_tokens ?? 0;
this.cacheCreationTokens = chunk.message.usage.cache_creation_input_tokens ?? 0;
this.cacheCreation1hTokens = chunk.message.usage.cache_creation?.ephemeral_1h_input_tokens ?? this.cacheCreation1hTokens;
this.cacheCreation5mTokens = chunk.message.usage.cache_creation?.ephemeral_5m_input_tokens ?? this.cacheCreation5mTokens;
this.cacheReadTokens = chunk.message.usage.cache_read_input_tokens ?? 0;
}
return;
Expand Down Expand Up @@ -1146,6 +1174,8 @@ export class AnthropicMessagesProcessor {
this.outputTokens = chunk.usage.output_tokens;
this.inputTokens = chunk.usage.input_tokens ?? this.inputTokens;
this.cacheCreationTokens = chunk.usage.cache_creation_input_tokens ?? this.cacheCreationTokens;
this.cacheCreation1hTokens = chunk.usage.cache_creation?.ephemeral_1h_input_tokens ?? this.cacheCreation1hTokens;
this.cacheCreation5mTokens = chunk.usage.cache_creation?.ephemeral_5m_input_tokens ?? this.cacheCreation5mTokens;
this.cacheReadTokens = chunk.usage.cache_read_input_tokens ?? this.cacheReadTokens;
}
if (chunk.copilot_usage && typeof chunk.copilot_usage.total_nano_aiu === 'number') {
Expand Down Expand Up @@ -1239,6 +1269,8 @@ export class AnthropicMessagesProcessor {
inputTokens: this.inputTokens,
outputTokens: this.outputTokens,
cacheCreationTokens: this.cacheCreationTokens,
cacheCreation1hTokens: this.cacheCreation1hTokens,
cacheCreation5mTokens: this.cacheCreation5mTokens,
cacheReadTokens: this.cacheReadTokens,
requestId: this.requestId,
ghRequestId: this.ghRequestId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { AnthropicMessagesTool, CUSTOM_TOOL_SEARCH_NAME, isExtendedCacheTtlEnabl
import { IChatEndpoint, ICreateEndpointBodyOptions } from '../../../networking/common/networking';
import { IToolDeferralService } from '../../../networking/common/toolDeferralService';
import { createPlatformServices } from '../../../test/node/services';
import { addMessagesApiCacheControl, addToolsAndSystemCacheControl, buildToolInputSchema, clearAllCacheControl, createMessagesRequestBody, processNonStreamingResponseFromMessagesEndpoint, processResponseFromMessagesEndpoint, rawMessagesToMessagesAPI } from '../../node/messagesApi';
import { addMessagesApiCacheControl, addToolsAndSystemCacheControl, AnthropicMessagesProcessor, buildToolInputSchema, clearAllCacheControl, createMessagesRequestBody, processNonStreamingResponseFromMessagesEndpoint, processResponseFromMessagesEndpoint, rawMessagesToMessagesAPI } from '../../node/messagesApi';
import { HeadersImpl, Response } from '../../../networking/common/fetcherService';
import { TelemetryData } from '../../../telemetry/common/telemetryData';
import { TestLogService } from '../../../testing/common/testLogService';
Expand Down Expand Up @@ -1404,6 +1404,81 @@ suite('processNonStreamingResponseFromMessagesEndpoint', () => {
expect(results[0].usage?.prompt_tokens_details?.cached_tokens).toBe(30);
});

test('surfaces 1h/5m cache_creation split when present', async () => {
	// Usage payload carrying both the aggregate cache-creation count and the
	// per-TTL breakdown that the completion is expected to expose.
	const usage = {
		input_tokens: 50,
		output_tokens: 10,
		cache_creation_input_tokens: 25,
		cache_read_input_tokens: 0,
		cache_creation: {
			ephemeral_1h_input_tokens: 17,
			ephemeral_5m_input_tokens: 8,
		},
	};
	const response = createNonStreamingResponse({
		id: 'msg_cache_ttl',
		type: 'message',
		role: 'assistant',
		content: [{ type: 'text', text: 'cached' }],
		model: 'claude-sonnet-4-20250514',
		stop_reason: 'end_turn',
		usage,
	});

	const telemetryData = TelemetryData.createAndMarkAsIssued();
	const stream = await processNonStreamingResponseFromMessagesEndpoint(
		new NullTelemetryService(),
		new TestLogService(),
		response,
		async () => undefined,
		telemetryData,
	);

	// Drain the whole async iterable; only the first completion is inspected.
	const collected = [];
	for await (const completion of stream) {
		collected.push(completion);
	}
	const [first] = collected;

	const promptDetails = first.usage?.prompt_tokens_details;
	const split = promptDetails?.anthropic_cache_creation;
	expect(promptDetails?.cache_creation_input_tokens).toBe(25);
	expect(split?.ephemeral_1h_input_tokens).toBe(17);
	expect(split?.ephemeral_5m_input_tokens).toBe(8);
});

test('omits 1h/5m split fields when Anthropic does not report them', async () => {
	// Usage payload with only the aggregate counters: no `cache_creation`
	// object, so no per-TTL breakdown should appear on the completion.
	const usage = {
		input_tokens: 50,
		output_tokens: 10,
		cache_creation_input_tokens: 20,
		cache_read_input_tokens: 30,
	};
	const response = createNonStreamingResponse({
		id: 'msg_cache_no_split',
		type: 'message',
		role: 'assistant',
		content: [{ type: 'text', text: 'cached' }],
		model: 'claude-sonnet-4-20250514',
		stop_reason: 'end_turn',
		usage,
	});

	const telemetryData = TelemetryData.createAndMarkAsIssued();
	const stream = await processNonStreamingResponseFromMessagesEndpoint(
		new NullTelemetryService(),
		new TestLogService(),
		response,
		async () => undefined,
		telemetryData,
	);

	// Drain the whole async iterable; only the first completion is inspected.
	const collected = [];
	for await (const completion of stream) {
		collected.push(completion);
	}
	const [first] = collected;

	const promptDetails = first.usage?.prompt_tokens_details;
	expect(promptDetails?.cache_creation_input_tokens).toBe(20);
	expect(promptDetails?.anthropic_cache_creation).toBeUndefined();
});

test('rejects on malformed JSON', async () => {
const response = Response.fromText(200, 'OK', createNonStreamingHeaders(), 'not json at all', 'node-fetch');
const telemetryData = TelemetryData.createAndMarkAsIssued();
Expand Down Expand Up @@ -1555,3 +1630,121 @@ suite('processResponseFromMessagesEndpoint routing', () => {
expect(results[0].message.content).toHaveLength(1);
});
});

suite('AnthropicMessagesProcessor streaming cache_creation', () => {
	/** No-op callback passed as the second argument to `processor.push` in every test below. */
	const noop = async () => undefined;

	/** Builds a fresh processor per test so usage state never leaks between cases. */
	function makeProcessor(): AnthropicMessagesProcessor {
		return new AnthropicMessagesProcessor(
			TelemetryData.createAndMarkAsIssued(),
			'req-1',
			'gh-req-1',
			'',
			new TestLogService(),
			new NullTelemetryService(),
		);
	}

	test('message_start cache_creation survives a message_delta that omits the breakdown', () => {
		// Production happy path: Anthropic only emits the cache_creation breakdown
		// in message_start. message_delta updates other usage fields but typically
		// has no cache_creation. The ?? fallback in the processor must preserve
		// the values seen in message_start — including 0 (a common control-arm
		// value) which would be wiped out by a `||` regression.
		const processor = makeProcessor();

		processor.push({
			type: 'message_start',
			message: {
				id: 'msg_stream',
				type: 'message',
				role: 'assistant',
				content: [],
				model: 'claude-sonnet-4-20250514',
				stop_reason: null,
				stop_sequence: null,
				usage: {
					input_tokens: 5,
					output_tokens: 0,
					cache_creation_input_tokens: 12336,
					cache_read_input_tokens: 391352,
					cache_creation: {
						ephemeral_1h_input_tokens: 0,
						ephemeral_5m_input_tokens: 12336,
					},
				},
			},
		}, noop);

		// message_delta with usage but no cache_creation breakdown — mirrors
		// what every observed backend (Anthropic 1P, Bedrock, Vertex) emits in
		// the final delta of a stream.
		processor.push({
			type: 'message_delta',
			delta: { type: 'message_delta', stop_reason: 'end_turn' },
			usage: {
				output_tokens: 42,
				input_tokens: 5,
				cache_creation_input_tokens: 12336,
				cache_read_input_tokens: 391352,
			},
		}, noop);

		// message_stop is the push whose return value is inspected as the completion.
		const completion = processor.push({ type: 'message_stop' }, noop);
		expect(completion).toBeDefined();

		const details = completion!.usage?.prompt_tokens_details;
		expect(details?.anthropic_cache_creation?.ephemeral_1h_input_tokens).toBe(0);
		expect(details?.anthropic_cache_creation?.ephemeral_5m_input_tokens).toBe(12336);
	});

	test('message_delta cache_creation overrides message_start values', () => {
		// Defensive: if a backend ever did emit the breakdown in message_delta,
		// the later values should win (matches the existing overwrite pattern
		// for cache_creation_input_tokens / cache_read_input_tokens).
		const processor = makeProcessor();

		processor.push({
			type: 'message_start',
			message: {
				id: 'msg_stream_override',
				type: 'message',
				role: 'assistant',
				content: [],
				model: 'claude-sonnet-4-20250514',
				stop_reason: null,
				stop_sequence: null,
				usage: {
					input_tokens: 5,
					output_tokens: 0,
					cache_creation_input_tokens: 10000,
					cache_read_input_tokens: 0,
					cache_creation: {
						ephemeral_1h_input_tokens: 0,
						ephemeral_5m_input_tokens: 10000,
					},
				},
			},
		}, noop);

		processor.push({
			type: 'message_delta',
			delta: { type: 'message_delta', stop_reason: 'end_turn' },
			usage: {
				output_tokens: 10,
				input_tokens: 5,
				cache_creation_input_tokens: 15000,
				cache_read_input_tokens: 0,
				cache_creation: {
					ephemeral_1h_input_tokens: 5000,
					ephemeral_5m_input_tokens: 10000,
				},
			},
		}, noop);

		const completion = processor.push({ type: 'message_stop' }, noop);
		// Assert before dereferencing so a missing completion is reported as an
		// expectation failure rather than a TypeError from the `!` access below.
		expect(completion).toBeDefined();

		const details = completion!.usage?.prompt_tokens_details;
		expect(details?.anthropic_cache_creation?.ephemeral_1h_input_tokens).toBe(5000);
		expect(details?.anthropic_cache_creation?.ephemeral_5m_input_tokens).toBe(10000);
	});
});
13 changes: 13 additions & 0 deletions extensions/copilot/src/platform/networking/common/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ export interface APIUsage {
prompt_tokens_details?: {
cached_tokens: number;
cache_creation_input_tokens?: number;
/**
* Anthropic-specific: per-TTL breakdown of cache-creation (write) input
* tokens. Mirrors Anthropic's `usage.cache_creation` object verbatim.
* Only populated for Anthropic Messages API responses where the server
* reports the split; absent for all other providers and for older
* Anthropic responses that don't include the breakdown.
*/
anthropic_cache_creation?: {
/** Cache-creation tokens written with the 1h (extended) TTL — billed at 2x base input rate. */
ephemeral_1h_input_tokens?: number;
/** Cache-creation tokens written with the default 5m TTL — billed at 1.25x base input rate. */
ephemeral_5m_input_tokens?: number;
};
};
/**
* Breakdown of tokens used in a completion.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export class ReviewServiceImpl implements IReviewService {
private readonly _repositoryDisposables = new DisposableStore();
private _reviewDiffReposString: string | undefined;
private _diagnosticCollection: vscode.DiagnosticCollection | undefined;
private _commentController = vscode.comments.createCommentController('github-copilot-review', 'Code Review');
private _commentController = this._disposables.add(vscode.comments.createCommentController('github-copilot-review', 'Code Review'));
private _comments: InternalComment[] = [];
private _monitorActiveThread: any | undefined;
private _activeThread: vscode.CommentThread | undefined;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ export class SurveyService implements ISurveyService {
this.lastLanguageId = languageId;
}

if (!this.debounceTimeout) {
if (this.debounceTimeout === undefined) {
this.debounceTimeout = setTimeout(async () => {
const eligible = await this.checkEligibility();
if (eligible) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ export default class PHPValidationProvider {
this.documentListener.dispose();
this.documentListener = null;
}
if (this.delayers) {
for (const key in this.delayers) {
this.delayers[key].cancel();
}
this.delayers = undefined;
}
}

private async loadConfiguration(): Promise<void> {
Expand Down
1 change: 1 addition & 0 deletions src/vs/base/node/zip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ export function extract(zipPath: string, targetPath: string, options: IExtractOp
function read(zipPath: string, filePath: string): Promise<Readable> {
return openZip(zipPath).then(zipfile => {
return new Promise<Readable>((c, e) => {
zipfile.once('error', err => e(toExtractError(err)));
zipfile.on('entry', (entry: Entry) => {
if (entry.fileName === filePath) {
openZipStream(zipfile, entry).then(stream => c(stream), err => e(err));
Expand Down
Loading
Loading