diff --git a/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts b/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts index 1f3b6fc918313..0c62c70c83159 100644 --- a/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts +++ b/extensions/copilot/src/extension/chat/vscode-node/chatDebugFileLoggerService.ts @@ -965,6 +965,9 @@ export class ChatDebugFileLoggerService extends Disposable implements IChatDebug ...(span.attributes[CopilotChatAttr.REQUEST_SHAPE] !== undefined ? { requestShape: String(span.attributes[CopilotChatAttr.REQUEST_SHAPE]) } : {}), + ...(span.attributes[CopilotChatAttr.COPILOT_USAGE_NANO_AIU] !== undefined + ? { copilotUsageNanoAiu: asNumber(span.attributes[CopilotChatAttr.COPILOT_USAGE_NANO_AIU]) } + : {}), ...(isError && span.status.message ? { error: span.status.message } : {}), }, }; diff --git a/extensions/copilot/src/extension/chatInputNotification/vscode-node/chatInputNotification.contribution.ts b/extensions/copilot/src/extension/chatInputNotification/vscode-node/chatInputNotification.contribution.ts index c4596ddcf50b0..379da10e620fe 100644 --- a/extensions/copilot/src/extension/chatInputNotification/vscode-node/chatInputNotification.contribution.ts +++ b/extensions/copilot/src/extension/chatInputNotification/vscode-node/chatInputNotification.contribution.ts @@ -37,6 +37,8 @@ export class ChatInputNotificationContribution extends Disposable { private _notification: vscode.ChatInputNotification | undefined; /** Tracks whether the current notification is the quota-exhausted variant. */ private _showingExhausted = false; + /** Whether a copilot token was present on the last {@link _update} call. */ + private _hadCopilotToken = false; private readonly _shownQuotaThresholds = new Set(); private readonly _shownSessionThresholds = new Set(); @@ -56,6 +58,20 @@ export class ChatInputNotificationContribution extends Disposable { * to show (or whether to hide). 
*/ private _update(): void { + const hasCopilotToken = !!this._authService.copilotToken; + const wasSignedIn = this._hadCopilotToken; + this._hadCopilotToken = hasCopilotToken; + + // Detect signed-in → signed-out transition: clear thresholds and hide. + if (wasSignedIn && !hasCopilotToken) { + this._shownQuotaThresholds.clear(); + this._shownSessionThresholds.clear(); + this._shownWeeklyThresholds.clear(); + this._hideNotification(); + this._showingExhausted = false; + return; + } + // Priority 1: Quota exhausted — sticky info notification if (this._chatQuotaService.quotaExhausted) { this._showExhaustedNotification(); @@ -202,20 +218,18 @@ export class ChatInputNotificationContribution extends Disposable { const isFree = !!this._authService.copilotToken?.isFreeUser; if (isAnonymous || isFree) { - notification.description = vscode.l10n.t("You're getting the most out of Copilot."); + notification.description = vscode.l10n.t('Upgrade to continue past the limit.'); notification.actions = [ - { label: vscode.l10n.t('View Usage'), commandId: 'workbench.action.chat.openCopilotStatus' }, { label: vscode.l10n.t('Upgrade'), commandId: 'workbench.action.chat.upgradePlan' }, ]; } else if (this._chatQuotaService.additionalUsageEnabled) { - notification.description = vscode.l10n.t('Your additional budget will keep Copilot going.'); + notification.description = vscode.l10n.t('Additional budget is covering extra usage.'); notification.actions = [ { label: vscode.l10n.t('View Usage'), commandId: 'workbench.action.chat.openCopilotStatus' }, ]; } else { - notification.description = vscode.l10n.t('Manage your budget to keep going.'); + notification.description = vscode.l10n.t('Set additional budget to cover extra usage.'); notification.actions = [ - { label: vscode.l10n.t('View Usage'), commandId: 'workbench.action.chat.openCopilotStatus' }, { label: vscode.l10n.t('Manage Budget'), commandId: 'workbench.action.chat.manageAdditionalSpend' }, ]; } diff --git 
a/extensions/copilot/src/extension/chatInputNotification/vscode-node/test/chatInputNotification.contribution.spec.ts b/extensions/copilot/src/extension/chatInputNotification/vscode-node/test/chatInputNotification.contribution.spec.ts new file mode 100644 index 0000000000000..69575029019c4 --- /dev/null +++ b/extensions/copilot/src/extension/chatInputNotification/vscode-node/test/chatInputNotification.contribution.spec.ts @@ -0,0 +1,365 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { afterEach, beforeEach, describe, expect, test, vi } from 'vitest'; +import { Emitter } from '../../../../util/vs/base/common/event'; +import { IAuthenticationService } from '../../../../platform/authentication/common/authentication'; +import { IChatQuota, IChatQuotaService } from '../../../../platform/chat/common/chatQuotaService'; + +// ---- vscode mock ----------------------------------------------------------- + +const mockNotification = { + severity: 0, + dismissible: false, + autoDismissOnMessage: false, + message: '', + description: '', + actions: [] as { label: string; commandId: string }[], + show: vi.fn(), + hide: vi.fn(), + dispose: vi.fn(), +}; + +vi.mock('vscode', () => ({ + ChatInputNotificationSeverity: { Info: 1 }, + chat: { + createInputNotification: vi.fn(() => mockNotification), + }, + l10n: { t: (str: string, ...args: unknown[]) => str.replace(/\{(\d+)\}/g, (_, i) => String(args[Number(i)])) }, +})); + +import { ChatInputNotificationContribution } from '../chatInputNotification.contribution'; + +// ---- helpers --------------------------------------------------------------- + +function createAuthService(opts?: { anyGitHubSession?: unknown; 
copilotToken?: unknown }) { + const emitter = new Emitter(); + const hasSession = opts && 'anyGitHubSession' in opts; + const hasToken = opts && 'copilotToken' in opts; + const authService = { + _serviceBrand: undefined, + anyGitHubSession: hasSession ? opts.anyGitHubSession : { accessToken: 'tok' }, + copilotToken: hasToken ? opts.copilotToken : { isFreeUser: false, isNoAuthUser: false }, + onDidAuthenticationChange: emitter.event, + } as unknown as IAuthenticationService; + return { authService, emitter }; +} + +function makeQuota(percentRemaining: number, opts?: Partial): IChatQuota { + return { + quota: 100, + percentRemaining, + unlimited: false, + additionalUsageUsed: 0, + additionalUsageEnabled: false, + resetDate: new Date('2026-06-01T00:00:00Z'), + ...opts, + }; +} + +function createQuotaService(opts?: { + quotaExhausted?: boolean; + quotaInfo?: IChatQuota; + session?: IChatQuota; + weekly?: IChatQuota; + additionalUsageEnabled?: boolean; +}) { + const emitter = new Emitter(); + const quotaService = { + _serviceBrand: undefined, + onDidChange: emitter.event, + quotaExhausted: opts?.quotaExhausted ?? false, + quotaInfo: opts?.quotaInfo, + rateLimitInfo: { session: opts?.session, weekly: opts?.weekly }, + additionalUsageEnabled: opts?.additionalUsageEnabled ?? 
false, + getCreditsForTurn: () => undefined, + processQuotaHeaders: vi.fn(), + processQuotaSnapshots: vi.fn(), + setLastCopilotUsage: vi.fn(), + resetTurnCredits: vi.fn(), + clearQuota: vi.fn(), + } as unknown as IChatQuotaService; + return { quotaService, emitter }; +} + +// ---- tests ----------------------------------------------------------------- + +describe('ChatInputNotificationContribution', () => { + let authEmitter: Emitter; + let authService: IAuthenticationService; + let quotaEmitter: Emitter; + let quotaService: IChatQuotaService; + let contribution: ChatInputNotificationContribution; + + function setup(authOpts?: Parameters[0], quotaOpts?: Parameters[0]) { + const auth = createAuthService(authOpts); + const quota = createQuotaService(quotaOpts); + authEmitter = auth.emitter; + authService = auth.authService; + quotaEmitter = quota.emitter; + quotaService = quota.quotaService; + contribution = new ChatInputNotificationContribution(authService, quotaService); + } + + beforeEach(() => { + vi.clearAllMocks(); + mockNotification.show.mockClear(); + mockNotification.hide.mockClear(); + mockNotification.message = ''; + mockNotification.description = ''; + mockNotification.actions = []; + }); + + afterEach(() => { + contribution?.dispose(); + }); + + // --- sign-out behaviour (the PR change) --------------------------------- + + describe('sign-out clears state and hides notification', () => { + test('hides notification when copilot token disappears (sign out)', () => { + setup( + {}, + { quotaExhausted: true }, + ); + + // Trigger _update with exhausted quota → shows notification + quotaEmitter.fire(); + expect(mockNotification.show).toHaveBeenCalled(); + + // User signs out — copilot token cleared + (authService as any).copilotToken = undefined; + authEmitter.fire(); + + expect(mockNotification.hide).toHaveBeenCalled(); + }); + + test('re-shows threshold notification after sign-out + sign-in', () => { + setup( + {}, + { quotaInfo: makeQuota(5) }, // 95% used 
→ crosses 95 threshold + ); + + // First update: threshold notification shown + quotaEmitter.fire(); + expect(mockNotification.show).toHaveBeenCalledTimes(1); + mockNotification.show.mockClear(); + + // Fire again — same threshold already shown, no new notification + quotaEmitter.fire(); + expect(mockNotification.show).not.toHaveBeenCalled(); + + // Sign out → copilot token cleared → thresholds cleared + (authService as any).copilotToken = undefined; + authEmitter.fire(); + + // Sign back in + (authService as any).copilotToken = { isFreeUser: false, isNoAuthUser: false }; + authEmitter.fire(); + + // Threshold was cleared, so it should re-show + expect(mockNotification.show).toHaveBeenCalled(); + }); + + test('sign-out resets showingExhausted flag', () => { + setup( + {}, + { quotaExhausted: true }, + ); + + // Show exhausted notification + quotaEmitter.fire(); + expect(mockNotification.show).toHaveBeenCalled(); + mockNotification.show.mockClear(); + + // Sign out — copilot token cleared + (authService as any).copilotToken = undefined; + authEmitter.fire(); + + // Sign back in, quota no longer exhausted + (authService as any).copilotToken = { isFreeUser: false, isNoAuthUser: false }; + (quotaService as any).quotaExhausted = false; + (quotaService as any).quotaInfo = undefined; + (quotaService as any).rateLimitInfo = { session: undefined, weekly: undefined }; + authEmitter.fire(); + + // Should NOT call hide again (showingExhausted was reset on sign-out) + // and should NOT show a new notification (no thresholds crossed) + expect(mockNotification.show).not.toHaveBeenCalled(); + }); + + test('sign-out while no notification was active is harmless', () => { + setup(); + + // No quota events fired yet → no notification created + (authService as any).copilotToken = undefined; + authEmitter.fire(); + + // hide is only called on the notification object; since none was + // created, this should not throw. 
+ expect(mockNotification.hide).not.toHaveBeenCalled(); + }); + + test('anonymous user with no GitHub session still sees quota notifications', () => { + setup( + { anyGitHubSession: undefined, copilotToken: { isNoAuthUser: true, isFreeUser: false } }, + { quotaExhausted: true }, + ); + + // Anonymous user has a copilotToken but no GitHub session. + // They should still see the exhausted notification. + quotaEmitter.fire(); + + expect(mockNotification.show).toHaveBeenCalled(); + expect(mockNotification.message).toBe('Credit Limit Reached'); + expect(mockNotification.description).toBe('Sign in to keep going.'); + }); + }); + + // --- basic notification lifecycle ---------------------------------------- + + describe('quota exhausted', () => { + test('shows exhausted notification', () => { + setup( + { copilotToken: { isFreeUser: true, isNoAuthUser: false } }, + { quotaExhausted: true }, + ); + + quotaEmitter.fire(); + + expect(mockNotification.show).toHaveBeenCalled(); + expect(mockNotification.message).toBe('Credit Limit Reached'); + }); + + test('hides exhausted when quota is no longer exhausted', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { quotaExhausted: true }, + ); + + quotaEmitter.fire(); + expect(mockNotification.show).toHaveBeenCalled(); + + // Quota replenished + (quotaService as any).quotaExhausted = false; + quotaEmitter.fire(); + + expect(mockNotification.hide).toHaveBeenCalled(); + }); + }); + + describe('quota approaching threshold', () => { + test('shows warning at 50% used', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { quotaInfo: makeQuota(50) }, // 50% used + ); + + quotaEmitter.fire(); + + expect(mockNotification.show).toHaveBeenCalled(); + expect(mockNotification.message).toBe('Credits at 50%'); + }); + + test('does not re-show the same threshold', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { quotaInfo: makeQuota(50) }, + ); + + quotaEmitter.fire(); + 
expect(mockNotification.show).toHaveBeenCalledTimes(1); + + mockNotification.show.mockClear(); + quotaEmitter.fire(); + expect(mockNotification.show).not.toHaveBeenCalled(); + }); + + test('shows higher threshold when usage increases', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { quotaInfo: makeQuota(50) }, // 50% used + ); + + quotaEmitter.fire(); + expect(mockNotification.show).toHaveBeenCalledTimes(1); + + mockNotification.show.mockClear(); + (quotaService as any).quotaInfo = makeQuota(10); // 90% used + quotaEmitter.fire(); + + expect(mockNotification.show).toHaveBeenCalled(); + expect(mockNotification.message).toBe('Credits at 90%'); + }); + }); + + describe('rate limit warning', () => { + test('shows session rate limit warning', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { session: makeQuota(25) }, // 75% used + ); + + quotaEmitter.fire(); + + expect(mockNotification.show).toHaveBeenCalled(); + expect(mockNotification.message).toContain('75%'); + expect(mockNotification.message).toContain('session'); + }); + + test('shows weekly rate limit warning', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { weekly: makeQuota(10) }, // 90% used + ); + + quotaEmitter.fire(); + + expect(mockNotification.show).toHaveBeenCalled(); + expect(mockNotification.message).toContain('90%'); + expect(mockNotification.message).toContain('weekly'); + }); + }); + + describe('priority ordering', () => { + test('exhausted takes priority over threshold warning', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { quotaExhausted: true, quotaInfo: makeQuota(5) }, + ); + + quotaEmitter.fire(); + + expect(mockNotification.message).toBe('Credit Limit Reached'); + }); + + test('threshold warning takes priority over rate limit', () => { + setup( + { anyGitHubSession: { accessToken: 'tok' } }, + { quotaInfo: makeQuota(10), session: makeQuota(25) }, // 90% quota, 75% session + ); + + quotaEmitter.fire(); + + 
expect(mockNotification.message).toBe('Credits at 90%'); + }); + }); + + describe('never-signed-in user still gets notifications', () => { + test('shows exhausted notification even with no copilot token initially', () => { + setup( + { copilotToken: undefined }, + { quotaExhausted: true, quotaInfo: makeQuota(5) }, + ); + + quotaEmitter.fire(); + + // User was never signed in, so no transition occurred — + // notifications should still flow through normally. + expect(mockNotification.show).toHaveBeenCalled(); + expect(mockNotification.message).toBe('Credit Limit Reached'); + }); + }); +}); diff --git a/extensions/copilot/src/extension/chatSessions/claude/common/claudeSessionStateService.ts b/extensions/copilot/src/extension/chatSessions/claude/common/claudeSessionStateService.ts index 43c9ecdd0c544..30318d5352e1f 100644 --- a/extensions/copilot/src/extension/chatSessions/claude/common/claudeSessionStateService.ts +++ b/extensions/copilot/src/extension/chatSessions/claude/common/claudeSessionStateService.ts @@ -25,6 +25,7 @@ export interface SessionState { usageHandler: UsageHandler | undefined; reasoningEffort: EffortLevel | undefined; traceContext: TraceContext | undefined; + turnId: string | undefined; } /** @@ -114,6 +115,16 @@ export interface IClaudeSessionStateService { * Sets the OTel trace context for a session. */ setTraceContextForSession(sessionId: string, traceContext: TraceContext | undefined): void; + + /** + * Gets the current turn ID for a session (VS Code request ID, used for per-turn credit tracking). + */ + getTurnIdForSession(sessionId: string): string | undefined; + + /** + * Sets the current turn ID for a session. 
+ */ + setTurnIdForSession(sessionId: string, turnId: string | undefined): void; } export const IClaudeSessionStateService = createServiceIdentifier('IClaudeSessionStateService'); diff --git a/extensions/copilot/src/extension/chatSessions/claude/node/claudeLanguageModelServer.ts b/extensions/copilot/src/extension/chatSessions/claude/node/claudeLanguageModelServer.ts index 5e44813f929a2..f876a5da9b9a5 100644 --- a/extensions/copilot/src/extension/chatSessions/claude/node/claudeLanguageModelServer.ts +++ b/extensions/copilot/src/extension/chatSessions/claude/node/claudeLanguageModelServer.ts @@ -228,7 +228,8 @@ export class ClaudeLanguageModelServer extends Disposable { finishedCb: async () => undefined, location: ChatLocation.MessagesProxy, modelCapabilities: { enableThinking: true, reasoningEffort }, - userInitiatedRequest: isUserInitiatedMessage + userInitiatedRequest: isUserInitiatedMessage, + turnId: sessionId ? this.sessionStateService.getTurnIdForSession(sessionId) : undefined, }, tokenSource.token); // Wrap in trace context so chat spans are parented to the invoke_agent span diff --git a/extensions/copilot/src/extension/chatSessions/claude/node/claudeSessionStateService.ts b/extensions/copilot/src/extension/chatSessions/claude/node/claudeSessionStateService.ts index f54975c4ff79f..9965e38b6173e 100644 --- a/extensions/copilot/src/extension/chatSessions/claude/node/claudeSessionStateService.ts +++ b/extensions/copilot/src/extension/chatSessions/claude/node/claudeSessionStateService.ts @@ -48,6 +48,7 @@ export class ClaudeSessionStateService extends Disposable implements IClaudeSess usageHandler: existing?.usageHandler, reasoningEffort: existing?.reasoningEffort, traceContext: existing?.traceContext, + turnId: existing?.turnId, }); this._onDidChangeSessionState.fire({ sessionId, modelId }); } @@ -69,6 +70,7 @@ export class ClaudeSessionStateService extends Disposable implements IClaudeSess usageHandler: existing?.usageHandler, reasoningEffort: 
existing?.reasoningEffort, traceContext: existing?.traceContext, + turnId: existing?.turnId, }); this._onDidChangeSessionState.fire({ sessionId, permissionMode: mode }); } @@ -87,6 +89,7 @@ export class ClaudeSessionStateService extends Disposable implements IClaudeSess usageHandler: existing?.usageHandler, reasoningEffort: existing?.reasoningEffort, traceContext: existing?.traceContext, + turnId: existing?.turnId, }); } @@ -107,6 +110,7 @@ export class ClaudeSessionStateService extends Disposable implements IClaudeSess usageHandler: existing?.usageHandler, reasoningEffort: existing?.reasoningEffort, traceContext: existing?.traceContext, + turnId: existing?.turnId, }); this._onDidChangeSessionState.fire({ sessionId, folderInfo }); } @@ -125,6 +129,7 @@ export class ClaudeSessionStateService extends Disposable implements IClaudeSess usageHandler: handler, reasoningEffort: existing?.reasoningEffort, traceContext: existing?.traceContext, + turnId: existing?.turnId, }); } @@ -145,6 +150,7 @@ export class ClaudeSessionStateService extends Disposable implements IClaudeSess usageHandler: existing?.usageHandler, reasoningEffort: effort, traceContext: existing?.traceContext, + turnId: existing?.turnId, }); } @@ -162,6 +168,25 @@ export class ClaudeSessionStateService extends Disposable implements IClaudeSess usageHandler: existing?.usageHandler, reasoningEffort: existing?.reasoningEffort, traceContext, + turnId: existing?.turnId, + }); + } + + getTurnIdForSession(sessionId: string): string | undefined { + return this._sessionState.get(sessionId)?.turnId; + } + + setTurnIdForSession(sessionId: string, turnId: string | undefined): void { + const existing = this._sessionState.get(sessionId); + this._sessionState.set(sessionId, { + modelId: existing?.modelId, + permissionMode: existing?.permissionMode ?? 
'acceptEdits', + capturingToken: existing?.capturingToken, + folderInfo: existing?.folderInfo, + usageHandler: existing?.usageHandler, + reasoningEffort: existing?.reasoningEffort, + traceContext: existing?.traceContext, + turnId, }); } diff --git a/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts b/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts index f9bd738f4d78f..d32cff99a2e64 100644 --- a/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts +++ b/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts @@ -10,6 +10,7 @@ import * as crypto from 'crypto'; import type * as vscode from 'vscode'; import type { ChatParticipantToolToken } from 'vscode'; import { IAuthenticationService } from '../../../../platform/authentication/common/authentication'; +import { IChatQuotaService } from '../../../../platform/chat/common/chatQuotaService'; import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService'; import { PermissiveAuthRequiredError } from '../../../../platform/github/common/githubService'; import { ILogService } from '../../../../platform/log/common/logService'; @@ -805,6 +806,7 @@ export class CopilotCLISession extends DisposableStore implements ICopilotCLISes @IOTelService private readonly _otelService: IOTelService, @IGitService private readonly _gitService: IGitService, @IAuthenticationService private readonly _authenticationService: IAuthenticationService, + @IChatQuotaService private readonly _chatQuotaService: IChatQuotaService, ) { super(); this.sessionId = _sdkSession.sessionId; @@ -1010,6 +1012,7 @@ export class CopilotCLISession extends DisposableStore implements ICopilotCLISes const prompt = getPromptLabel(input); this._pendingPrompt = prompt; this._lastResponseModelId = undefined; + this._chatQuotaService.resetTurnCredits(request.id); this.logService.info(`[CopilotCLISession] 
Invoking session ${this.sessionId}`); const disposables = new DisposableStore(); const logStartTime = Date.now(); @@ -1272,6 +1275,14 @@ export class CopilotCLISession extends DisposableStore implements ICopilotCLISes if (requestStream && typeof event.data.outputTokens === 'number' && typeof event.data.inputTokens === 'number') { reportUsage(event.data.inputTokens, event.data.outputTokens); } + // Accumulate per-turn credits from SDK copilotUsage data + const copilotUsage = (event.data as Record).copilotUsage; + if (copilotUsage && typeof copilotUsage === 'object') { + const { totalNanoAiu } = copilotUsage as { totalNanoAiu?: number }; + if (typeof totalNanoAiu === 'number') { + this._chatQuotaService.setLastCopilotUsage(totalNanoAiu, request.id); + } + } }))); disposables.add(toDisposable(this._sdkSession.on('session.usage_info', (event) => { lastUsageInfo = { diff --git a/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSessionService.ts b/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSessionService.ts index cb99bb311f8b5..904bde416b412 100644 --- a/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSessionService.ts +++ b/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSessionService.ts @@ -54,6 +54,7 @@ import { INTEGRATION_ID } from '../../../../platform/endpoint/common/licenseAgre const COPILOT_CLI_WORKSPACE_JSON_FILE_KEY = 'github.copilot.cli.workspaceSessionFile'; const AUTO_MODE_REFRESH_LEAD_TIME_MS = 300 * 1000; +export const COPILOT_CLI_CHAT_PANEL_SYSTEM_MESSAGE = 'You are an AI assistant using Copilot CLI runtime in VS Code. You help users with software engineering tasks. 
When asked about your identity, you must state that you are an AI assistant using Copilot CLI runtime in VS Code.'; type SDKPackage = Awaited>; type AutoModeResolveArgs = Parameters[0]; @@ -890,7 +891,18 @@ export class CopilotCLISessionService extends Disposable implements ICopilotCLIS ]); const customAgents = agentInfos.map(i => i.agent); const variablesContext = this._promptVariablesService.buildTemplateVariablesContext(options.sessionId, options.debugTargetSessionIds); - const systemMessage = variablesContext ? { mode: 'append' as const, content: variablesContext } : undefined; + const systemMessage: NonNullable = { + mode: 'customize', + sections: { + identity: { + action: 'replace', + content: COPILOT_CLI_CHAT_PANEL_SYSTEM_MESSAGE, + }, + }, + }; + if (variablesContext) { + systemMessage.content = variablesContext; + } const allOptions: SessionOptions = { clientName: 'vscode', @@ -924,9 +936,7 @@ export class CopilotCLISessionService extends Disposable implements ICopilotCLIS if (copilotUrl) { allOptions.copilotUrl = copilotUrl; } - if (systemMessage) { - allOptions.systemMessage = systemMessage; - } + allOptions.systemMessage = systemMessage; allOptions.sessionCapabilities = new Set(['plan-mode', 'memory', 'cli-documentation', 'ask-user', 'interactive-mode', 'system-notifications']); if (options.reasoningEffort && this.configurationService.getConfig(ConfigKey.Advanced.CLIThinkingEffortEnabled)) { allOptions.reasoningEffort = options.reasoningEffort; diff --git a/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotCliSessionService.spec.ts b/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotCliSessionService.spec.ts index d4030a8938e94..474e62fb1ce4c 100644 --- a/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotCliSessionService.spec.ts +++ b/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotCliSessionService.spec.ts @@ -39,7 +39,7 @@ import { 
IChatDelegationSummaryService } from '../../common/delegationSummarySer import { getCopilotCLISessionDir } from '../cliHelpers'; import { ICopilotCLISDK } from '../copilotCli'; import { CopilotCLISession, ICopilotCLISession } from '../copilotcliSession'; -import { CopilotCLISessionService, CopilotCLISessionWorkspaceTracker, ICopilotCLISessionItem } from '../copilotcliSessionService'; +import { COPILOT_CLI_CHAT_PANEL_SYSTEM_MESSAGE, CopilotCLISessionService, CopilotCLISessionWorkspaceTracker, ICopilotCLISessionItem } from '../copilotcliSessionService'; import { CopilotCLIMCPHandler } from '../mcpHandler'; import { MissionControlApiClient } from '../missionControlApiClient'; import { IQuestion, IQuestionAnswer, IUserQuestionHandler } from '../userInputHelpers'; @@ -160,7 +160,7 @@ describe('CopilotCLISessionService', () => { deleteSession: vi.fn(async () => { }), }; } - return disposables.add(new CopilotCLISession(workspaceInfo, agentName, sdkSession, [], logService, workspaceService, new MockChatSessionMetadataStore(), instantiationService, new NullRequestLogger(), new NullICopilotCLIImageSupport(), new FakeToolsService(), new FakeUserQuestionHandler(), accessor.get(IConfigurationService), new NoopOTelService(resolveOTelConfig({ env: {}, extensionVersion: '0.0.0', sessionId: 'test' })), new MockGitService(), { _serviceBrand: undefined } as any)); + return disposables.add(new CopilotCLISession(workspaceInfo, agentName, sdkSession, [], logService, workspaceService, new MockChatSessionMetadataStore(), instantiationService, new NullRequestLogger(), new NullICopilotCLIImageSupport(), new FakeToolsService(), new FakeUserQuestionHandler(), accessor.get(IConfigurationService), new NoopOTelService(resolveOTelConfig({ env: {}, extensionVersion: '0.0.0', sessionId: 'test' })), new MockGitService(), { _serviceBrand: undefined } as any, { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any)); } } as 
unknown as IInstantiationService; const configurationService = accessor.get(IConfigurationService); @@ -250,6 +250,43 @@ describe('CopilotCLISessionService', () => { })); }); + it('passes the VS Code Copilot CLI identity system message to session manager', async () => { + const createSessionSpy = vi.spyOn(manager, 'createSession'); + await service.createSession({ model: 'gpt-test', ...sessionOptionsFor(URI.file('/tmp')) }, CancellationToken.None); + + const callArgs = createSessionSpy.mock.calls[0][0]; + expect(callArgs.systemMessage).toEqual({ + mode: 'customize', + sections: { + identity: { + action: 'replace', + content: COPILOT_CLI_CHAT_PANEL_SYSTEM_MESSAGE, + }, + }, + }); + }); + + it('preserves prompt variable context separately from the VS Code Copilot CLI identity system message', async () => { + const variableContext = 'Resolved template variables are available here.'; + vi.spyOn(NullPromptVariablesService.prototype, 'buildTemplateVariablesContext').mockReturnValue(variableContext); + const createSessionSpy = vi.spyOn(manager, 'createSession'); + await service.createSession({ model: 'gpt-test', ...sessionOptionsFor(URI.file('/tmp')) }, CancellationToken.None); + + const systemMessage = createSessionSpy.mock.calls[0][0].systemMessage; + expect(systemMessage?.mode).toBe('customize'); + if (systemMessage?.mode !== 'customize') { + throw new Error('Expected customize-mode system message'); + } + const identity = systemMessage.sections?.identity; + expect(identity?.action).toBe('replace'); + if (identity?.action !== 'replace') { + throw new Error('Expected replace identity section override'); + } + expect(systemMessage.content).toBe(variableContext); + expect(identity.content).toBe(COPILOT_CLI_CHAT_PANEL_SYSTEM_MESSAGE); + expect(identity.content).not.toContain(variableContext); + }); + it('passes reasoningEffort to session manager when provided', async () => { const createSessionSpy = vi.spyOn(manager, 'createSession'); await service.createSession({ model: 
'gpt-test', reasoningEffort: 'high', ...sessionOptionsFor(URI.file('/tmp')) }, CancellationToken.None); @@ -283,6 +320,25 @@ describe('CopilotCLISessionService', () => { }), true); }); + it('passes the VS Code Copilot CLI identity system message when getting an existing session', async () => { + const targetId = 'system-message-get'; + manager.sessions.set(targetId, new MockCliSdkSession(targetId, new Date())); + const getSessionSpy = vi.spyOn(manager, 'getSession'); + await service.getSession({ sessionId: targetId, model: 'gpt-test', ...sessionOptionsFor(URI.file('/tmp')) }, CancellationToken.None); + + expect(getSessionSpy).toHaveBeenCalledWith(expect.objectContaining({ + systemMessage: { + mode: 'customize', + sections: { + identity: { + action: 'replace', + content: COPILOT_CLI_CHAT_PANEL_SYSTEM_MESSAGE, + }, + }, + }, + }), true); + }); + it('does not set reasoningEffort when not provided', async () => { const targetId = 'no-reasoning-get'; manager.sessions.set(targetId, new MockCliSdkSession(targetId, new Date())); diff --git a/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts b/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts index c56453daa6dbe..c96a9fa21e977 100644 --- a/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts +++ b/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts @@ -265,7 +265,8 @@ describe('CopilotCLISession', () => { configurationService, new NoopOTelService(resolveOTelConfig({ env: {}, extensionVersion: '0.0.0', sessionId: 'test' })), new MockGitService(), - { _serviceBrand: undefined } as any + { _serviceBrand: undefined } as any, + { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any )); } diff --git 
a/extensions/copilot/src/extension/chatSessions/vscode-node/claudeChatSessionContentProvider.ts b/extensions/copilot/src/extension/chatSessions/vscode-node/claudeChatSessionContentProvider.ts index 26b2fbdea774c..77675c17a9d4c 100644 --- a/extensions/copilot/src/extension/chatSessions/vscode-node/claudeChatSessionContentProvider.ts +++ b/extensions/copilot/src/extension/chatSessions/vscode-node/claudeChatSessionContentProvider.ts @@ -6,6 +6,7 @@ import * as vscode from 'vscode'; import { ChatExtendedRequestHandler } from 'vscode'; import { PermissionMode } from '@anthropic-ai/claude-agent-sdk'; +import { IChatQuotaService } from '../../../platform/chat/common/chatQuotaService'; import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService'; import { INativeEnvService } from '../../../platform/env/common/envService'; import { getGitHubRepoInfoFromContext, IGitService } from '../../../platform/git/common/gitService'; @@ -82,7 +83,8 @@ export class ClaudeChatSessionContentProvider extends Disposable implements vsco @IClaudeSlashCommandService private readonly slashCommandService: IClaudeSlashCommandService, @IClaudeCodeModels private readonly claudeModels: IClaudeCodeModels, @IConfigurationService private readonly configurationService: IConfigurationService, - @IInstantiationService instantiationService: IInstantiationService + @IInstantiationService instantiationService: IInstantiationService, + @IChatQuotaService private readonly _chatQuotaService: IChatQuotaService, ) { super(); this._controller = this._register(instantiationService.createInstance(ClaudeChatSessionItemController)); @@ -154,16 +156,37 @@ export class ClaudeChatSessionContentProvider extends Disposable implements vsco stream.usage(usage); }); - const prompt = request.prompt; - await this._controller.updateItemStatus(effectiveSessionId, vscode.ChatSessionStatus.InProgress, prompt); - const result = await 
this.claudeAgentManager.handleRequest(effectiveSessionId, request, stream, token, isNewSession, yieldRequested); - await this._controller.updateItemStatus(effectiveSessionId, vscode.ChatSessionStatus.Completed, prompt); - - // Clear usage handler after request completes - this.sessionStateService.setUsageHandlerForSession(effectiveSessionId, undefined); + // Set turn ID for per-turn credit tracking via chatMLFetcher + this._chatQuotaService.resetTurnCredits(request.id); + this.sessionStateService.setTurnIdForSession(effectiveSessionId, request.id); + + let result: vscode.ChatResult; + let creditsUsed: number | undefined; + try { + const prompt = request.prompt; + await this._controller.updateItemStatus(effectiveSessionId, vscode.ChatSessionStatus.InProgress, prompt); + result = await this.claudeAgentManager.handleRequest(effectiveSessionId, request, stream, token, isNewSession, yieldRequested); + await this._controller.updateItemStatus(effectiveSessionId, vscode.ChatSessionStatus.Completed, prompt); + } finally { + // Clear usage handler and turn ID after request completes (even on error/cancellation) + this.sessionStateService.setUsageHandlerForSession(effectiveSessionId, undefined); + this.sessionStateService.setTurnIdForSession(effectiveSessionId, undefined); + creditsUsed = this._chatQuotaService.getCreditsForTurn(request.id); + this._chatQuotaService.resetTurnCredits(request.id); + } const modelDetailsEnabled = this.configurationService.getConfig(ConfigKey.Advanced.CLIModelDetailsEnabled); - const details = modelDetailsEnabled && endpoint ? formatClaudeModelDetails(endpoint) : undefined; + let details: string | undefined; + if (modelDetailsEnabled && endpoint) { + if (creditsUsed !== undefined) { + const formatted = creditsUsed % 1 === 0 ? creditsUsed.toString() : creditsUsed.toFixed(1); + details = creditsUsed === 1 + ? 
vscode.l10n.t('{0} \u2022 {1} credit', endpoint.name, formatted) + : vscode.l10n.t('{0} \u2022 {1} credits', endpoint.name, formatted); + } else { + details = formatClaudeModelDetails(endpoint); + } + } return { ...(details ? { details } : {}), ...(result.errorDetails ? { errorDetails: result.errorDetails } : {}), diff --git a/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessions.ts b/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessions.ts index 1dd943b94a0e3..30f7544e7a46e 100644 --- a/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessions.ts +++ b/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessions.ts @@ -7,6 +7,7 @@ import * as l10n from '@vscode/l10n'; import * as vscode from 'vscode'; import { ChatExtendedRequestHandler, ChatRequestTurn2, Uri } from 'vscode'; import { IRunCommandExecutionService } from '../../../platform/commands/common/runCommandExecutionService'; +import { IChatQuotaService } from '../../../platform/chat/common/chatQuotaService'; import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService'; import { INativeEnvService } from '../../../platform/env/common/envService'; import { IFileSystemService } from '../../../platform/filesystem/common/fileSystemService'; @@ -669,6 +670,7 @@ export class CopilotCLIChatSessionParticipant extends Disposable { @ISessionOptionGroupBuilder private readonly _optionGroupBuilder: ISessionOptionGroupBuilder, @ICopilotCLIModels private readonly copilotCLIModels: ICopilotCLIModels, @IChatSessionMetadataStore private readonly chatSessionMetadataStore: IChatSessionMetadataStore, + @IChatQuotaService private readonly _chatQuotaService: IChatQuotaService, ) { super(); @@ -873,7 +875,8 @@ export class CopilotCLIChatSessionParticipant extends Disposable { } const modelDetailsEnabled = this.configurationService.getConfig(ConfigKey.Advanced.CLIModelDetailsEnabled); - 
const { result, responseModelId } = await getCopilotCLIModelDetails(session.object, model, this.copilotCLIModels, this.logService, modelDetailsEnabled); + const creditsUsed = this._chatQuotaService.getCreditsForTurn(request.id); + const { result, responseModelId } = await getCopilotCLIModelDetails(session.object, model, this.copilotCLIModels, this.logService, modelDetailsEnabled, creditsUsed); persistCopilotCLIResponseModelId(sdkSessionId, request.id, responseModelId, this.chatSessionMetadataStore, this.logService); return result; @@ -883,6 +886,7 @@ export class CopilotCLIChatSessionParticipant extends Disposable { } throw ex; } finally { + this._chatQuotaService.resetTurnCredits(request.id); if (sdkSessionId && session) { await this.sessionRequestLifecycle.endRequest( sdkSessionId, request, diff --git a/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessionsContribution.ts b/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessionsContribution.ts index 83fcb509a05c4..bbc5c0a91ffca 100644 --- a/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessionsContribution.ts +++ b/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIChatSessionsContribution.ts @@ -8,6 +8,7 @@ import * as l10n from '@vscode/l10n'; import * as vscode from 'vscode'; import { ChatExtendedRequestHandler, ChatRequestTurn2, ChatSessionProviderOptionItem, Uri } from 'vscode'; import { IRunCommandExecutionService } from '../../../platform/commands/common/runCommandExecutionService'; +import { IChatQuotaService } from '../../../platform/chat/common/chatQuotaService'; import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService'; import { INativeEnvService } from '../../../platform/env/common/envService'; import { IVSCodeExtensionContext } from '../../../platform/extContext/common/extensionContext'; @@ -1267,6 +1268,7 @@ export class CopilotCLIChatSessionParticipant 
extends Disposable { @IChatSessionMetadataStore private readonly chatSessionMetadataStore: IChatSessionMetadataStore, @ICustomSessionTitleService private readonly customSessionTitleService: ICustomSessionTitleService, @IOctoKitService private readonly octoKitService: IOctoKitService, + @IChatQuotaService private readonly _chatQuotaService: IChatQuotaService, ) { super(); } @@ -1528,6 +1530,7 @@ export class CopilotCLIChatSessionParticipant extends Disposable { requestsForSession.add(request); this.pendingRequestBySession.set(session.object.sessionId, requestsForSession); + const isCopilotCLICommand = !!request.command && (copilotCLICommands as readonly string[]).includes(request.command); if (request.command === 'delegate') { await this.handleDelegationToCloud(session.object, request, context, stream, token); } else if (contextForRequest) { @@ -1535,11 +1538,14 @@ export class CopilotCLIChatSessionParticipant extends Disposable { const { prompt, attachments } = contextForRequest; await session.object.handleRequest(request, { prompt }, attachments, model, authInfo, token); await this.commitWorktreeChangesIfNeeded(request, session.object, token); + } else if (isCopilotCLICommand && (!isUntitled || request.command === 'remote')) { + const { prompt, attachments } = request.prompt + ? await this.promptResolver.resolvePrompt(request, undefined, [], session.object.workspace, [], token) + : { prompt: '', attachments: [] }; + await session.object.handleRequest(request, { command: request.command as CopilotCLICommand, prompt }, attachments, model, authInfo, token); + await this.commitWorktreeChangesIfNeeded(request, session.object, token); } else if (request.command && !request.prompt && !isUntitled) { - const input = (copilotCLICommands as readonly string[]).includes(request.command) - ? 
{ command: request.command as CopilotCLICommand, prompt: '' } - : { prompt: `/${request.command}` }; - await session.object.handleRequest(request, input, [], model, authInfo, token); + await session.object.handleRequest(request, { prompt: `/${request.command}` }, [], model, authInfo, token); await this.commitWorktreeChangesIfNeeded(request, session.object, token); } else if (request.prompt && Object.values(builtinSlashSCommands).some(command => request.prompt.startsWith(command))) { // Sessions app built-in slash commands @@ -1557,11 +1563,12 @@ export class CopilotCLIChatSessionParticipant extends Disposable { // the chat UI reloads history from the SDK and discards the in-memory // result, which would drop our `details` field on the first request. const modelDetailsEnabled = this.configurationService.getConfig(ConfigKey.Advanced.CLIModelDetailsEnabled); - const { result, responseModelId } = await getCopilotCLIModelDetails(session.object, model, this.copilotCLIModels, this.logService, modelDetailsEnabled); + const creditsUsed = this._chatQuotaService.getCreditsForTurn(request.id); + const { result, responseModelId } = await getCopilotCLIModelDetails(session.object, model, this.copilotCLIModels, this.logService, modelDetailsEnabled, creditsUsed); persistCopilotCLIResponseModelId(sessionId, request.id, responseModelId, this.chatSessionMetadataStore, this.logService); - if (isUntitled && !token.isCancellationRequested) { + if (isUntitled && request.command !== 'remote' && !token.isCancellationRequested) { this.scheduleUntitledSessionSwap(id, request.id, request.prompt, session.object.sessionId, chatSessionContext.chatSessionItem); } @@ -1573,6 +1580,7 @@ export class CopilotCLIChatSessionParticipant extends Disposable { throw ex; } finally { + this._chatQuotaService.resetTurnCredits(request.id); if (sdkSessionId) { const requestsForSession = this.pendingRequestBySession.get(sdkSessionId); if (requestsForSession) { diff --git 
a/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIModelDetails.ts b/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIModelDetails.ts index 126801cc906e0..6c46d7fa9d52b 100644 --- a/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIModelDetails.ts +++ b/extensions/copilot/src/extension/chatSessions/vscode-node/copilotCLIModelDetails.ts @@ -3,6 +3,7 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import * as l10n from '@vscode/l10n'; import type * as vscode from 'vscode'; import { ILogService } from '../../../platform/log/common/logService'; import { IChatSessionMetadataStore } from '../common/chatSessionMetadataStore'; @@ -17,7 +18,7 @@ export interface CopilotCLIModelDetails { /** * Builds the chat result details for the model that produced the latest CLI response. */ -export async function getCopilotCLIModelDetails(session: ICopilotCLISession, requestModel: { model: string; reasoningEffort?: string } | undefined, copilotCLIModels: ICopilotCLIModels, logService: ILogService, enabled: boolean): Promise { +export async function getCopilotCLIModelDetails(session: ICopilotCLISession, requestModel: { model: string; reasoningEffort?: string } | undefined, copilotCLIModels: ICopilotCLIModels, logService: ILogService, enabled: boolean, creditsUsed?: number): Promise { if (!enabled) { return { result: {}, responseModelId: undefined }; } @@ -35,8 +36,18 @@ export async function getCopilotCLIModelDetails(session: ICopilotCLISession, req .map(modelId => modelId ? models.find(model => matchesCopilotCLIModel(model, modelId)) : undefined) .find(modelInfo => !!modelInfo); + let details: string | undefined; + if (modelInfo && creditsUsed !== undefined) { + const formatted = creditsUsed % 1 === 0 ? creditsUsed.toString() : creditsUsed.toFixed(1); + details = creditsUsed === 1 + ? 
l10n.t('{0} \u2022 {1} credit', modelInfo.name, formatted) + : l10n.t('{0} \u2022 {1} credits', modelInfo.name, formatted); + } else if (modelInfo) { + details = formatModelDetails(modelInfo); + } + return { - result: modelInfo ? { details: formatModelDetails(modelInfo) } : {}, + result: details ? { details } : {}, responseModelId, }; } @@ -50,4 +61,4 @@ export function persistCopilotCLIResponseModelId(sessionId: string, requestId: s } chatSessionMetadataStore.updateRequestDetails(sessionId, [{ vscodeRequestId: requestId, responseModelId }]) .catch(ex => logService.error(ex, 'Failed to persist response model id')); -} \ No newline at end of file +} diff --git a/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessionParticipant.spec.ts b/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessionParticipant.spec.ts index fc70c937c88bd..9f419a2debbaf 100644 --- a/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessionParticipant.spec.ts +++ b/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessionParticipant.spec.ts @@ -406,7 +406,7 @@ describe('CopilotCLIChatSessionParticipant.handleRequest', () => { } }(); } - const session = new TestCopilotCLISession(workspaceInfo, agentName, sdkSession, [], logService, workspaceService, new MockChatSessionMetadataStore(), instantiationService, new NullRequestLogger(), new NullICopilotCLIImageSupport(), new FakeToolsService(), new FakeUserQuestionHandler(), accessor.get(IConfigurationService), new NoopOTelService(resolveOTelConfig({ env: {}, extensionVersion: '0.0.0', sessionId: 'test' })), new FakeGitService(), { _serviceBrand: undefined } as any); + const session = new TestCopilotCLISession(workspaceInfo, agentName, sdkSession, [], logService, workspaceService, new MockChatSessionMetadataStore(), instantiationService, new NullRequestLogger(), new NullICopilotCLIImageSupport(), new FakeToolsService(), new 
FakeUserQuestionHandler(), accessor.get(IConfigurationService), new NoopOTelService(resolveOTelConfig({ env: {}, extensionVersion: '0.0.0', sessionId: 'test' })), new FakeGitService(), { _serviceBrand: undefined } as any, { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any); cliSessions.push(session); return disposables.add(session); } @@ -461,6 +461,7 @@ describe('CopilotCLIChatSessionParticipant.handleRequest', () => { new MockChatSessionMetadataStore(), customSessionTitleService, new (mock())(), + { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any, ); }); @@ -641,6 +642,23 @@ describe('CopilotCLIChatSessionParticipant.handleRequest', () => { expect(promptResolver.resolvePrompt).not.toHaveBeenCalled(); }); + it('maps /remote with arguments to CLI command input for untitled sessions', async () => { + const request = new TestChatRequest('on'); + request.command = 'remote'; + const context = createChatContext('temp-remote', true, request); + const stream = new MockChatResponseStream(); + const token = disposables.add(new CancellationTokenSource()).token; + + await participant.createHandler()(request, context, stream, token); + await waitForScheduledUntitledSwap(); + + expect(cliSessions.length).toBe(1); + expect(cliSessions[0].requests).toHaveLength(1); + expect(cliSessions[0].requests[0].input).toEqual({ command: 'remote', prompt: 'on' }); + expect(promptResolver.resolvePrompt).toHaveBeenCalled(); + expect(itemProvider.swap).not.toHaveBeenCalled(); + }); + it.skip('returns early when yield is requested while the session is still running', async () => { const sessionId = 'existing-yield'; const sdkSession = new MockCliSdkSession(sessionId, new Date()); @@ -889,6 +907,7 @@ describe('CopilotCLIChatSessionParticipant.handleRequest', () => { new MockChatSessionMetadataStore(), customSessionTitleService, new 
(mock())(), + { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any, ); const sessionResource = vscode.Uri.from({ scheme: 'copilotcli', path: `/${sessionId}` }); const contentToken = disposables.add(new CancellationTokenSource()).token; @@ -2059,6 +2078,7 @@ describe('CopilotCLIChatSessionParticipant.handleRequest', () => { new MockChatSessionMetadataStore(), customSessionTitleService, new (mock())(), + { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any, ); } @@ -2192,6 +2212,7 @@ describe('CopilotCLIChatSessionParticipant.handleRequest', () => { new MockChatSessionMetadataStore(), customSessionTitleService, octoKitService, + { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any, ); }); diff --git a/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessions.spec.ts b/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessions.spec.ts index 46c80dc7087e5..b2323745e9d53 100644 --- a/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessions.spec.ts +++ b/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIChatSessions.spec.ts @@ -401,6 +401,7 @@ describe('CopilotCLIChatSessionParticipant', () => { new class extends mock() { declare readonly _serviceBrand: undefined; }(), + { _serviceBrand: undefined, resetTurnCredits() { }, getCreditsForTurn() { return undefined; }, setLastCopilotUsage() { } } as any, ); await participant.createHandler()( diff --git a/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIModelDetails.spec.ts b/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIModelDetails.spec.ts new file mode 100644 index 0000000000000..57a7c6f420e04 --- /dev/null +++ 
b/extensions/copilot/src/extension/chatSessions/vscode-node/test/copilotCLIModelDetails.spec.ts @@ -0,0 +1,81 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { describe, expect, it } from 'vitest'; +import { getCopilotCLIModelDetails } from '../copilotCLIModelDetails'; +import type { ICopilotCLISession } from '../../copilotcli/node/copilotcliSession'; +import type { ICopilotCLIModels, CopilotCLIModelInfo } from '../../copilotcli/node/copilotCli'; +import type { ILogService } from '../../../../platform/log/common/logService'; + +const testModel: CopilotCLIModelInfo = { + id: 'claude-sonnet-4', + name: 'Claude Sonnet 4', + multiplier: 2, + maxContextWindowTokens: 200000, + supportsVision: true, +}; + +function createMockSession(responseModelId?: string, selectedModelId?: string): ICopilotCLISession { + return { + getLastResponseModelId: () => responseModelId, + getSelectedModelId: async () => selectedModelId, + } as unknown as ICopilotCLISession; +} + +function createMockModels(models: CopilotCLIModelInfo[]): ICopilotCLIModels { + return { + _serviceBrand: undefined, + getModels: async () => models, + } as unknown as ICopilotCLIModels; +} + +const nullLog = { error() { } } as unknown as ILogService; + +describe('getCopilotCLIModelDetails', () => { + it('returns credits display for integer credits', async () => { + const session = createMockSession('claude-sonnet-4'); + const models = createMockModels([testModel]); + + const { result } = await getCopilotCLIModelDetails(session, undefined, models, nullLog, true, 5); + + expect(result.details).toBe('Claude Sonnet 4 \u2022 5 credits'); + }); + + it('returns singular credit label for exactly 1 credit', async 
() => { + const session = createMockSession('claude-sonnet-4'); + const models = createMockModels([testModel]); + + const { result } = await getCopilotCLIModelDetails(session, undefined, models, nullLog, true, 1); + + expect(result.details).toBe('Claude Sonnet 4 \u2022 1 credit'); + }); + + it('returns formatted decimal for fractional credits', async () => { + const session = createMockSession('claude-sonnet-4'); + const models = createMockModels([testModel]); + + const { result } = await getCopilotCLIModelDetails(session, undefined, models, nullLog, true, 1.5); + + expect(result.details).toBe('Claude Sonnet 4 \u2022 1.5 credits'); + }); + + it('falls back to multiplier format when credits are undefined', async () => { + const session = createMockSession('claude-sonnet-4'); + const models = createMockModels([testModel]); + + const { result } = await getCopilotCLIModelDetails(session, undefined, models, nullLog, true); + + expect(result.details).toBe('Claude Sonnet 4 \u2022 2x'); + }); + + it('returns empty result when disabled', async () => { + const session = createMockSession('claude-sonnet-4'); + const models = createMockModels([testModel]); + + const { result } = await getCopilotCLIModelDetails(session, undefined, models, nullLog, false, 5); + + expect(result).toEqual({}); + }); +}); diff --git a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts index 80637c06618b9..84ceaca7b838e 100644 --- a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts +++ b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts @@ -429,6 +429,9 @@ export class ChatMLFetcherImpl extends AbstractChatMLFetcher { ...(result.usage.completion_tokens_details?.reasoning_tokens ? { [GenAiAttr.USAGE_REASONING_TOKENS]: result.usage.completion_tokens_details.reasoning_tokens } : {}), + ...(typeof result.usage.copilot_usage?.total_nano_aiu === 'number' + ? 
{ [CopilotChatAttr.COPILOT_USAGE_NANO_AIU]: result.usage.copilot_usage.total_nano_aiu } + : {}), }); } // Always capture response content for the debug panel diff --git a/extensions/copilot/src/extension/trajectory/vscode-node/otelSpanToChatDebugEvent.ts b/extensions/copilot/src/extension/trajectory/vscode-node/otelSpanToChatDebugEvent.ts index 3c0ae7fc90451..9a8f1f98c32ea 100644 --- a/extensions/copilot/src/extension/trajectory/vscode-node/otelSpanToChatDebugEvent.ts +++ b/extensions/copilot/src/extension/trajectory/vscode-node/otelSpanToChatDebugEvent.ts @@ -360,6 +360,7 @@ function spanToModelTurnEvent(span: ICompletedSpanData): vscode.ChatDebugModelTu evt.requestName = asString(span.attributes[CopilotChatAttr.DEBUG_NAME]) ?? asString(span.attributes[GenAiAttr.AGENT_NAME]); evt.status = spanStatusToString(span.status.code as SpanStatusCode); + evt.copilotUsageNanoAiu = asNumber(span.attributes[CopilotChatAttr.COPILOT_USAGE_NANO_AIU]); return evt; } @@ -688,6 +689,7 @@ function entryToModelTurnEvent(entry: IDebugLogEntry): vscode.ChatDebugModelTurn evt.maxOutputTokens = entry.attrs.maxTokens as number | undefined; evt.requestName = (entry.attrs.debugName as string | undefined) ?? entry.name; evt.status = entry.status === 'error' ? 'error' : 'success'; + evt.copilotUsageNanoAiu = entry.attrs.copilotUsageNanoAiu as number | undefined; return evt; } diff --git a/extensions/copilot/src/platform/chat/common/chatQuotaServiceImpl.ts b/extensions/copilot/src/platform/chat/common/chatQuotaServiceImpl.ts index e38d9954225c0..e52f6bcea34bd 100644 --- a/extensions/copilot/src/platform/chat/common/chatQuotaServiceImpl.ts +++ b/extensions/copilot/src/platform/chat/common/chatQuotaServiceImpl.ts @@ -151,13 +151,16 @@ export class ChatQuotaService extends Disposable implements IChatQuotaService { if (!quotaInfo || !quotaInfo.quota_snapshots || !quotaInfo.quota_reset_date) { return; } + const snapshot = this._authService.copilotToken?.isFreeUser + ? 
quotaInfo.quota_snapshots.chat + : quotaInfo.quota_snapshots.premium_interactions; this._quotaInfo = { - unlimited: quotaInfo.quota_snapshots.premium_interactions.unlimited, - additionalUsageEnabled: quotaInfo.quota_snapshots.premium_interactions.overage_permitted, - additionalUsageUsed: quotaInfo.quota_snapshots.premium_interactions.overage_count, - quota: quotaInfo.quota_snapshots.premium_interactions.entitlement, + unlimited: snapshot.unlimited, + additionalUsageEnabled: snapshot.overage_permitted, + additionalUsageUsed: snapshot.overage_count, + quota: snapshot.entitlement, resetDate: new Date(quotaInfo.quota_reset_date), - percentRemaining: quotaInfo.quota_snapshots.premium_interactions.percent_remaining, + percentRemaining: snapshot.percent_remaining, }; this._onDidChange.fire(); } diff --git a/extensions/copilot/src/platform/chat/test/common/chatQuotaServiceImpl.spec.ts b/extensions/copilot/src/platform/chat/test/common/chatQuotaServiceImpl.spec.ts index ca06e0d01fba0..bcf3488bb875a 100644 --- a/extensions/copilot/src/platform/chat/test/common/chatQuotaServiceImpl.spec.ts +++ b/extensions/copilot/src/platform/chat/test/common/chatQuotaServiceImpl.spec.ts @@ -16,6 +16,33 @@ function createMockAuthService(): IAuthenticationService { } as unknown as IAuthenticationService; } +function createMockAuthServiceWithEmitter(opts?: { isFreeUser?: boolean }) { + const emitter = new Emitter(); + const authService = { + _serviceBrand: undefined, + copilotToken: undefined as { isFreeUser: boolean; quotaInfo: ReturnType } | undefined, + onDidAuthenticationChange: emitter.event, + } as unknown as IAuthenticationService; + return { + authService, emitter, setToken: (quotaInfo: ReturnType) => { + (authService as any).copilotToken = { isFreeUser: opts?.isFreeUser ?? 
false, quotaInfo }; + } + }; +} + +function makeQuotaInfo(overrides: { chat?: Partial; premium_interactions?: Partial } = {}, resetDate = '2026-06-01T00:00:00Z') { + return { + quota_reset_date: resetDate, + quota_snapshots: { + chat: { quota_id: 'chat', entitlement: 100, remaining: 50, unlimited: false, overage_count: 0, overage_permitted: false, percent_remaining: 50, ...overrides.chat }, + completions: { quota_id: 'completions', entitlement: 100, remaining: 100, unlimited: false, overage_count: 0, overage_permitted: false, percent_remaining: 100 }, + premium_interactions: { quota_id: 'premium', entitlement: 500, remaining: 400, unlimited: false, overage_count: 5, overage_permitted: true, percent_remaining: 80, ...overrides.premium_interactions }, + }, + }; +} + +type SnapshotData = { quota_id: string; entitlement: number; remaining: number; unlimited: boolean; overage_count: number; overage_permitted: boolean; percent_remaining: number }; + describe('ChatQuotaService', () => { function create() { return new ChatQuotaService(createMockAuthService()); @@ -424,4 +451,64 @@ describe('ChatQuotaService', () => { expect(getTotalCredits(svc, parentReqId, parentTurnId)).toBe(57); }); }); + + describe('processUserInfoQuotaSnapshot via auth change', () => { + test('free user reads from chat snapshot', () => { + const { authService, emitter, setToken } = createMockAuthServiceWithEmitter({ isFreeUser: true }); + const svc = new ChatQuotaService(authService); + + setToken(makeQuotaInfo({ + chat: { percent_remaining: 30, overage_permitted: false, overage_count: 0, entitlement: 100 }, + premium_interactions: { percent_remaining: 80, overage_permitted: true, overage_count: 5, entitlement: 500 }, + })); + emitter.fire(); + + const quota = svc.quotaInfo; + expect(quota).toBeDefined(); + expect(quota!.percentRemaining).toBe(30); + expect(quota!.additionalUsageEnabled).toBe(false); + expect(quota!.additionalUsageUsed).toBe(0); + expect(quota!.quota).toBe(100); + }); + + test('paid 
user reads from premium_interactions snapshot', () => { + const { authService, emitter, setToken } = createMockAuthServiceWithEmitter({ isFreeUser: false }); + const svc = new ChatQuotaService(authService); + + setToken(makeQuotaInfo({ + chat: { percent_remaining: 30, overage_permitted: false, overage_count: 0, entitlement: 100 }, + premium_interactions: { percent_remaining: 80, overage_permitted: true, overage_count: 5, entitlement: 500 }, + })); + emitter.fire(); + + const quota = svc.quotaInfo; + expect(quota).toBeDefined(); + expect(quota!.percentRemaining).toBe(80); + expect(quota!.additionalUsageEnabled).toBe(true); + expect(quota!.additionalUsageUsed).toBe(5); + expect(quota!.quota).toBe(500); + }); + + test('fires onDidChange when quota is updated', () => { + const { authService, emitter, setToken } = createMockAuthServiceWithEmitter({ isFreeUser: true }); + const svc = new ChatQuotaService(authService); + let changeCount = 0; + svc.onDidChange(() => changeCount++); + + setToken(makeQuotaInfo()); + emitter.fire(); + + expect(changeCount).toBe(1); + }); + + test('no-ops when copilotToken has no quotaInfo', () => { + const { authService, emitter } = createMockAuthServiceWithEmitter({ isFreeUser: true }); + const svc = new ChatQuotaService(authService); + + (authService as any).copilotToken = { isFreeUser: true, quotaInfo: undefined }; + emitter.fire(); + + expect(svc.quotaInfo).toBeUndefined(); + }); + }); }); diff --git a/extensions/copilot/src/platform/otel/common/genAiAttributes.ts b/extensions/copilot/src/platform/otel/common/genAiAttributes.ts index 19cc34a67ac38..1972c799da110 100644 --- a/extensions/copilot/src/platform/otel/common/genAiAttributes.ts +++ b/extensions/copilot/src/platform/otel/common/genAiAttributes.ts @@ -164,6 +164,8 @@ export const CopilotChatAttr = { MODE_NAME: 'copilot_chat.mode_name', /** Aggregated session cost in USD (Claude agent) */ TOTAL_COST_USD: 'copilot_chat.total_cost_usd', + /** Per-request cost from 
copilot_usage.total_nano_aiu */ + COPILOT_USAGE_NANO_AIU: 'copilot_chat.copilot_usage_nano_aiu', } as const; export type EditSource = 'inline_chat' | 'chat_editing' | 'chat_editing_hunk' | 'apply_patch' | 'replace_string' | 'code_mapper'; diff --git a/extensions/copilot/src/platform/otel/common/test/chatMLFetcherSpanLifecycle.spec.ts b/extensions/copilot/src/platform/otel/common/test/chatMLFetcherSpanLifecycle.spec.ts index e21d6d84a2fd9..283c24606ad37 100644 --- a/extensions/copilot/src/platform/otel/common/test/chatMLFetcherSpanLifecycle.spec.ts +++ b/extensions/copilot/src/platform/otel/common/test/chatMLFetcherSpanLifecycle.spec.ts @@ -48,12 +48,14 @@ describe('chatMLFetcher Span Lifecycle', () => { [GenAiAttr.RESPONSE_ID]: 'chatcmpl-xyz', [GenAiAttr.RESPONSE_FINISH_REASONS]: ['stop'], [CopilotChatAttr.TIME_TO_FIRST_TOKEN]: 450, + [CopilotChatAttr.COPILOT_USAGE_NANO_AIU]: 3_500_000_000, }); span.setStatus(SpanStatusCode.OK); span.end(); expect(s.attributes[GenAiAttr.USAGE_INPUT_TOKENS]).toBe(1500); expect(s.attributes[GenAiAttr.RESPONSE_MODEL]).toBe('gpt-4o-2024-08-06'); + expect(s.attributes[CopilotChatAttr.COPILOT_USAGE_NANO_AIU]).toBe(3_500_000_000); expect(s.statusCode).toBe(SpanStatusCode.OK); expect(s.ended).toBe(true); }); diff --git a/src/vs/editor/contrib/snippet/browser/snippet.md b/src/vs/editor/contrib/snippet/browser/snippet.md index 4068bf80675b1..dc8dd61cf211d 100644 --- a/src/vs/editor/contrib/snippet/browser/snippet.md +++ b/src/vs/editor/contrib/snippet/browser/snippet.md @@ -47,7 +47,11 @@ For inserting the current date and time: * `CURRENT_HOUR` The current hour in 24-hour clock format * `CURRENT_MINUTE` The current minute * `CURRENT_SECOND` The current second +* `CURRENT_MILLISECOND` The current millisecond as three digits * `CURRENT_SECONDS_UNIX` The number of seconds since the Unix epoch +* `CURRENT_MILLISECONDS_UNIX` The number of milliseconds since the Unix epoch +* `CURRENT_TIMEZONE_OFFSET` The current UTC offset in the format +HH:MM 
or -HH:MM +* `CURRENT_TIMEZONE_NAME` The current IANA time zone name (example 'Europe/Berlin') For inserting random values: diff --git a/src/vs/editor/contrib/snippet/browser/snippetParser.ts b/src/vs/editor/contrib/snippet/browser/snippetParser.ts index 411b5554f88d6..8a76e164db6b0 100644 --- a/src/vs/editor/contrib/snippet/browser/snippetParser.ts +++ b/src/vs/editor/contrib/snippet/browser/snippetParser.ts @@ -353,7 +353,7 @@ export class Transform extends Marker { } toTextmateString(): string { - return `/${this.regexp.source}/${this.children.map(c => c.toTextmateString())}/${(this.regexp.ignoreCase ? 'i' : '') + (this.regexp.global ? 'g' : '')}`; + return `/${this.regexp.source}/${this.children.map(c => c.toTextmateString()).join('')}/${(this.regexp.ignoreCase ? 'i' : '') + (this.regexp.global ? 'g' : '')}`; } clone(): Transform { diff --git a/src/vs/editor/contrib/snippet/browser/snippetVariables.ts b/src/vs/editor/contrib/snippet/browser/snippetVariables.ts index e9b9c49a09fc1..bd116bdd748ac 100644 --- a/src/vs/editor/contrib/snippet/browser/snippetVariables.ts +++ b/src/vs/editor/contrib/snippet/browser/snippetVariables.ts @@ -25,12 +25,15 @@ export const KnownSnippetVariableNames = Object.freeze<{ [key: string]: true }>( 'CURRENT_HOUR': true, 'CURRENT_MINUTE': true, 'CURRENT_SECOND': true, + 'CURRENT_MILLISECOND': true, 'CURRENT_DAY_NAME': true, 'CURRENT_DAY_NAME_SHORT': true, 'CURRENT_MONTH_NAME': true, 'CURRENT_MONTH_NAME_SHORT': true, 'CURRENT_SECONDS_UNIX': true, + 'CURRENT_MILLISECONDS_UNIX': true, 'CURRENT_TIMEZONE_OFFSET': true, + 'CURRENT_TIMEZONE_NAME': true, 'SELECTION': true, 'CLIPBOARD': true, 'TM_SELECTED_TEXT': true, @@ -272,42 +275,51 @@ export class TimeBasedVariableResolver implements VariableResolver { private static readonly monthNamesShort = [nls.localize('JanuaryShort', "Jan"), nls.localize('FebruaryShort', "Feb"), nls.localize('MarchShort', "Mar"), nls.localize('AprilShort', "Apr"), nls.localize('MayShort', "May"), 
nls.localize('JuneShort', "Jun"), nls.localize('JulyShort', "Jul"), nls.localize('AugustShort', "Aug"), nls.localize('SeptemberShort', "Sep"), nls.localize('OctoberShort', "Oct"), nls.localize('NovemberShort', "Nov"), nls.localize('DecemberShort', "Dec")]; private readonly _date = new Date(); + private _timezoneName: string | undefined; resolve(variable: Variable): string | undefined { const { name } = variable; - if (name === 'CURRENT_YEAR') { - return String(this._date.getFullYear()); - } else if (name === 'CURRENT_YEAR_SHORT') { - return String(this._date.getFullYear()).slice(-2); - } else if (name === 'CURRENT_MONTH') { - return String(this._date.getMonth().valueOf() + 1).padStart(2, '0'); - } else if (name === 'CURRENT_DATE') { - return String(this._date.getDate().valueOf()).padStart(2, '0'); - } else if (name === 'CURRENT_HOUR') { - return String(this._date.getHours().valueOf()).padStart(2, '0'); - } else if (name === 'CURRENT_MINUTE') { - return String(this._date.getMinutes().valueOf()).padStart(2, '0'); - } else if (name === 'CURRENT_SECOND') { - return String(this._date.getSeconds().valueOf()).padStart(2, '0'); - } else if (name === 'CURRENT_DAY_NAME') { - return TimeBasedVariableResolver.dayNames[this._date.getDay()]; - } else if (name === 'CURRENT_DAY_NAME_SHORT') { - return TimeBasedVariableResolver.dayNamesShort[this._date.getDay()]; - } else if (name === 'CURRENT_MONTH_NAME') { - return TimeBasedVariableResolver.monthNames[this._date.getMonth()]; - } else if (name === 'CURRENT_MONTH_NAME_SHORT') { - return TimeBasedVariableResolver.monthNamesShort[this._date.getMonth()]; - } else if (name === 'CURRENT_SECONDS_UNIX') { - return String(Math.floor(this._date.getTime() / 1000)); - } else if (name === 'CURRENT_TIMEZONE_OFFSET') { - const rawTimeOffset = this._date.getTimezoneOffset(); - const sign = rawTimeOffset > 0 ? '-' : '+'; - const hours = Math.trunc(Math.abs(rawTimeOffset / 60)); - const hoursString = (hours < 10 ? 
'0' + hours : hours); - const minutes = Math.abs(rawTimeOffset) - hours * 60; - const minutesString = (minutes < 10 ? '0' + minutes : minutes); - return sign + hoursString + ':' + minutesString; + switch (name) { + case 'CURRENT_YEAR': + return String(this._date.getFullYear()); + case 'CURRENT_YEAR_SHORT': + return String(this._date.getFullYear()).slice(-2); + case 'CURRENT_MONTH': + return String(this._date.getMonth().valueOf() + 1).padStart(2, '0'); + case 'CURRENT_DATE': + return String(this._date.getDate().valueOf()).padStart(2, '0'); + case 'CURRENT_HOUR': + return String(this._date.getHours().valueOf()).padStart(2, '0'); + case 'CURRENT_MINUTE': + return String(this._date.getMinutes().valueOf()).padStart(2, '0'); + case 'CURRENT_SECOND': + return String(this._date.getSeconds().valueOf()).padStart(2, '0'); + case 'CURRENT_MILLISECOND': + return String(this._date.getMilliseconds().valueOf()).padStart(3, '0'); + case 'CURRENT_DAY_NAME': + return TimeBasedVariableResolver.dayNames[this._date.getDay()]; + case 'CURRENT_DAY_NAME_SHORT': + return TimeBasedVariableResolver.dayNamesShort[this._date.getDay()]; + case 'CURRENT_MONTH_NAME': + return TimeBasedVariableResolver.monthNames[this._date.getMonth()]; + case 'CURRENT_MONTH_NAME_SHORT': + return TimeBasedVariableResolver.monthNamesShort[this._date.getMonth()]; + case 'CURRENT_SECONDS_UNIX': + return String(Math.floor(this._date.getTime() / 1000)); + case 'CURRENT_MILLISECONDS_UNIX': + return String(this._date.getTime()); + case 'CURRENT_TIMEZONE_OFFSET': { + const rawTimeOffset = this._date.getTimezoneOffset(); + const sign = rawTimeOffset > 0 ? '-' : '+'; + const hours = Math.trunc(Math.abs(rawTimeOffset / 60)); + const hoursString = (hours < 10 ? '0' + hours : hours); + const minutes = Math.abs(rawTimeOffset) - hours * 60; + const minutesString = (minutes < 10 ? 
'0' + minutes : minutes); + return sign + hoursString + ':' + minutesString; + } + case 'CURRENT_TIMEZONE_NAME': + return this._timezoneName ??= Intl.DateTimeFormat().resolvedOptions().timeZone; } return undefined; diff --git a/src/vs/editor/contrib/snippet/test/browser/snippetParser.test.ts b/src/vs/editor/contrib/snippet/test/browser/snippetParser.test.ts index efb7decee2cf4..dcb2f994c485e 100644 --- a/src/vs/editor/contrib/snippet/test/browser/snippetParser.test.ts +++ b/src/vs/editor/contrib/snippet/test/browser/snippetParser.test.ts @@ -780,6 +780,18 @@ suite('SnippetParser', () => { assert.strictEqual(transform.toTextmateString(), '/foo/bar/ig'); }); + test('transform serialization joins children without comma', function () { + const transformWithFormatString = new Transform(); + transformWithFormatString.appendChild(new FormatString(1, 'upcase')); + transformWithFormatString.appendChild(new Text('_')); + transformWithFormatString.regexp = new RegExp('foo', 'g'); + const serialized = transformWithFormatString.toTextmateString(); + assert.strictEqual(serialized, '/foo/${1:/upcase}_/g'); + + const snippet = new SnippetParser().parse(`\${TM_FILENAME${serialized}}`); + assert.strictEqual(snippet.toTextmateString(), `\${TM_FILENAME${serialized}}`); + }); + test('Snippet parser freeze #53144', function () { const snippet = new SnippetParser().parse('${1/(void$)|(.+)/${1:?-\treturn nil;}/}'); assertMarker(snippet, Placeholder); diff --git a/src/vs/editor/contrib/snippet/test/browser/snippetVariables.test.ts b/src/vs/editor/contrib/snippet/test/browser/snippetVariables.test.ts index 89c480c64c4e1..9a07e1935b83a 100644 --- a/src/vs/editor/contrib/snippet/test/browser/snippetVariables.test.ts +++ b/src/vs/editor/contrib/snippet/test/browser/snippetVariables.test.ts @@ -309,12 +309,38 @@ suite('Snippet Variables Resolver', function () { assertVariableResolve3(resolver, 'CURRENT_HOUR'); assertVariableResolve3(resolver, 'CURRENT_MINUTE'); assertVariableResolve3(resolver, 
'CURRENT_SECOND'); + assertVariableResolve3(resolver, 'CURRENT_MILLISECOND'); assertVariableResolve3(resolver, 'CURRENT_DAY_NAME'); assertVariableResolve3(resolver, 'CURRENT_DAY_NAME_SHORT'); assertVariableResolve3(resolver, 'CURRENT_MONTH_NAME'); assertVariableResolve3(resolver, 'CURRENT_MONTH_NAME_SHORT'); assertVariableResolve3(resolver, 'CURRENT_SECONDS_UNIX'); + assertVariableResolve3(resolver, 'CURRENT_MILLISECONDS_UNIX'); assertVariableResolve3(resolver, 'CURRENT_TIMEZONE_OFFSET'); + assertVariableResolve3(resolver, 'CURRENT_TIMEZONE_NAME'); + }); + + test('Time-based snippet variables have deterministic millisecond and unix values', function () { + const now = Date.UTC(2024, 3, 15, 12, 34, 56, 7); + const clock = sinon.useFakeTimers({ now }); + try { + const resolver = new TimeBasedVariableResolver; + const expectedDate = new Date(now); + const pad = (value: number, length: number) => String(value).padStart(length, '0'); + + assertVariableResolve(resolver, 'CURRENT_YEAR', String(expectedDate.getFullYear())); + assertVariableResolve(resolver, 'CURRENT_YEAR_SHORT', String(expectedDate.getFullYear()).slice(-2)); + assertVariableResolve(resolver, 'CURRENT_MONTH', pad(expectedDate.getMonth() + 1, 2)); + assertVariableResolve(resolver, 'CURRENT_DATE', pad(expectedDate.getDate(), 2)); + assertVariableResolve(resolver, 'CURRENT_HOUR', pad(expectedDate.getHours(), 2)); + assertVariableResolve(resolver, 'CURRENT_MINUTE', pad(expectedDate.getMinutes(), 2)); + assertVariableResolve(resolver, 'CURRENT_SECOND', pad(expectedDate.getSeconds(), 2)); + assertVariableResolve(resolver, 'CURRENT_MILLISECOND', pad(expectedDate.getMilliseconds(), 3)); + assertVariableResolve(resolver, 'CURRENT_SECONDS_UNIX', String(Math.floor(now / 1000))); + assertVariableResolve(resolver, 'CURRENT_MILLISECONDS_UNIX', String(now)); + } finally { + clock.restore(); + } }); test('Time-based snippet variables resolve to the same values even as time progresses', async function () { @@ -326,12 
+352,15 @@ suite('Snippet Variables Resolver', function () { $CURRENT_HOUR $CURRENT_MINUTE $CURRENT_SECOND + $CURRENT_MILLISECOND $CURRENT_DAY_NAME $CURRENT_DAY_NAME_SHORT $CURRENT_MONTH_NAME $CURRENT_MONTH_NAME_SHORT $CURRENT_SECONDS_UNIX + $CURRENT_MILLISECONDS_UNIX $CURRENT_TIMEZONE_OFFSET + $CURRENT_TIMEZONE_NAME `; const clock = sinon.useFakeTimers(); diff --git a/src/vs/platform/agentHost/common/claudeModelConfig.ts b/src/vs/platform/agentHost/common/claudeModelConfig.ts new file mode 100644 index 0000000000000..443005560ff73 --- /dev/null +++ b/src/vs/platform/agentHost/common/claudeModelConfig.ts @@ -0,0 +1,108 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { localize } from '../../../nls.js'; +import type { ConfigSchema, ModelSelection } from './state/protocol/state.js'; + +/** + * Sub-key in `ModelSelection.config` carrying the user's reasoning-effort + * pick from the model picker. Mirror of CopilotAgent's + * `ThinkingLevelConfigKey` (copilotAgent.ts:83) so a single picker contract + * spans both providers — the picker writes `model.config.thinkingLevel`, + * and each provider narrows that string at materialize. + */ +export const CLAUDE_THINKING_LEVEL_KEY = 'thinkingLevel'; + +/** + * Reasoning-effort values accepted by the Claude SDK's `Options.effort` + * (sdk.d.ts:443 + sdk.d.ts:1214). Hand-rolled here — not imported from the + * SDK — to keep `common/` SDK-free; structurally identical to the SDK's + * `EffortLevel` so it assigns into `Options.effort` without a cast. 
+ * + * NOTE: the live hot-swap path `applyFlagSettings({ effortLevel })` + * (sdk.d.ts:4292) only accepts a 4-value subset that omits `'max'`; that + * clamp lives at the hot-swap seam (Phase 9), not here. + */ +export type ClaudeEffortLevel = 'low' | 'medium' | 'high' | 'xhigh' | 'max'; + +/** + * Pull `thinkingLevel` out of `ModelSelection.config` and narrow it to + * {@link ClaudeEffortLevel}. Returns `undefined` when the model selection + * is absent or carries an unrecognized value (the SDK then falls through + * to its own default). Mirror of CopilotAgent's `_getReasoningEffort` + * (copilotAgent.ts:487). + */ +export function resolveClaudeEffort(model: ModelSelection | undefined): ClaudeEffortLevel | undefined { + const raw = model?.config?.[CLAUDE_THINKING_LEVEL_KEY]; + switch (raw) { + case 'low': + case 'medium': + case 'high': + case 'xhigh': + case 'max': + return raw; + default: + return undefined; + } +} + +/** Canonical ordered list of {@link ClaudeEffortLevel} values; used for sort + guard. */ +const CLAUDE_EFFORT_LEVELS: readonly ClaudeEffortLevel[] = ['low', 'medium', 'high', 'xhigh', 'max']; + +/** Type guard narrowing an arbitrary string to {@link ClaudeEffortLevel}. */ +export function isClaudeEffortLevel(value: string): value is ClaudeEffortLevel { + return (CLAUDE_EFFORT_LEVELS as readonly string[]).includes(value); +} + +function labelForClaudeEffort(level: ClaudeEffortLevel): string { + switch (level) { + case 'low': return localize('claude.modelThinkingLevel.low', "Low"); + case 'medium': return localize('claude.modelThinkingLevel.medium', "Medium"); + case 'high': return localize('claude.modelThinkingLevel.high', "High"); + case 'xhigh': return localize('claude.modelThinkingLevel.xhigh', "Extra High"); + case 'max': return localize('claude.modelThinkingLevel.max', "Max"); + } +} + +/** + * Synthesize the per-model `configSchema` advertising the `thinkingLevel` + * picker entry on Claude models that support adaptive thinking. 
Mirror of + * CopilotAgent's `_createThinkingLevelConfigSchema` (copilotAgent.ts:457). + * + * The `enum` is sourced from each model's own `reasoning_effort` list (a + * runtime field on CAPI's `/models` payload — different Claude models + * support different effort subsets, e.g. `['low','medium','high']`, + * `['high']`, or `[]`). Callers narrow that list to {@link ClaudeEffortLevel} + * via {@link isClaudeEffortLevel} before passing it in. + * + * The `default` is `'high'` when the model supports it, otherwise omitted + * — Claude's own server-side default for adaptive thinking is `'high'`, + * and the extension mirrors the same fallback rule at + * `extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeModels.ts:230`. + * (Anthropic's `CCAModel` rows don't carry a server-supplied default + * field; tracked at microsoft/vscode-capi#85.) + * + * Returns `undefined` for an empty list — the picker then renders no + * thinkingLevel control for that model. + */ +export function createClaudeThinkingLevelSchema(supportedEfforts: readonly ClaudeEffortLevel[]): ConfigSchema | undefined { + if (supportedEfforts.length === 0) { + return undefined; + } + const defaultEffort: ClaudeEffortLevel | undefined = supportedEfforts.includes('high') ? 'high' : undefined; + return { + type: 'object', + properties: { + [CLAUDE_THINKING_LEVEL_KEY]: { + type: 'string', + title: localize('claude.modelThinkingLevel.title', "Thinking Level"), + description: localize('claude.modelThinkingLevel.description', "Controls how much reasoning effort Claude uses."), + enum: [...supportedEfforts], + enumLabels: supportedEfforts.map(labelForClaudeEffort), + ...(defaultEffort !== undefined ? 
{ default: defaultEffort } : {}), + }, + }, + }; +} diff --git a/src/vs/platform/agentHost/common/claudeSessionConfigKeys.ts b/src/vs/platform/agentHost/common/claudeSessionConfigKeys.ts index 9319fe6a96f9e..cb467b825538a 100644 --- a/src/vs/platform/agentHost/common/claudeSessionConfigKeys.ts +++ b/src/vs/platform/agentHost/common/claudeSessionConfigKeys.ts @@ -10,7 +10,7 @@ * Claude collapses the platform's two-axis approval model * (`autoApprove` × `mode`) onto a single `permissionMode` axis matching * the Claude SDK's native `PermissionMode` (see - * `@anthropic-ai/claude-agent-sdk` typings). The four values mirror + * `@anthropic-ai/claude-agent-sdk` typings). The six values mirror * the SDK's enum exactly so that the value flowing back into * `query({ permissionMode })` requires no translation layer. * @@ -27,4 +27,4 @@ export const enum ClaudeSessionConfigKey { * Permission-mode values advertised in the Claude session-config schema. * Mirror of the SDK's `PermissionMode` union for protocol-stable strings. */ -export type ClaudePermissionMode = 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan'; +export type ClaudePermissionMode = 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | 'dontAsk' | 'auto'; diff --git a/src/vs/platform/agentHost/common/sessionConfigKeys.ts b/src/vs/platform/agentHost/common/sessionConfigKeys.ts index 33afd3210c59c..f241039f37772 100644 --- a/src/vs/platform/agentHost/common/sessionConfigKeys.ts +++ b/src/vs/platform/agentHost/common/sessionConfigKeys.ts @@ -24,8 +24,6 @@ export const enum SessionConfigKey { Isolation = 'isolation', /** `'branch'` — base branch to work from. */ Branch = 'branch', - /** `'branchNameHint'` — client-supplied hint used during worktree creation. */ - BranchNameHint = 'branchNameHint', /** `'mode'` — agent execution mode (interactive / plan). 
*/ Mode = 'mode', } diff --git a/src/vs/platform/agentHost/node/claude/CONTEXT.md b/src/vs/platform/agentHost/node/claude/CONTEXT.md index a4e3e5b30fed2..d889d40d81973 100644 --- a/src/vs/platform/agentHost/node/claude/CONTEXT.md +++ b/src/vs/platform/agentHost/node/claude/CONTEXT.md @@ -470,3 +470,1886 @@ Captured here so they aren't lost. None of these block Phase 2. agent / dispatcher injection point on `ICopilotApiService` and a way to source the config from the renderer. Track separately when we pick this up. + +## IAgent ↔ Claude SDK mapping + +Descriptive catalogue of how the `IAgent` protocol surface +(`src/vs/platform/agentHost/common/agentService.ts`) lines up with the +`@anthropic-ai/claude-agent-sdk` types and the +`extensions/copilot/.../claudeCodeAgent.ts` reference implementation. + +**This section is mapping, not planning.** Each entry describes how a +surface fits together; it does not commit to an implementation order or +a phase. Roadmap and phase-plan updates flow from this catalogue in a +separate pass. + +Conventions: +- "Host" = the `ClaudeAgent` running in the agent host utility process. +- "SDK" = the `@anthropic-ai/claude-agent-sdk` API + its bundled CLI + subprocess. +- "Reference extension" = the production Copilot extension's Claude + support under `extensions/copilot/src/extension/chatSessions/claude/`. +- A "portrait" entry describes one IAgent surface, the SDK primitive(s) + it lines up with, the direction of data flow, invariants, asymmetries, + and any gaps where standardization is missing. + +### Glossary additions + +**`Turn.id` (locked invariant).** Opaque protocol-level identifier for +one user→assistant exchange. **Equal to the SDK uuid of the user +`SessionMessage` that started the turn.** Live: the host sets +`SDKUserMessage.uuid = effectiveTurnId` when yielding the prompt +([claudeAgent.ts:779](claudeAgent.ts#L779)) — the SDK has been +empirically confirmed to honor caller-supplied uuids. 
Replay: +`Turn.id = sessionMessage.uuid` directly. Single namespace; no +protocol↔SDK uuid mapping table required for fork, truncate, or +transcript reconstruction. Imported sessions from raw Claude Code +become first-class because their on-disk uuids are valid Turn.ids. + +**Tool-call attribution map.** Per-session +`Map` populated when an assistant message carries +a `tool_use` block (the active turnId is known from the in-flight +request) and drained when the matching synthetic-user `tool_result` +arrives. Cross-message because `tool_use` and `tool_result` always +live in different SDK messages. This map exists in the live mapper +(planned at `claudeMapSessionEvents.ts`) and must also hydrate from +disk during transcript reconstruction so `tool_result` events +delivered on session restoration map back to the announcing +`tool_use`'s `turnId`. + +**`SessionMessage.message: unknown`.** Typed as `unknown` in the SDK, +empirically a discriminated union by envelope `type`: `'user'` → +`MessageParam` (text or `tool_result` blocks), `'assistant'` → +`BetaMessage` (text / thinking / tool_use blocks), +`'system'` → `SDKSystemMessage`-family (discriminated by `subtype`). +Narrow at the seam with validators (`vUserMessageContent`, +`vAssistantMessageContent`); silently drop unvalidatable records to +match the JSONL parser semantics of the reference extension. + +### Startup-only vs runtime mutability + +The SDK splits configuration along a hard line. **Startup-only** +options live on `Options` and are baked into the subprocess at +`startup()` time — changing them requires a new `Query`. **Runtime** +operations live as methods on `Query` and apply to the live session. +A handful of concepts are **bijective** (settable startup *and* +mutable runtime via a setter). 
The reference extension's pattern for +non-bijective changes is the +"hot-swap-or-defer-or-restart" classification driven from the +in-flight request boundary in +[`claudeCodeAgent.ts`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts): + +| Bucket | Examples | When applied | +|---|---|---| +| Hot-swap (cheap, between turns) | `setModel`, `setPermissionMode`, `applyFlagSettings({ effortLevel })` | Awaited just before the next `SDKUserMessage` is yielded | +| Defer-and-coalesce | `reloadPlugins` after `setCustomizationEnabled` | Set a `_pending*` flag while busy; apply at next yield boundary | +| Restart-required | Tool-set diff, settings file change | `_pendingRestart = true`, return from iterable, catch-block restarts session | + +Note: there is no `Query.setEffort` method on the SDK. Effort is +applied via `Query.applyFlagSettings({ effortLevel })` and the `'max'` +UI value is clamped to `'xhigh'` at the seam (see [`claudeCodeAgent.ts:196`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts#L196)). + +There is deliberately **no mid-turn mutation path**. Every host write +either applies immediately when idle or queues for the next prompt +boundary. + +### M1 — `sendMessage(session, prompt, attachments?, turnId?)` + +| Direction | Host → SDK | +|---|---| +| SDK primitive | `Query.streamInput(AsyncIterable)` (after `WarmQuery.query()`) | +| Reference | [claudeCodeAgent.ts](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts) `_createPromptIterable` | + +Host wraps the protocol prompt + attachments into a single +`SDKUserMessage` with `uuid = effectiveTurnId` (see Turn.id glossary) +and pushes it onto a per-session prompt iterable. Sequencer-guarded +so concurrent `sendMessage` calls serialize by session. 
The yield +boundary is also where hot-swap config (`setModel`, +`setPermissionMode`, `applyFlagSettings({ effortLevel })`) is applied +and where any `_pending*` flags are drained. + +### M2 — `respondToPermissionRequest(requestId, approved)` and M3 — `respondToUserInputRequest(requestId, response, answers?)` + +These two `IAgent` methods are paired with **two protocol signal +types** that surface to the client via `onDidSessionProgress`. They +are the response side of two distinct flows that the host has to +multiplex from **three** SDK callback origins. + +**Outbound signals (host → client) on `onDidSessionProgress`:** + +| Signal action | Carries | Renders as | +|---|---|---| +| `SessionToolCallReady` (ToolCall enters `PendingConfirmation`) | tool name + parsed input + optional `edits` preview | approve / deny prompt on the running tool call | +| `SessionInputRequested` | `SessionInputRequest { id, message?, url?, questions[]: SessionInputQuestion[] }` — typed text/number/boolean/single-select/multi-select questions, or a URL to open | structured form / select / URL-auth panel | + +**Inbound responses (client → host) on `IAgent`:** + +| IAgent method | Used for | Resolves | +|---|---|---| +| `respondToPermissionRequest(requestId, approved: boolean)` | tool-permission gates | the deferred parked inside `Options.canUseTool` | +| `respondToUserInputRequest(requestId, response: SessionInputResponseKind, answers?)` | structured user input (form questions, URL accept/decline) | the deferred parked inside `Options.canUseTool` (interactive-tool subset) **or** `Options.onElicitation` | + +**Three SDK origins, two flows.** The host receives callbacks from +the SDK at three places; `claudeMessageDispatch` / the host's +permission gate route each to the appropriate IAgent flow. 
+ +| SDK callback | When it fires | Routes to flow | Why | +|---|---|---|---| +| `Options.canUseTool(toolName, input, { suggestions })` for arbitrary tool names | Before any tool is executed | **Permission** (`SessionToolCallReady` → `respondToPermissionRequest`) | Standard tool-permission gate | +| `Options.canUseTool('AskUserQuestion' \| 'ExitPlanMode', input, ...)` | Built-in interactive Claude tools | **User input** (`SessionInputRequested` → `respondToUserInputRequest`) | These two tools' "input" is itself a user-facing question / plan; `INTERACTIVE_CLAUDE_TOOLS` is the closed-set discriminator | +| `Options.onElicitation(request, { signal })` | An MCP server (host's own *or* third-party) calls `elicit/create` | **User input** (`SessionInputRequested` → `respondToUserInputRequest`) | MCP elicitation is the canonical path for "structured user input"; the host's in-process MCP server uses it too | + +**`canUseTool` return type** (locked invariant, [sdk.d.ts:1582](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1582)): + +```ts +type PermissionResult = + | { behavior: 'allow'; updatedInput?: Record<string, unknown>; updatedPermissions?: PermissionUpdate[]; toolUseID?: string; ... } + | { behavior: 'deny'; message: string; interrupt?: boolean; toolUseID?: string; ... }; +``` + +There is **no `behavior: 'ask'` variant.** `'deny'` requires +`message: string` (sent back to the model so it knows why) and +optionally `interrupt: true` to stop the turn entirely. For the +interactive-tool subset, the host returns `{ behavior: 'allow', +updatedInput }` once the user submits answers — the answers ride on +`updatedInput` so the tool's own handler sees the chosen values. 
+ +**`onElicitation` return type** ([sdk.d.ts:966](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L966), [sdk.d.ts:1163](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1163)): + +```ts +type OnElicitation = (request: ElicitationRequest, options: { signal: AbortSignal }) + => Promise<ElicitationResult>; // { action: 'accept' | 'decline' | 'cancel'; content?: ... } +``` + +`ElicitationRequest.mode` is `'form'` (with `requestedSchema: JSON +Schema`) or `'url'` (with `url: string` for OAuth-style flows). Both +are translatable to the `SessionInputQuestion[]` / `url` fields on +`SessionInputRequest` without loss. + +**Hooks deliberately not used for either flow.** The SDK exposes +`Elicitation` and `PreToolUse` hook events that can intercept these +callbacks before they fire. Hooks are not used because the user can +disable them entirely via settings — relying on them for permission +gating would create a silent-bypass class of bugs. `canUseTool` and +`onElicitation` are non-bypassable via SDK contract. + +**Per-session sequencer.** Both flows funnel through the same +`_sessionSequencer`; at most one outstanding permission/input +request per session is in flight at a time, matching the protocol's +single-threaded session contract. + +#### Sibling: how `CopilotAgent` implements the same surface + +The Copilot CLI agent ([`copilotAgentSession.ts`](../copilot/copilotAgentSession.ts)) +implements the identical IAgent contract against a different SDK. +The shape is the same; the SDK callbacks differ. + +| Concern | `ClaudeAgent` (this folder) | `CopilotAgent` ([../copilot/](../copilot/)) | +|---|---|---| +| Permission SDK callback | `Options.canUseTool(toolName, input, ...)` (one seam, dual-routed) | `SessionConfig.handlePermissionRequest(ITypedPermissionRequest)` — typed kind: `'read' \| 'write' \| ...` | +| Permission return shape | `{ behavior: 'allow', updatedInput? 
} \| { behavior: 'deny', message }` | `{ kind: 'approve-once' \| 'reject' }` | +| User-input SDK callback(s) | `canUseTool` for `INTERACTIVE_CLAUDE_TOOLS` **and** `Options.onElicitation` (MCP) | `SessionConfig.onUserInputRequest({ question, choices?, allowFreeform? })` — single seam for the `ask_user` tool | +| User-input return shape | `PermissionResult` `{ allow, updatedInput }` (interactive-tool path) or `ElicitationResult` `{ action, content? }` (MCP path) | `{ answer: string, wasFreeform: boolean }` | +| Pending state | `_pendingPermissions: Map>`, `_pendingUserInputs: Map` | Same two maps, same shape | +| Outbound permission signal | `pending_confirmation` progress event → `SessionToolCallReady` action | `pending_confirmation` progress event with `permissionKind` / `permissionPath` / `parentToolCallId` (subagent routing) | +| Outbound input signal | `ActionType.SessionInputRequested` with `SessionInputRequest { id, questions[] }` | Same action, same shape | +| Inbound resolution | `respondToPermissionRequest`/`respondToUserInputRequest` walk `_sessions.values()`, return `boolean` on first match | Identical pattern ([`copilotAgent.ts:1239-1254`](../copilot/copilotAgent.ts#L1239-L1254)) | +| Auto-approve hook | Defers to SDK `permissionMode` (`default` / `acceptEdits` / `plan` / `bypassPermissions`) | Host-side: internal session-resource paths, `copilot-tool-output-*.txt` SDK temp files, `autopilot` config | +| Edit-preview building | None at this layer (Phase 7) | Builds `FileEdit` with `pending-edit-content:` URI before firing `pending_confirmation` so the client can show a diff | +| `ExitPlanMode` analogue | `INTERACTIVE_CLAUDE_TOOLS` includes `'ExitPlanMode'`; routed through `SessionInputRequested` | `_pendingPlanReviews` map; `_resolveExitPlanMode` maps the response back to `IExitPlanModeResponse { approved, feedback?, selectedAction?, autoApproveEdits? 
}` | +| Status (Phase 6) | Stub — both methods throw `TODO: Phase 7` ([`claudeAgent.ts:790, 794`](claudeAgent.ts#L790-L794)). Re-implementation explicitly mirrors `copilotAgent.ts:1239-1254` (see [phase7-plan.md](phase7-plan.md)) | Fully implemented | + +The Copilot CLI SDK pre-resolves the Claude-side fan-in: its single +`onUserInputRequest` covers the `ask_user` case directly, and +`handlePermissionRequest` carries a richer `kind` so the host can +build a write-edit preview without inspecting tool-name strings. +The Claude SDK is lower-level — `canUseTool` is generic over every +tool, so the host has to do the routing itself via +`INTERACTIVE_CLAUDE_TOOLS`, and `onElicitation` exists as a separate +callback rather than being folded into the permission gate. + +### M4 — `abortSession(session)` + +| Direction | Host → SDK | +|---|---| +| SDK primitive | `AbortController.abort()` on the controller passed via `Options.abortController` (per-session). `Query.interrupt()` exists but is not the primary mechanism in the reference extension | +| Reference | [`claudeCodeAgent.ts:289, 733-735`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts#L289) | + +The abort signal is owned by the host: each session creates its own +`AbortController`, passes it on `Options.abortController`, and aborts +it to cancel the turn. The SDK propagates the signal into all +in-flight HTTP / subprocess work and closes the prompt iterable. +Sequencer-guarded: a queued abort runs after any in-flight +`sendMessage` has yielded its `SDKUserMessage` so the abort always +meets a Query in a defined state. The host also surfaces the +protocol `SessionTurnCancelled` action through the side-effects +layer. `Query.interrupt()` is left for an explicit "interrupt only" +follow-up if abort-signal semantics ever prove insufficient (Phase 9). 
+ +### M5 — `changeModel(session, model)` + +| Direction | Host → SDK | +|---|---| +| SDK primitive | `Query.setModel(modelId)` (runtime) **or** `Options.model` (startup) | +| Reference | hot-swap path in `_createPromptIterable` | + +Bijective. `Options.model` seeds the initial run; `Query.setModel` +swaps mid-session. Applied at the yield boundary alongside +`setPermissionMode` and `applyFlagSettings({ effortLevel })` +(the actual effort primitive — there is no `setEffort` method). +UI `'max'` is clamped to SDK `'xhigh'` at the seam. Effort levels +above the model's `max_thinking_tokens` ceiling are clamped by the +SDK; the host does not need to gate. + +### M6 — Customizations cluster + +| IAgent surface | Direction | SDK primitive | Notes | +|---|---|---|---| +| `setClientCustomizations(...)` | Host → SDK | None — host-only state | Drives plugin manager sync; visible to SDK only via `Options.plugins` and the `_META_CUSTOMIZATION_DIRECTORY` baked at startup | +| `setClientTools(...)` | Host → SDK | In-process MCP server tool registry | Adds/removes tool definitions on the host's own MCP server; the SDK reads them through the standard MCP protocol | +| `onClientToolCallComplete(...)` | Host → SDK | Resolves the in-process MCP tool's pending promise | Same mechanism as `respondToUserInputRequest` | +| `setCustomizationEnabled(uri, enabled)` | Host → SDK | `Query.reloadPlugins()` (runtime) | **Defer-and-coalesce** when busy: set `_pendingPluginReload`, drain at next yield. Idle path applies immediately. 
The SDK's `reloadPlugins` returns the refreshed `commands / agents / plugins / mcpServers` — useful as a verification probe but not required for correctness | +| `getCustomizations()` | SDK → Host (projection) | `Query.supportedCommands()` / `supportedAgents()` / `mcpServerStatus()` | Compose the live snapshot from runtime SDK queries plus the host plugin manager's enabled set | +| `getSessionCustomizations(session)` | SDK → Host (projection) | Same SDK queries, scoped per-session | Per-session because each Query has its own loaded plugin set | + +**Skills as plugins.** The SDK has no `Options.skills` field. A +directory containing a `skills/` subfolder *is* a valid plugin from +the SDK's point of view (`SdkPluginConfig { type: 'local', path }`). +The host can pass a "skills-only plugin" directory via +`Options.plugins` and the SDK loads every skill in it. Documented as +a first-class pattern, not a hack. + +**Mid-turn `reloadPlugins` is undocumented.** The SDK's TS surface +makes no statement about whether `reloadPlugins` interrupts an +in-flight model turn. The defer-and-coalesce pattern makes the +question moot for correctness; if a "force reload now" debug command +is ever added, mid-stream behavior must be tested empirically first. 
+ +### M7 — `getSessionMessages(session): Promise<readonly Turn[]>` + +| Direction | SDK → Host (replay) | +|---|---| +| SDK primitive | `getSessionMessages(sessionId, { dir, includeSystemMessages: true })` (out-of-process; reads JSONL transcript directly — no live `Query` required) | +| Subagent variant | `getSubagentMessages(rootSessionId, agentId, { dir })` | +| Reference | [sdkSessionAdapter.ts](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/sessionParser/sdkSessionAdapter.ts), [claudeMessageDispatch.ts](../../../../../../extensions/copilot/src/extension/chatSessions/claude/common/claudeMessageDispatch.ts) (live dispatcher) | + +**One IAgent method, two SDK calls.** The host disambiguates by URI +shape (`<parentSessionUri>/subagent/<toolCallId>` → walk up to the root +sessionId, call `getSubagentMessages`; otherwise +`getSessionMessages`). There is no protocol-level `getSubagentMessages`. + +**Always pass `includeSystemMessages: true`.** Cost is negligible +(few extra entries per session) and skipping it loses +`compact_boundary` records, which are user-visible context-loss +events. The mapper applies an explicit allowlist when surfacing +system messages as `SystemNotificationResponsePart`: + +| `SDKSystemMessage` subtype | Render? | Rationale | +|---|---|---| +| `compact_boundary` | Yes | "Conversation compacted" — context-loss event | +| `notification` (priority ≥ medium) | Yes | Loop-side text notifications | +| `api_retry`, `plugin_install`, `auth_status`, `status` | No | Live UI signals; not transcript content | +| `hook_started`, `hook_progress`, `hook_response` | No | Decorate the associated `ToolCall`, don't stand alone | +| anything else | Drop by default | Conservative; opt in subtypes as needs emerge | + +**Pagination.** `getSessionMessages` supports `limit` / `offset`; +`IAgent.getSessionMessages` returns a fully materialized +`readonly Turn[]`.
Not a blocker (the reference extension does the +same), but the SDK gives us pagination for free if a paginated +protocol variant is ever added. + +#### Flat `SessionMessage[]` → `Turn[]` + +The SDK returns a chronologically ordered flat list. The protocol +expects request/response cycles. Grouping rules, derived from real +on-disk transcripts: + +``` +For each SessionMessage in order: + ('user', content[0].type === 'text'): + → start new Turn. Turn.id = sessionMessage.uuid. + Concatenate text blocks → userMessage.text. Attachments are + not in SessionMessage (stripped by SDK); replay turns get no + attachments. Acceptable: replay is for display, not re-send. + ('user', content[0].type === 'tool_result'): + → DO NOT start a new Turn. Locate the open ToolCall response + part in the current Turn whose toolCallId === tool_use_id and + attach result content + is_error. + ('user', content empty / hook-injected / shouldQuery: false): + → noise; skip. These are not turn-starters. + ('assistant', for each content block in order): + 'text' → push MarkdownResponsePart + 'thinking' → push ReasoningResponsePart + 'tool_use' → push ToolCallResponsePart (open; awaits tool_result); + record tool_use_id → Turn.id in the attribution map + empty → skip + ('system', subtype === 'compact_boundary'): + → push SystemNotificationResponsePart (compact metadata) + ('system', other allowlisted subtypes): + → push SystemNotificationResponsePart per allowlist above + ('system', other): + → drop +``` + +**Turn-level fields on replay.** +- `state` is `'completed'` for any Turn that's followed by a later + message. The tail Turn's state is unknowable from history alone; + default to `'completed'` if no orphan `tool_use` blocks remain, + otherwise mark incomplete. +- `usage` is `undefined` on replay. The SDK does not surface + per-message usage in `SessionMessage`. See M8 for the live-vs-replay + metadata asymmetry. +- `compact_boundary` is **not** a Turn boundary in either path. 
Surface + it as a `SystemNotification` part on the *currently active* Turn and + continue. The SDK's `logicalParentUuid` already linearises the chain + across the boundary; the mapper trusts it and does not re-derive. + +#### Subagent ToolCall on replay + +A subagent invocation lives as one Agent/Task `tool_use` envelope plus +its paired `tool_result` envelope in the parent transcript; the +subagent's own messages live in `/subagents/.jsonl` and +require a separate `getSubagentMessages` call. Replay never inlines +subagent Turns — the protocol's URI shape +(`/subagent/`) is the navigation seam. + +**Content shape on the parent Turn's completed `ToolCall`:** + +```ts +result: { + success, + pastTenseMessage, + content: [ + ...mappedToolResultBlocks, // text/structured from the tool_result envelope + { type: ToolResultContentType.Subagent, // navigation marker + resource: buildSubagentSessionUri(parentURI, toolCallId), + title, agentName, description } + ], +} +_meta: { toolKind: 'subagent', subagentDescription: } +``` + +Mirrors the live merge in +[`agentSideEffects.ts`](../agentSideEffects.ts) — the workbench +renderer reads `ToolCallCompletedState.content[]` either way, so live +and replay produce identical shapes. + +**`_meta.toolKind = 'subagent'` is the durable discriminator.** The +renderer falls back to `_meta.toolKind` when the Subagent content +block is absent, so the mapper sets it on every Agent/Task tool +invocation regardless of whether the marker is attached. + +**Workbench drives the second call.** `getSessionMessages(parentURI)` +returns parent Turns with subagent markers; when the user opens a +marker, the workbench calls `getSessionMessages(subagentURI)`. The +host dispatches by URI shape, walks up to root via +`parseSubagentSessionUri`, and calls SDK `getSubagentMessages`. One +SDK call per `IAgent.getSessionMessages` invocation — subagent +transcripts are fetched lazily, never eagerly. 
+ +**Replay produces only terminal ToolCall states.** `Streaming` / +`PendingConfirmation` / `Running` / `PendingResultConfirmation` are +live-only lifecycle states. Replay flattens straight to `Completed` +or `Cancelled`. The "running content merges into result on complete" +dance from the live path is not reproduced. + +### M8 — Live `Query: AsyncGenerator<SDKMessage, void>` + +| Direction | SDK → Host (live) | +|---|---| +| SDK primitive | `for await (const message of query) {...}` | +| Reference | [claudeCodeAgent.ts](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts) `_processMessages`, [claudeMessageDispatch.ts](../../../../../../extensions/copilot/src/extension/chatSessions/claude/common/claudeMessageDispatch.ts) `dispatchMessage` | + +`SDKMessage` is a much wider union than `SessionMessage`. The live +mapper handles cases the replay path never sees: + +| `SDKMessage` variant | Mapper action | +|---|---| +| `SDKAssistantMessage` (`type: 'assistant'`) | Same as replay: push response parts. **Reconcile** with any partial accumulator first — final is canonical | +| `SDKPartialAssistantMessage` (`type: 'stream_event'`) | Update `ActiveTurn` response parts incrementally (text deltas concatenate, tool-input JSON deltas accumulate then parse on completion). Drives streaming UI | +| `SDKUserMessage` / `SDKUserMessageReplay` | Tool results streaming back; same flattening as replay | +| `SDKResultMessage` (`type: 'result'`) | **Closes the Turn**: `usage`, `total_cost_usd`, `state` (`success` / `error_max_turns` / `error_during_execution`), `error`.
**No replay equivalent.** This is the trigger for `requestComplete` | +| `SDKCompactBoundaryMessage` | Same as replay | +| `SDKNotificationMessage`, `SDKStatusMessage`, `SDKHookStartedMessage`, `SDKHookProgressMessage`, `SDKHookResponseMessage`, `SDKToolProgressMessage`, `SDKAPIRetryMessage`, `SDKPluginInstallMessage`, `SDKTaskStartedMessage`, `SDKTaskUpdatedMessage`, `SDKTaskProgressMessage`, `SDKAuthStatusMessage`, `SDKMemoryRecallMessage`, `SDKToolUseSummaryMessage`, `SDKFilesPersistedEvent`, `SDKRateLimitEvent`, `SDKLocalCommandOutputMessage`, `SDKSessionStateChangedMessage`, `SDKDeferredToolUse`, `SDKElicitationCompleteMessage`, `SDKPromptSuggestionMessage` | Various live-only signals. The reference dispatcher handles only `assistant`, `user`, `result`, and 4 system subtypes (`compact_boundary`, `hook_started`, `hook_progress`, `hook_response`); everything else logs as "known unhandled" or "unknown". Side-channel events drive UI (typing indicator, progress, retry banners, hook badges) but do not become protocol response parts | + +**Partials are advisory; final `SDKAssistantMessage` is canonical.** +When the final lands, replace whatever the partial accumulator built. +Trades minor visual flicker for correctness. + +**One mapper, two drivers.** Live and replay must hydrate the same +internal maps (especially `tool_use_id → turnId`) so `tool_result` +events delivered after a session restore can resolve to the +announcing `tool_use`'s `turnId`. The mapper is the single seam. + +**Boundary asymmetry vs replay.** Live closes a Turn on +`SDKResultMessage`; replay closes a Turn on the next +non-`tool_result` `user` envelope. Live cannot wait for the next user +message — the user might never send one and a perpetually-running +Turn would surface as a hung UI. Replay cannot use `SDKResultMessage` +— the SDK does not persist it to JSONL (it carries live-only `usage` +/ `cost` / `permission_denials`). Same logical Turn, two different +end signals. 
+ +**Live-only Turn metadata.** `total_cost_usd`, `usage`, +`permission_denials`, `is_error` arrive on `SDKResultMessage` and are +written to the Turn at the moment it closes. Replayed Turns have none +of this — `usage` is `undefined`, cost is unknown. Acceptable +asymmetry: replay is for display, not accounting. Backfilling from +`getSessionInfo` is possible but deferred until a concrete consumer +needs it. + +### M9 — Lifecycle: `createSession` (incl. fork), `onDidMaterializeSession`, `disposeSession`, `onArchivedChanged`, `shutdown`, `dispose` + +| Direction | Host → SDK (lifecycle), Client → Host (state) | +|---|---| + +The lifecycle surface is **not** a single linear pipeline. It splits +into three orthogonal axes that the catalogue keeps strictly +separate: + +1. **Birth axis**: `createSession` (optionally with a `fork` config), + followed (lazily, on first `sendMessage`) by an internal + *materialization* that the agent signals via + `onDidMaterializeSession`. Provisional draft → live SDK `Query` + and (optionally) a worktree. +2. **Soft-state axis**: `onArchivedChanged(uri, isArchived)`. + Toggles whether a materialized session's worktree directory is + present on disk; SDK session and per-session DB are untouched. +3. **Teardown axis**: `disposeSession` (single) and + `shutdown` / `dispose` (provider-wide). Tears down the SDK + `Query`, in-memory wrapper, and process-owned worktree. + +There is **no separate `IAgent.fork` method.** Forking is a flavor of +`createSession` — clients pass +`IAgentCreateSessionConfig.fork = { session, turnIndex, turnId, turnIdMapping? }` +([agentService.ts:222-234](../../common/agentService.ts#L222-L234)) +and the agent decides how to realize it. + +There is also **no `IAgent.materialize()` method.** Materialization +is an internal concern of the agent. The IAgent surface exposes only: +- `IAgentCreateSessionResult.provisional?: boolean` — a hint that the + session has no on-disk state yet. 
+- `IAgent.onDidMaterializeSession?: Event` — + fired *once* by the agent when a previously-provisional session has + its SDK session, worktree (if any), and on-disk metadata in place. + The `IAgentService` uses this event to defer the `sessionAdded` + protocol notification so observers don't see a half-formed session. + +| IAgent surface | SDK primitive(s) | What it does | +|---|---|---| +| `createSession(config)` (no `fork`) → `IAgentCreateSessionResult { provisional: true }` | none (no SDK call) | Records a **provisional** session: id, requested `workingDirectory` (= the repo path the client passed in), title, model, etc. No `Query`. No on-disk session file. The agent reserves the eventual session id locally. The session shows up in `listSessions` but cannot receive messages until materialized. | +| `createSession({ fork: { session, turnIndex, turnId, turnIdMapping? } })` → `IAgentCreateSessionResult { provisional: false }` | `forkSession(parentSessionId, { upToMessageId: lastUuidOfTurn(turnId), title? })` → `{ sessionId }` | **Materializes immediately on disk** because `forkSession` writes the new session file synchronously. SDK rewrites every message UUID and rebuilds the `parentUuid` chain. Result is **not provisional**. No `Query` is started yet — that still happens lazily on first `sendMessage` — but because the session already exists on disk, the materialization path will use `resume: forkedSessionId` in `Options` (see *Fresh vs resumed* below). The agent fires `onDidMaterializeSession` here, immediately after `forkSession` returns. | +| (internal) first `sendMessage` on a provisional session | `query({ options })` with `Options.sessionId = sessionId` (fresh) or `Options.resume = sessionId` (resumed) | Triggers internal materialization. Resolves effective `workingDirectory` (Copilot may create a worktree); constructs `Options`; starts `Query`; fires `onDidMaterializeSession`; then proceeds to send the actual user message. 
Subsequent `sendMessage` calls reuse the live `Query`. | +| `onArchivedChanged(uri, isArchived)` (optional) | none (SDK untouched) | Soft, reversible. `true`: remove worktree dir from disk if branch is preserved and tree is clean (Copilot's `_cleanupWorktreeOnArchive`). `false`: `git worktree add --existing` against the preserved branch (`_recreateWorktreeOnUnarchive`). SDK session, per-session DB, branch all untouched. Claude does not implement this yet. | +| `disposeSession(sessionId)` | `Query.interrupt()` + `Query.return()` (or asyncDispose) | Full teardown of one session: kill SDK `Query`, drop in-memory wrapper, delete state-manager entry, and (Copilot) remove the worktree if it was created in this process. Triggered by explicit protocol `disposeSession` or the empty-session GC. | +| `shutdown()` | per-session `Query.interrupt()` + asyncDispose, serialized through a sequencer; then SDK client stop | Graceful, async, **memoized** drain of all sessions. Walks `_sessions` ∪ `_createdWorktrees`, runs `_destroyAndDisposeSession` per id through `_sessionSequencer` so it interleaves with concurrent `sendMessage` / `disposeSession`. Claude additionally aborts provisional `AbortController`s first so any racing `await sdk.startup()` unwinds cleanly. | +| `dispose()` | synchronous teardown of provider | Hard provider teardown. Copilot: kicks off `shutdown()` and chains `super.dispose()` in `.finally` (cooperatively reuses the memoized drain). Claude: aborts provisional controllers, then `super.dispose()` synchronously disposes `_sessions` (each wrapper interrupts/asyncDisposes its `Query`), then releases `_proxyHandle` — wrapper-before-proxy ordering is load-bearing. | + +#### Birth axis: provisional → materialized + +The two-phase contract is locked at the IAgent layer +([agentService.ts](../../common/agentService.ts) IAgent +`createSession` + `IAgentCreateSessionResult.provisional` + +`onDidMaterializeSession`). 
It is a **client-observable result flag +plus an event**, not a method pair. + +- **Provisional state (non-fork `createSession`).** Returns + immediately with the caller's requested `workingDirectory` (= the + repo path) and `provisional: true`. No `Query`, no worktree, no + SDK subprocess, no on-disk session file. The session is listable + (though `IAgentService` defers the `sessionAdded` notification until materialization), but + message-sending is the trigger that promotes it. Drafting is + cheap: the user can compose a prompt, cancel, change models, etc. + without paying for an SDK process or a git worktree. +- **Non-provisional state (fork `createSession`).** `forkSession` + writes the new session file to disk synchronously, so the result + has `provisional: false` and `onDidMaterializeSession` fires + immediately. No `Query` is started yet; that still happens lazily. + But the materialization path that runs on first `sendMessage` will + treat the session as *resumed* rather than *fresh* — see below. +- **Internal materialization on first `sendMessage`.** When the + agent receives `sendMessage` for a session that does not yet have a live `Query` + (whether the on-disk file exists from fork or doesn't exist yet), + it: + 1. Resolves the effective `workingDirectory`. Copilot's + `_resolveSessionWorkingDirectory` consults `_createdWorktrees` + and may run `git worktree add` on a fresh branch. Claude + currently uses the requested path as-is. + 2. Constructs SDK `Options` with that `cwd` and all other + startup-only fields. + 3. Calls `query({ options })` → SDK spawns the CLI subprocess, + which inherits `cwd`. Stores the resulting `Query` in + `_sessions`. + 4. Fires `onDidMaterializeSession` (only if it has not already + been fired — the fork path fires it earlier). +- **Why worktree before `query()`?** SDK subprocess `cwd` is fixed + at spawn time and cannot be changed afterwards. Worktree creation + must complete before `query()` or the SDK runs in the wrong + directory.
Reversing the order would require restarting the SDK subprocess to + relocate it. + +##### Fresh vs resumed: `Options.sessionId` vs `Options.resume` + +The SDK distinguishes a session id that the host wants the SDK to +**create** from one it wants the SDK to **resume**. They are +mutually exclusive on `Options`. The reference extension picks +between them based on whether the session file already exists on +disk +([claudeCodeAgent.ts:457-459](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts#L457-L459)): + +```ts +// Use sessionId for new sessions, resume for existing ones (mutually exclusive) +...(this._isResumed + ? { resume: this.sessionId } + : { sessionId: this.sessionId }), +``` + +Mapping that to the IAgent lifecycle: + +| Origin | On-disk session file exists at materialize time? | `Options` field | +|---|---|---| +| Fresh non-fork session (provisional → materialize on first `sendMessage`) | No | `sessionId: <sessionId>` — SDK creates the session file | +| Forked session (`forkSession` already wrote the file) | Yes | `resume: <sessionId>` — SDK resumes the on-disk transcript | +| Restored / imported session (already on disk from a prior process) | Yes | `resume: <sessionId>` | + +The agent must track "is this session id backed by an on-disk file +yet?" across its provisional/materialize lifecycle so it picks the +correct field. The fork path flips the bit at `forkSession` return +time; the fresh path flips it at `query({ sessionId })` return time. + +#### Fork sub-flow + +Fork is **not** a separate IAgent method. It is a config field on +`createSession`: +[`IAgentCreateSessionConfig.fork`](../../common/agentService.ts#L222-L234). + +Protocol → SDK shape: + +| Layer | Field | Purpose | +|---|---|---| +| Protocol (`fork`) | `session: URI` | Parent session to fork from | +| Protocol (`fork`) | `turnIndex: number` | Position of the cut, for client-side display / validation | +| Protocol (`fork`) | `turnId: string` | Opaque protocol turn id at the cut.
**Equal to the SDK uuid of the user `SessionMessage` that *started* turn T** (see `Turn.id` glossary). The agent must translate this to the uuid of the *last* SessionMessage of turn T before passing it to the SDK — see the `upToMessageId` derivation below. | +| Protocol (`fork`) | `turnIdMapping?: ReadonlyMap` | Service-layer-populated old→new protocol turn id table. The agent uses this to rewrite per-turn metadata (event-id mappings, etc.) in the session DB so the forked transcript still resolves correctly | +| SDK (`forkSession` opts) | `upToMessageId` | **Inclusive** message-uuid cutoff. Receives the uuid of the last SessionMessage of turn T — i.e. the SessionMessage immediately before turn T+1's user message, or the final SessionMessage in the transcript if T is the last turn. **Not** the protocol `turnId` itself: passing `turnId` directly would slice at the *user* message that *started* T, dropping T's assistant reply and tool results entirely. | +| SDK (`forkSession` opts) | `title?` | Optional fork title; defaults to `${parentTitle} (fork)` | +| SDK (`forkSession` opts) | `dir?` | Project directory; agent supplies from session config | +| SDK result | `{ sessionId }` | New session UUID, resumable via `resumeSession(sessionId)` — handed back to the IAgent layer as the new session's id | + +Key properties of SDK `forkSession` +([sdk.d.ts](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) lines 552–571): + +- **It is a top-level SDK function, not a `Query` method.** No live + session is required to fork — the SDK reads the parent transcript + off disk and writes a new one. +- **`upToMessageId` is inclusive on the SessionMessage axis, not the + Turn axis.** The SDK has no notion of "turn." It slices the + parent's flat SessionMessage stream up to and including the + message whose uuid matches `upToMessageId`. 
To fork "through the + end of turn T" the agent must walk the parent transcript and pick + the *last* SessionMessage of T — the message immediately before + turn T+1's user `SessionMessage`, or the final SessionMessage of + the transcript if T is the last turn. Passing protocol `turnId` + directly would cut at T's *user* message and drop the assistant + reply, tool calls, and tool results that belong to T. +- **UUID remap is exhaustive.** Every message UUID is rewritten and + the `parentUuid` chain is rebuilt. The protocol-level invariant + that `Turn.id` ≡ user-message uuid means the host must regenerate + fresh `Turn.id`s for the forked turns; that's exactly what + `turnIdMapping` records. +- **No undo history copy.** File-history snapshots are not copied, + so a fork starts with an empty undo stack on its own files. +- **Fork → materialize is independent.** `forkSession` only writes + the new session file; it does not start a `Query`. Because the + file now exists, the IAgent layer marks the result + `provisional: false` and fires `onDidMaterializeSession` + immediately, but the SDK `Query` is still not started until the + first `sendMessage`. At that point the session goes through the + same internal materialization path as a fresh session, except + `Options` carries `resume: forkedSessionId` instead of + `sessionId: `. A worktree, if requested, is created at + that moment exactly as for a non-forked session. + +#### Soft-state axis: archive ≠ dispose + +Archive is **not** "session is on-disk-only, restored via +materialize." Materialization is a one-way edge from provisional to +live. Archive is a **toggle on a materialized session** that controls +only the worktree-on-disk question. 
Concretely: + +| | `disposeSession` | `onArchivedChanged(true)` | `onArchivedChanged(false)` | +|---|---|---|---| +| SDK `Query` | Killed | Untouched | Untouched | +| In-memory wrapper | Dropped | Untouched | Untouched | +| State-manager entry | Deleted | `isArchived: true` persisted | `isArchived: false` persisted | +| Worktree directory | Removed (if process-owned) | Removed (if branch preserved + tree clean) | Re-added via `git worktree add --existing` | +| Branch | Untouched | Preserved | Preserved | +| `listSessions` | Gone | Returned with `isArchived: true` | Returned with `isArchived: false` | +| `restoreSession` | N/A | Works as for any session | Works as for any session | + +There is **no required ordering** between archive and dispose. Both +are independent triggers (UI archive vs. GC / explicit dispose) and +both independently include worktree removal on the Copilot path. +In practice the typical flow is: archive +first (soft, reversible), then maybe dispose later (hard, +irreversible). + +`AgentSideEffects` is the seam that persists `isArchived` to the +per-session DB and forwards to `agent.onArchivedChanged?` (errors +logged, not awaited) — so archive is fire-and-forget from the +client's perspective; the provider's worktree work runs in the +background. + +#### Teardown axis: `disposeSession` vs `shutdown` vs `dispose` + +| Surface | Scope | Sync? | Reuses `shutdown`? | Notes | +|---|---|---|---|---| +| `disposeSession(id)` | one session | async | n/a | Routes through `_sessionSequencer` so it serializes against in-flight `sendMessage`. Worktree removal consults the **in-memory** `_createdWorktrees` map — sessions created in a previous process lifetime are not removed by this path; archive cleanup picks them up via the persisted DB metadata. | +| `shutdown()` | all sessions | async, memoized | self | Walks `_sessions ∪ _createdWorktrees`. Memoization (`_shutdownPromise`) means concurrent calls fold into one drain.
Claude aborts provisionals first to unwind racing `sdk.startup()` awaits. | +| `dispose()` | provider | sync surface; may chain async | Copilot: yes; Claude: no | Copilot: `shutdown().finally(super.dispose)` — cooperative. Claude: synchronous wrapper-then-proxy teardown, no graceful drain. The provider choice is intentional: Claude's wrapper is the stronger ownership and must dispose before the IPC handle. | + +`AgentService.shutdown` fans out `provider.shutdown()` in parallel; +`AgentService.dispose` calls `provider.dispose()` on every provider +and then `super.dispose()`. The host does not enforce a strict +shutdown-before-dispose order across providers — each provider +internally decides whether `dispose` chains `shutdown` or not. + +#### Invariants + +- **Provisional sessions own no SDK resources.** A provisional + session exists only in the host's state manager; killing the + process leaves no SDK subprocess to clean up. This is why the + empty-session GC can dispose them without coordination. A forked + session is **not** provisional even though it has no live `Query` + yet — its session file is on disk and disposal must remove it. +- **Materialization is one-way and signaled by event, not method.** + There is no `materialize()` call on `IAgent` and no + de-materialize. The agent fires `onDidMaterializeSession` exactly + once per session, either from the fork path (immediately on + `createSession` return) or from the first-`sendMessage` path + (after `query()` returns). Once a session has been materialized, + archive is the soft path back to "on-disk transcript, no live + worktree." +- **`Options.sessionId` and `Options.resume` are mutually + exclusive.** The host must track on-disk existence per session id + to choose the right one. Getting this wrong yields either a + duplicate-id error from the SDK (passing `sessionId` for a file + that exists) or a missing-session error (passing `resume` for a + file that does not). 
+- **Worktree ownership is provider-owned, not IAgent-owned.** IAgent + has no `worktree` concept; `workingDirectory` is the only contract. + Whether that directory is a worktree, a plain repo path, or + something else is a provider implementation detail. +- **Archive does not unlive a session.** The SDK `Query` keeps + running if it was running; clients should not assume archive + implies "no in-flight turn." UI that needs that guarantee must + combine archive with an explicit `abortSession`. +- **`shutdown` is the graceful path; `dispose` is the floor.** Code + that needs cleanup ordering (e.g. flushing telemetry, persisting + final state) must hook into `shutdown`, not `dispose`. `dispose` + may run after a crash with no async work permitted. + +#### Asymmetries between Copilot and Claude + +| | CopilotAgent | ClaudeAgent | +|---|---|---| +| Worktree on materialize | Yes (`_resolveSessionWorkingDirectory` + `_createdWorktrees`) | No (uses requested `workingDirectory` as-is) | +| `onArchivedChanged` | Implemented (clean + recreate) | Not implemented | +| `disposeSession` worktree path | Consults in-memory `_createdWorktrees` | No worktree state to clean | +| `shutdown` provisional handling | Drops provisional records + `_activeClients` snapshot | Aborts provisional `AbortController`s first to unwind racing `sdk.startup()` | +| `dispose` strategy | Chain `shutdown().finally(super.dispose)` | Synchronous wrapper-then-proxy teardown (no graceful drain) | +| Sequencer | `_sessionSequencer` (per-session) | `_disposeSequencer` (drain only) | + +These asymmetries are deliberate. Copilot's worktree story requires +process-lifetime metadata to clean up correctly, so its dispose path +is heavier and benefits from reusing the memoized graceful drain. +Claude's wrapper owns the SDK `Query` directly and has a +load-bearing wrapper-before-proxy disposal order that doesn't +compose well with a chained shutdown. 
+ +### M10 — Steering and truncation: `setPendingMessages`, `IAgentSteeringConsumedSignal`, `truncateSession` + +| Direction | Client → Host (write) for steering / truncation, Host → Client (signal) for ack | +|---|---| + +Steering and truncation are the two IAgent surfaces that mutate a +session's *content* (rather than its lifecycle or its config). +They are deliberately small surfaces because the host owns the +messy parts (queueing, timing, ack semantics) and the agent only +sees the deltas it can act on. + +#### Steering: `setPendingMessages` + `IAgentSteeringConsumedSignal` + +| IAgent surface | SDK primitive(s) | What it does | +|---|---|---| +| `setPendingMessages?(session, steeringMessage, queuedMessages)` (optional) | Yield an `SDKUserMessage` with `priority: 'now'` into the prompt iterable that was passed to `query()` ([sdk.d.ts:3067-3086](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L3067-L3086)) | Notifies the agent that the session's pending-message state changed. The agent reacts by yielding the steering content as an `SDKUserMessage` whose `priority` is `'now'`, which the SDK treats as "preempt the current turn and run me first." | +| (outbound signal) `AgentSignal { kind: 'steering_consumed', session, id }` | n/a (host-emitted on SDK ack) | Agent fires this signal when the SDK confirms the steering message was delivered to the model. Host then dispatches `SessionPendingMessageRemoved` so the client clears the pending pill. | + +##### Pending-message taxonomy (locked at the protocol layer) + +The protocol distinguishes two kinds of pending messages +(`PendingMessageKind` in +[sessionState.ts](../../common/state/sessionState.ts)): + +| Kind | Semantics | Lifecycle | +|---|---|---| +| `Steering` | Inject *into the running turn* as additional context. The model sees it before its current generation completes. 
| Set while turn is in flight; consumed when the SDK acks the inject; removed via `IAgentSteeringConsumedSignal`. | +| `Queued` | Hold until the current turn finishes, then send as a normal `sendMessage`. | Set while turn is in flight; **server consumes server-side** by issuing `sendMessage` when the turn completes; never forwarded to the agent. | + +This taxonomy is the reason the `setPendingMessages` signature has +two parameters but only one of them ever carries a value at the +agent boundary: + +```ts +setPendingMessages?( + session: URI, + steeringMessage: PendingMessage | undefined, + queuedMessages: readonly PendingMessage[] +): void; +``` + +`queuedMessages` is **always empty** when this method is called on +the agent. The host keeps the queued list internally; once the +in-flight turn completes, the host pops the head of the queue and +calls `sendMessage` for it. The signature exposes the queue param +only so future agents (e.g. a hypothetical agent that wants to +display the queue itself) could opt in — today no agent does. + +##### Why `priority: 'now'` is the steering primitive + +`SDKUserMessage` carries an optional `priority` field with three +values that map directly onto an SDK-internal command queue +([cli.js:1030](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/cli.js#L1030): +`{ now: 0, next: 1, later: 2 }`). Lower number = higher dequeue +priority: + +| Value | Behaviour | Default for | +|---|---|---| +| `'now'` | The SDK aborts the in-flight turn (`abortController.abort("interrupt")`) the moment a `'now'`-tagged entry lands in the queue, then runs that message as the next turn. | Nothing — must be set explicitly. | +| `'next'` | Queued; runs as a normal turn after the current turn finishes. | User-typed prompts, MCP channel messages, regular slash commands. | +| `'later'` | Queued behind any pending `'next'` messages; runs only when nothing more urgent is waiting. 
| Task notifications, scheduled cron firings, sub-task results. | + +So "steering" reduces to one line: yield an `SDKUserMessage` with +`priority: 'now'` into the same prompt iterable the session was +started with. The SDK's watcher +([cli.js:8661](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/cli.js#L8661)) +sees the `'now'` entry, fires the abort, and the dequeue helper +immediately picks the message up because of its weight-`0` +position. + +##### `streamInput()` vs prompt iterable vs `interrupt()` + +The SDK exposes three operations that *could* be relevant to +mid-turn injection. Only one is actually used in the reference +Claude implementation: + +| Mechanism | Reference Claude impl uses it? | Role | +|---|---|---| +| Prompt iterable (passed to `query({ prompt })`) | **Yes** — [claudeCodeAgent.ts:504-506, 518-587](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts#L504-L506) | Long-lived `async function*` that yields *every* `SDKUserMessage` for the session's lifetime. | +| `Query.streamInput(stream)` ([sdk.d.ts:1910](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1910)) | **No** — zero callers under `extensions/copilot/src/extension/chatSessions/claude/**` | An alternate transport for pushing additional messages into a live `Query`. The reference impl never invokes it; the prompt iterable absorbs that role. | +| `Query.interrupt()` ([sdk.d.ts:1745](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1745)) | **No** for steering — the abort is implicit, driven by the SDK's own `'now'`-priority watcher. The host *does* use the `AbortController` for explicit aborts (M4). | Stops generation outright; would orphan the steering message. 
| + +Key insight: **transport and routing are orthogonal.** The prompt +iterable (or `streamInput`) is the *channel* by which a message +reaches the SDK; `priority` is the *per-message routing hint* the +SDK applies after the message arrives. Steering on the Claude SDK +does not need a special transport — yielding into the existing +prompt iterable with `priority: 'now'` is sufficient. + +##### Reference Claude impl: every message is `'now'` + +[claudeCodeAgent.ts:580](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts#L580) +yields *every* user message with `priority: 'now'`, not just +steering messages: + +```ts +yield { + type: 'user', + message: { role: 'user', content: prompt }, + priority: 'now', + parent_tool_use_id: null, + session_id: this.sessionId, + uuid: request.request.id as `${string}-${string}-${string}-${string}-${string}` +}; +``` + +This is correct *for the world the reference impl lives in*. The +standalone chat-session UI has one text input and no protocol-level +notion of "queued"; from the user's point of view, **every message +typed during a live turn is a steering message** by definition. +There is no other intent it could carry. The reference impl is +therefore not papering over a distinction — the distinction simply +doesn't exist at its layer. Tagging every message `'now'` faithfully +encodes "the user wants this seen as soon as possible": + +- If no turn is in flight, the SDK has nothing to abort — `'now'` + reduces to "run me right away," the same outcome `'next'` would + produce in an empty queue. +- If a turn *is* in flight, the user is steering — that's the only + intent the chat-session UI can express. + +The IAgent protocol is **richer**: the host explicitly distinguishes +two operations the chat-session UI can't. + +| Protocol op | Intent | What the user did | +|---|---|---| +| `sendMessage` | "Run this as a turn." | Hit Enter normally. 
| +| `setPendingMessages` (Steering) | "Inject into the running turn." | Pressed the dedicated steering control while a turn was generating. | +| `setPendingMessages` (Queued) | "Hold this until the current turn finishes, then send normally." | Hit Enter while a turn was generating, with a UI that exposes a queue. | + +Because the host already separates these intents, an IAgent Claude +provider should honor them at the SDK seam: + +| Protocol op | SDK priority | Why | +|---|---|---| +| `sendMessage` | `'next'` (or unset — `'next'` is the SDK default) | New turn; should run after the current turn finishes if one is in flight. | +| `setPendingMessages` Steering | `'now'` | Preempt the current turn (this is the whole point of the steering operation). | +| `setPendingMessages` Queued | n/a at agent boundary | Host consumes server-side and re-issues as `sendMessage` (which is `'next'`) when the turn ends. | + +So the IAgent Claude provider doesn't mirror the reference impl's +"everything is `'now'`" — it uses the richer information the host +already gives it. The reference impl isn't wrong; it's just +operating without the distinction the protocol exposes. + +##### Steering ack semantics + +The signal `IAgentSteeringConsumedSignal { kind: 'steering_consumed', session, id }` +([agentService.ts:359-362](../../common/agentService.ts#L359-L362)) +is **not** emitted when the iterable's `yield` resolves — yielding +only means the SDK accepted the message into its command queue. +The agent emits the signal when the SDK actually surfaces the +message to the model (the next `SDKUserMessage` echo on the event +stream after the SDK's `'now'`-watcher has aborted the previous +turn and dequeued this message). This matches the client's +expectation: the pending-message pill should clear when the model +has *seen* the steering, not when the queue accepted it.
+ +The host's reaction to the signal is to dispatch +`SessionPendingMessageRemoved { kind: PendingMessageKind.Steering, id }` +through the state machine +([reducers.ts](../../common/state/protocol/reducers.ts) line 743). +This is the final step of the four-step steering round trip: + +1. Client writes `SessionPendingMessageSet { kind: Steering, ... }`. +2. Host forwards the new state to `IAgent.setPendingMessages`. +3. Agent emits `steering_consumed` after SDK ack. +4. Host dispatches `SessionPendingMessageRemoved { kind: Steering, id }`. + +##### Steering vs `sendMessage` boundary + +A steering message is **not** a turn boundary. It does not get a +`Turn.id`, does not appear as a separate user `Turn` in +`getSessionMessages`, and does not emit a `SessionTurnStart`. From +the protocol Turn perspective it is invisible — its content shows +up as part of the *next* assistant message in the current turn, +because the model received it mid-generation and folded it into +the response. The agent's transcript reconstruction +(`getSessionMessages`, M7) collapses the SDK's intermediate +`SDKUserMessage` for steering into the in-progress Turn rather +than starting a new one. This is an asymmetry vs `sendMessage` +that consumers must understand: a UI showing "turns" should not +expect each pending-message-set + steering-consumed pair to add a +row. + +#### Truncation: `truncateSession` + +| IAgent surface | SDK primitive(s) | What it does | +|---|---|---| +| `truncateSession?(session, turnId?)` (optional) | None on the Claude SDK; provider-specific RPC on Copilot. Claude composes via `forkSession` instead. | Mutates the session's transcript to keep turns up to and including `turnId` (or remove all turns if `turnId` is undefined). The session's URI / id is preserved — this is **in-place** mutation, not "make a new session." | + +##### Protocol semantics + +From [`agentService.ts:509-513`](../../common/agentService.ts#L509-L513): + +> Truncate a session's history.
If `turnId` is provided, keeps turns up to and including that turn. If omitted, all turns are removed. + +The `?` is load-bearing: this method is **optional**. Agents that +cannot truncate in place leave it `undefined` and the host falls +back to fork (which produces a new session id) at a higher layer +when the user explicitly asks for "rewind here." + +##### Copilot: in-place via SDK RPC + +CopilotAgent implements `truncateSession` +([copilotAgent.ts:1179-1212](../../node/copilot/copilotAgent.ts#L1179-L1212)). +Two key translations between protocol and SDK semantics: + +| Protocol | SDK | +|---|---| +| `turnId` = last turn to **keep** (inclusive) | `eventId` = first event to **remove** (everything from this point forward is dropped) | +| `truncateSession(session, turnId)` | `entry.getNextTurnEventId(turnId)` → `entry.truncateAtEventId(eventId, turnId)` | +| `truncateSession(session)` (omit turnId) | `entry.getFirstTurnEventId()` → `entry.truncateAtEventId(eventId)` (remove all) | + +The Copilot SDK exposes a session-mutation RPC +(`truncateAtEventId`) that rewrites the on-disk transcript in +place. The agent serializes the call through `_sessionSequencer` +so it doesn't race with `sendMessage` or `disposeSession`. +Provisional sessions short-circuit (nothing to truncate). + +##### Claude: deliberately not implemented + +Claude's roadmap explicitly excludes `truncateSession` +([roadmap.md:804-807](../../node/claude/roadmap.md)): + +> **Do NOT implement `IAgent.truncateSession`**. The SDK's `forkSession` +> always mints a new session ID, which is incompatible with the protocol's +> expectation that `truncateSession` mutates the existing session URI in +> place. `truncateSession?` is optional in `IAgent`... + +The Claude SDK has **no in-place transcript-mutation primitive**. 
+Available related primitives: + +| SDK primitive | Why it doesn't map to `truncateSession` | +|---|---| +| `forkSession(sessionId, { upToMessageId })` | Mints a *new* session id; protocol requires same URI. Already mapped under M9 fork sub-flow. | +| `Query.interrupt()` | Stops generation; doesn't remove past turns. | +| Compaction (`PreCompact` / `PostCompact` hooks, `SDKCompactBoundaryMessage`) | Summarizes earlier turns into a synthetic message; lossy and SDK-driven, not host-driven point truncation. | + +Consumers that want "rewind to here" against a Claude session must +either (a) call `createSession({ fork: { ... } })` to get a new +forked URI, or (b) wait for an SDK-level in-place truncate to +land. The IAgent layer's optional `truncateSession?` makes this +choice transparent to clients: they call it if available, +otherwise compose with fork at the UI layer. + +##### Asymmetries between Copilot and Claude + +| | CopilotAgent | ClaudeAgent | +|---|---|---| +| `truncateSession` | Implemented; serializes through `_sessionSequencer`; protocol→SDK eventId translation | Not implemented (deliberate; SDK has no in-place truncate) | +| `setPendingMessages` (steering) | Implemented; injects via Copilot SDK's `send({ mode: 'immediate' })` | Implemented (planned Phase 9); yields `SDKUserMessage` with `priority: 'now'` into the existing prompt iterable | +| `setPendingMessages` (queued) | n/a — server consumes server-side | n/a — server consumes server-side | +| `IAgentSteeringConsumedSignal` | Emitted on SDK ack | Emitted when the SDK echoes the `'now'`-priority message on the event stream after preempting the in-flight turn | + +The two SDKs land on the **same conceptual primitive** — a +per-message hint that means "preempt the current turn and serve +me first" — via different transports: + +| Surface | New turn | Steering | +|---|---|---| +| Claude Agent SDK | yield `SDKUserMessage` with `priority: 'next'` (or default) | yield `SDKUserMessage` with `priority: 'now'` 
| +| Copilot CLI SDK | `session.send({ ... })` (no `mode`) | `session.send({ ..., mode: 'immediate' })` | + +#### Invariants + +- **Steering preserves the in-flight Turn at the protocol level + even though the SDK preempts internally.** On the Claude SDK, + `priority: 'now'` causes the SDK to abort the current + generation and run the steering message next. The protocol Turn + reconstruction (M7) folds the resulting messages back into the + same Turn so consumers see steering as "additional context for + the current generation," not a new turn. Provider implementations + must yield via `priority: 'now'` (or the SDK's equivalent + preempt hint), **not** via `Query.interrupt()` followed by a new + send — that path produces explicit Turn boundaries. +- **`queuedMessages` is always empty at the agent boundary.** Any + agent acting on a non-empty `queuedMessages` is implementing + behavior the host explicitly excludes from this surface; the + parameter exists only as a future-proofing slot. +- **Steering doesn't create a new `Turn.id`.** A steering message + is folded into the current Turn's user-side history at + reconstruction time. UIs that key off Turn boundaries will not + see steering as a separate row. +- **`steering_consumed` waits for model visibility, not queue + acceptance.** The signal must fire after the SDK has actually + surfaced the message to the model, not when the agent's `yield` + resolves. Premature signals would clear the pill before the + user's intent has reached the model. +- **`truncateSession?` being undefined is a valid protocol + state.** Clients must check for the optional-ness and degrade + gracefully (e.g., offer fork instead). Agents must not throw + for "not supported"; they simply don't define the method. +- **Truncation is in-place by definition.** Any operation that + changes the session URI (forkSession) is M9, not M10. The + protocol surfaces these as different methods deliberately.
+ +### M11 — Config schema and `Options` ↔ `Query` duality + +This is the cross-cutting portrait of *all configuration writes* in +the system, not a single IAgent method. It explains where every +config value enters the SDK (startup `Options` field vs runtime +`Query` setter), how the host's IAgent surface routes each kind of +write, and what the reference Claude impl does with the in-flight +request boundary to make non-bijective writes safe. + +The "Startup-only vs runtime mutability" section earlier in this +document +([anchor](#startup-only-vs-runtime-mutability)) +is the short version; M11 is the full mapping. + +#### The hard split + +The Claude SDK exposes configuration on **two surfaces** that look +similar but mean different things: + +| Surface | Shape | When applied | Cost of change | +|---|---|---|---| +| `Options` (passed to `query({ options })`) | Plain object, ~70 fields | At subprocess `startup()` only — baked in for the lifetime of the `Query` | Forces a session restart (close current `Query`, spawn a new one) | +| `Query` runtime methods (`setModel`, `setPermissionMode`, `applyFlagSettings`, `setMcpServers`, `reloadPlugins`, `toggleMcpServer`, `reconnectMcpServer`, `setMaxThinkingTokens`) | Methods on the live `Query` instance | At any time the `Query` is live; serialised through the SDK's control-request channel | Cheap; no restart | + +A handful of concepts are **bijective** — they appear on both +surfaces and are kept in sync by the SDK: + +| Concept | `Options` field | `Query` setter | SDK control request | +|---|---|---|---| +| Active model | `model?: string` | `setModel(model?)` | `SDKControlSetModelRequest` ([sdk.d.ts:2425](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L2425)) | +| Permission mode | `permissionMode?: PermissionMode` | `setPermissionMode(mode)` | `SDKControlSetPermissionModeRequest` 
([sdk.d.ts:2433](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L2433)) | +| Effort level | `effort?: EffortLevel` (5 values incl. `'max'`) ([sdk.d.ts:1213](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1213)) | `applyFlagSettings({ effortLevel })` ([sdk.d.ts:1789](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1789)) — note: 4-value subset, no `'max'` ([sdk.d.ts:4292](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L4292)) | `SDKControlApplyFlagSettingsRequest` ([sdk.d.ts:2080](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L2080)) | +| Thinking budget | `thinking: { type, budgetTokens? }` / deprecated `maxThinkingTokens` | `setMaxThinkingTokens(n \| null)` (deprecated) | `SDKControlSetMaxThinkingTokensRequest` | +| Dynamic MCP servers | `mcpServers?: Record` | `setMcpServers(servers)` | `SDKControlMcpSetServersRequest` | +| Settings layer ("flag settings") | `settings?: string \| Settings` | `applyFlagSettings(partial)` | `SDKControlApplyFlagSettingsRequest` | + +Everything else on `Options` is **startup-only**: changing it +requires closing the current `Query` and starting a new one with the +new `Options`. This includes `cwd`, `agent`, `agents`, `tools`, +`toolConfig`, `systemPrompt`, `plugins`, `hooks`, `canUseTool`, +`onElicitation`, `mcpServers` (when not subsequently overridden by +`setMcpServers`), `sandbox`, `settingSources`, and the bulk of the +type ([sdk.d.ts:977-1532](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L977-L1532)). + +#### The effort clamp at the seam + +The bijection is *not perfect* for effort. 
The two surfaces use +different value sets: + +| Surface | Allowed values | +|---|---| +| `Options.effort: EffortLevel` | `'low' \| 'medium' \| 'high' \| 'xhigh' \| 'max'` | +| `applyFlagSettings({ effortLevel })` (typed via `Settings`) | `'low' \| 'medium' \| 'high' \| 'xhigh'` (no `'max'`) | + +The reference extension handles this with a single-line clamp where +the runtime path crosses the seam +([claudeCodeAgent.ts:195-196](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts#L195-L196)): + +```ts +// Settings.effortLevel does not include 'max'; the SDK treats it as a 'high' fallback. +await this._queryGenerator.applyFlagSettings({ effortLevel: effort as 'low' | 'medium' | 'high' | 'xhigh' | undefined }); +``` + +Mapping consequence: *if* the host UI lets the user pick `'max'` +mid-session, the runtime write silently degrades to `'xhigh'` (or +the SDK's `'high'` fallback if `'xhigh'` is also unavailable on the +active model). To get true `'max'` mid-session the agent must take +the **restart-required** path: store the new effort, close the +`Query`, and spawn a new one with `Options.effort = 'max'`. + +#### IAgent surface routes for config writes + +Three write paths reach the SDK config layer: + +| IAgent surface | Carries | SDK destination | Bucket | +|---|---|---|---| +| `IAgentCreateSessionConfig.config: Record` | Provider-resolved schema fields (model, permissionMode, plugins, MCP, ...) | `Options.*` on the very first `query()` call | Startup | +| `IAgent.changeModel(session, model: ModelSelection)` | `ModelSelection { id, config?: Record }` — model id **plus** the model's per-model config bag (e.g. 
`{ effort: 'high' }`) ([state.ts:232-236](../../common/state/protocol/state.ts#L232-L236)) | `Query.setModel(sdkId)` for `id`, plus `Query.applyFlagSettings({ effortLevel })` (and any other model-specific runtime setter) for entries in `config` | Hot-swap (bijective, atomic per call) | +| `IAgent.setCustomizationEnabled(uri, enabled)` | One customization toggle | `Query.reloadPlugins()` (or restart if tools diverge) | Defer-and-coalesce or restart-required | + +The shape of `ModelSelection.config` is dictated by the model's +`IAgentModelInfo.configSchema` ([agentService.ts:267](../../common/agentService.ts#L267)). +For Claude models, the schema declares an `effort` property whose +allowed values follow the model's supported reasoning-effort set +(see [claudeCodeModels.ts:190-214](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeModels.ts#L190-L214)). +When the user re-picks effort in the chat UI, the workbench rebuilds +`ModelSelection` with the new `config.effort` and calls `changeModel` +— so **`changeModel` is the runtime mutation path for both `id` and +any bijective model-config field**, not just the active model id. + +Note what's **not** on the IAgent surface: there is no +`setPermissionMode`, no `setEffort` (a *standalone* setter), no +`setTools`, no `setMcpServers`, no `setSystemPrompt`. Mid-session +mutation of those values is *not* a protocol-level concern — the +host re-issues `createSession` with the new `config` (effectively +forcing a restart) when they need to change. The only two runtime +mutation surfaces that are first-class on IAgent are **active +model + its bijective config bag** (`changeModel`) and +**customization enablement** (`setCustomizationEnabled`), because +those are the two the user changes most often during a live session. 
+ +The two unexposed-but-still-runtime SDK setters that are *not* +reached through `changeModel`'s config bag +(`Query.setPermissionMode`, generic `Query.applyFlagSettings({...})` +for non-model-bound settings) are *used internally* by the +reference extension, but driven from chat-session settings that +aren't part of the IAgent protocol. When the IAgent protocol grows +the corresponding surface, these methods become the natural +backing primitives. + +#### Hot-swap / defer-and-coalesce / restart-required taxonomy + +This is the reference extension's classification of *every* config +write, driven from the in-flight request boundary in +[`claudeCodeAgent.ts`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts): + +| Bucket | Examples | When applied | Mechanism in claudeCodeAgent.ts | +|---|---|---|---| +| Hot-swap (cheap, between turns) | `setModel`, `setPermissionMode`, `applyFlagSettings({ effortLevel })` (the last reached via `changeModel`'s `ModelSelection.config` for model-bound fields) | Awaited just before the next `SDKUserMessage` is yielded | `_setModel` / `_setPermissionMode` / `_setEffort` (lines 159-198): if `_queryGenerator` exists, `await` the SDK setter; otherwise stash for the next `startup()` | +| Defer-and-coalesce | `reloadPlugins` after `setCustomizationEnabled` | Set a `_pending*` flag while busy; apply at next yield boundary | `_pendingPrompt` deferred (line 133) gates the prompt iterable; flags are drained when it resolves | +| Restart-required | Tool-set diff, settings file change, any startup-only `Options` field | `_pendingRestart = true`, return from iterable, catch-block restarts session | `_pendingRestart` (line 145) flips at the boundary; iterable returns; outer catch closes the `Query` and reopens with new `Options` | + +There is deliberately **no mid-turn mutation path** for any bucket. +Every host write either applies immediately when idle or queues for +the next prompt boundary.
This is what makes the prompt iterable +(M1) the central choke point: it is also the only place the agent +honors pending config writes. See M1 for the yield-boundary code. + +##### `changeModel` as a single hot-swap call + +A single `changeModel` invocation can fan out to **multiple** SDK +runtime setters depending on what changed in the `ModelSelection`: + +| `ModelSelection` diff | SDK calls performed at the next yield boundary | +|---|---| +| `id` changed | `Query.setModel(sdkId)` | +| `config.effort` changed | `Query.applyFlagSettings({ effortLevel })` (with the clamp) | +| Both changed | Both setters, in agent-defined order | +| Only model-config changed (same `id`) | The relevant config setters only (no `setModel`) | + +From the protocol's point of view the call is **atomic**: the host +sends one new `ModelSelection`, the agent applies the bundle at the +next safe boundary, and the user observes the new model + new +effort together. The agent is responsible for the diff; there is no +per-field protocol method. + +#### Bijective-concept lifecycle: write resolution order + +For a bijective concept, the *resolved value at any moment* is +determined by a fixed precedence chain: + +1. The most recent **runtime setter** call (if any), e.g. + `Query.setModel('opus-4.7')`. This wins as long as the `Query` + is live. +2. The **`Options` field** the `Query` was started with. This is + the floor — every `Query` starts from `Options` and the runtime + setter only diverges from it. +3. The SDK's **internal default** if neither was set. + +When a `Query` restarts (resume, fork, or restart-required write), +precedence resets — only #2 and #3 apply until the next runtime +setter call. 
The reference extension preserves continuity by +storing the most-recent runtime values +(`_currentModelId`, `_currentPermissionMode`, `_currentEffort`) +and re-applying them on the new `Query` either via `Options` +(carry-through on restart) or via the runtime setter (post-startup +re-application). + +This is the invariant that makes "restart-required" work without +losing in-progress UI state: the agent restarts the SDK, but the +session-level *config view* survives because the agent maintains +its own bijective state and re-pushes it. + +#### Why this duality is unavoidable + +The duality is not a design flaw. It reflects a real cost +difference at the subprocess boundary: + +- `Options` are read once at subprocess startup, before the SDK + loads plugins, builds the system prompt, opens MCP transports, + and warms the prompt cache. Most fields touch one or more of + those subsystems, and there's no protocol for a live SDK to + rebuild them in place. +- `Query` runtime methods are *control-channel writes* over an + already-running JSON-RPC link + ([SDKControlRequestInner](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L2373) + enumerates the full set). They are cheap because they only flip + flags inside an already-warm subprocess. + +The bijective concepts (model, permission mode, effort, MCP +servers, settings) are the cases where the SDK has done the work +to make in-place mutation safe; everything else stays startup-only +because rebuilding is harder than restarting. + +#### Invariants + +- **A bijective concept's runtime setter *always* accepts a subset + of its `Options` value set, modulo a documented clamp.** + When the runtime setter's typed value range is narrower (effort: + 4 vs 5), the agent must clamp at the seam, *not* at the IAgent + surface — clamping at the IAgent surface would lose the user's + original intent for the next restart.
+- **`changeModel` is bundle-atomic.** A single call carries `id` + *and* the model's full config bag. The agent applies the diff as + one or more SDK setters at the same yield boundary; consumers + must not assume "id stable ⇒ effort stable" or vice versa across + a `changeModel` call. +- **Non-bijective writes always restart.** Any write to a + startup-only `Options` field reaches the SDK only through a + fresh `query()` call. Agents must classify every config write as + hot-swap / defer-and-coalesce / restart-required up front and + pick the path; there is no "try runtime, fall back to restart" + pattern. +- **Restart preserves the bijective state.** When the agent + restarts the `Query`, it re-applies its stored bijective values + (model, permission mode, effort, ...) so the user-visible config + stays continuous across the restart. Failing to do so would make + every customization toggle silently revert the active model. +- **The IAgent surface is intentionally narrower than the SDK + surface.** Only `changeModel` and `setCustomizationEnabled` are + first-class runtime mutations. Everything else flows through + `IAgentCreateSessionConfig.config` and a session restart. This + is a protocol choice, not an SDK limitation. +- **The prompt iterable is the only mutation barrier.** All three + buckets (hot-swap, defer, restart) synchronise at the same point: + just before yielding the next `SDKUserMessage`. There is no other + mutation path the SDK exposes that doesn't also entail a turn + boundary; the host should not invent one. +- **Effort `'max'` is genuinely two-tier.** Mid-session it can only + reach the SDK as a startup-only value (a restart). Treating it as + a hot-swap silently demotes to `'xhigh'`. This is the one + user-visible asymmetry in the bijective set and must be surfaced + in the UI if the host accepts `'max'` as a runtime selection. 
+ +### M12 — Catalog and discovery: `getDescriptor`, `models`, `listSessions`, `getSessionMetadata`, `resolveSessionConfig`, `sessionConfigCompletions` + +This is the **read-only** half of the IAgent surface — the cluster +of methods clients call before (or alongside) any session has been +created. They answer four questions the UI needs to render its +catalog: + +1. *Who is this agent?* (`getDescriptor`) +2. *What models can I pick?* (`models` observable) +3. *What sessions exist?* (`listSessions`, `getSessionMetadata`) +4. *What config fields does this agent need to create a session?* (`resolveSessionConfig`, `sessionConfigCompletions`) + +None of these methods takes a session URI in a sense that mutates +state; the only one with a `session` parameter (`getSessionMetadata`) +reads existing on-disk state without changing it. This is the +property that makes M12 a coherent cluster — it is the *catalog* +layer, not the *control* layer. + +#### Surface inventory + +| IAgent surface | Method or field | Optional?
| Mapping kind | +|---|---|---|---| +| Provider identity | `getDescriptor(): IAgentDescriptor` ([agentService.ts:468](../../common/agentService.ts#L468)) | required | Synthetic (local literal) | +| Available models | `models: IObservable` ([agentService.ts:471](../../common/agentService.ts#L471)) | required | Direct (one SDK call) + adapter | +| Session catalog | `listSessions(): Promise` ([agentService.ts:474](../../common/agentService.ts#L474)) | required | Direct (one SDK call) + sidecar join | +| Single-session fast path | `getSessionMetadata?(session): Promise` ([agentService.ts:477](../../common/agentService.ts#L477)) | optional | Direct (one SDK call) + sidecar join | +| Creation-time config schema | `resolveSessionConfig(params): Promise` ([agentService.ts:425](../../common/agentService.ts#L425)) | required | Synthetic (provider builds locally) | +| Dynamic enum lookups | `sessionConfigCompletions(params): Promise` ([agentService.ts:428](../../common/agentService.ts#L428)) | required | Synthetic / disk-backed | + +The two "Synthetic" surfaces are the protocol's way of saying: +*the SDK doesn't have an opinion about your config schema; the +provider does.* CopilotAgent and the Claude provider both build +their schemas from local knowledge (git info, model capabilities, +platform-shared properties). + +#### `getDescriptor()` — provider identity + +| | Shape | +|---|---| +| Returns | `IAgentDescriptor { provider, displayName, description }` ([agentService.ts:160-165](../../common/agentService.ts#L160-L165)) | +| CopilotAgent | Hardcoded literal `{ provider: 'copilotcli', displayName: 'Copilot CLI', description: '…' }` ([copilotAgent.ts:256-262](../copilot/copilotAgent.ts#L256-L262)) | +| Claude provider | Hardcoded literal `{ provider: 'claude', displayName: 'Claude', description: '…' }` | + +`AgentProvider` is `type AgentProvider = string` ([agentService.ts:158](../../common/agentService.ts#L158)) +— a plain alias, no nominal brand. 
The same string serves three +roles: + +1. The dispatch key on `IAgent.id` ([agentService.ts:406](../../common/agentService.ts#L406)). +2. The displayed identity in `IAgentDescriptor.provider` ([agentService.ts:162](../../common/agentService.ts#L162)). +3. The URI scheme for sessions, via `AgentSession.uri(provider, id)` + ([agentService.ts:374-376](../../common/agentService.ts#L374-L376)) + — so Claude sessions live at `claude:/`. + +`IAgent.id` and `IAgentDescriptor.provider` MUST be equal; CopilotAgent +hardcodes both to `'copilotcli'` ([copilotAgent.ts:206, 258](../copilot/copilotAgent.ts#L206)). +Description is a required, non-empty string — there is no fallback. + +#### `models` — observable of available models + +| | Shape | +|---|---| +| Field | `models: IObservable` | +| `IAgentModelInfo` | `{ provider, id, name, supportsVision, maxContextWindow?, configSchema?: ConfigSchema, policyState?, _meta? }` ([agentService.ts:265-274](../../common/agentService.ts#L265-L274)) | +| Claude provider source | **CAPI** via `ICopilotApiService.models(githubToken)` ([copilotApiService.ts:228-281](../shared/copilotApiService.ts#L228-L281)) — *not* `Query.supportedModels()` | +| Result shape | `CCAModel[]` from `@vscode/copilot-api` — carries `vendor`, `supported_endpoints`, `model_picker_enabled`, `model_picker_category`, plus capability metadata | + +**This provider runs against CAPI, not Anthropic.** Claude models +reach the IAgent provider through the same Copilot Chat API the +reference extension uses, not through Anthropic's API. The +Copilot subscription is the auth principal; CAPI proxies the +request to Anthropic's `/v1/messages` endpoint server-side. 
For +the `models` observable this means the SDK's +`Query.supportedModels()` (and the `models` field on +`initializationResult()`) are **not the right source** — they +would return Anthropic's catalog as if you were calling Anthropic +directly, ignoring CAPI's per-subscription gating, picker +enablement, and billing-multiplier metadata. + +The shared service `ICopilotApiService` already exposes the right +primitive ([copilotApiService.ts:281](../shared/copilotApiService.ts#L281)): + +```ts +models(githubToken: string, options?: ICopilotApiServiceRequestOptions): Promise<CCAModel[]>; +``` + +The filter pattern extends the reference extension's three +surface-check predicates with two additional checks the impl in +[claudeAgent.ts:47-53](claudeAgent.ts#L47-L53) carries to handle the +CAPI catalog's broader contents (encoded by the test fixtures in +[claudeAgent.test.ts:497-512](../../test/node/claudeAgent.test.ts#L497-L512)): + +1. `vendor === 'Anthropic'` — picks Claude models out of the + multi-vendor catalog. +2. `supported_endpoints.includes('/v1/messages')` — keeps only + models that route through the Anthropic-format messages + endpoint the SDK actually talks to. (Claude models surfaced + only on `/chat/completions` are unusable here.) +3. `model_picker_enabled === true` — respects CAPI's gating of + models that should not appear in the picker. +4. `capabilities.supports.tool_calls === true` — matches the + reference extension's surface check at + [claudeCodeModels.ts:154-164](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeModels.ts#L154-L164). + The SDK's tool-use loop assumes tool-call capability; surfacing + a non-tool-capable Claude variant would mislead the user once + Phase 7 tool calls land. +5. `tryParseClaudeModelId(m.id)` returns a defined value — excludes + synthetic CAPI model ids (router-style aliases like `'auto'`, + future non-endpoint ids) that aren't real Claude endpoints. 
+ Without this, a synthetic id could reach `Options.model` and + never resolve to a real subprocess model selection. + +For each surviving `CCAModel`, the provider maps into +`IAgentModelInfo`: + +- `id` ← `CCAModel.id` (the CAPI model id; flows through to + `ModelSelection.id` in M11). +- `name` ← `CCAModel.name` (display). +- `supportsVision` ← from CAPI capability flags. +- `maxContextWindow` ← from CAPI capability flags. +- `configSchema` ← synthesized from `CCAModel.capabilities.supports.thinking` + (or analogous CAPI signal) using the same + `_createThinkingLevelConfigSchema` pattern CopilotAgent uses + ([copilotAgent.ts:457-484](../copilot/copilotAgent.ts#L457-L484)). + The set of effort levels for Claude is documented in M11. +- `policyState` ← from CAPI policy flags. +- `_meta` ← billing multiplier and any other CAPI-specific + side-channel data (matches CopilotAgent's `multiplierNumeric` + pattern). + +The refresh trigger is identical to CopilotAgent: re-call +`ICopilotApiService.models()` whenever the GitHub token changes +(see CopilotAgent's `_refreshModels()` at [copilotAgent.ts:300-317](../copilot/copilotAgent.ts#L300-L317) +and its `authenticate()`-driven invocation at [copilotAgent.ts:295](../copilot/copilotAgent.ts#L295)). +No Claude SDK consultation is needed for the catalog at all — +starting a `Query` purely to list models would be wasteful and +would give the wrong answer. + +**Asymmetry with the reference extension:** the reference Claude +extension uses `IEndpointProvider.getAllChatEndpoints()` and +filters to `modelProvider === 'Anthropic'` +([claudeCodeModels.ts:154-164](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeModels.ts#L154-L164)). +`IEndpointProvider` is a workbench-side abstraction over the same +CAPI catalog. The IAgent Claude provider, sitting in the agent +host process, can't reach `IEndpointProvider` directly — but it +doesn't need to. 
`ICopilotApiService.models()` is the agent-host +equivalent and returns the same CAPI catalog. The two paths +resolve to the same source of truth; the picker presented to the +user is identical. + +`IAgentModelInfo._meta` ([agentService.ts:273](../../common/agentService.ts#L273)) +is a per-provider side-channel; CopilotAgent uses it for billing +multipliers (`multiplierNumeric`). Since Claude also runs through +CAPI, the same `multiplierNumeric` (and other CAPI metadata) is +available on each `CCAModel` and should flow through verbatim. + +#### `listSessions()` — session catalog + +| | Shape | +|---|---| +| Returns | `IAgentSessionMetadata[]` ([agentService.ts:100-124](../../common/agentService.ts#L100-L124)) | +| Required fields | `session: URI`, `startTime: number`, `modifiedTime: number` | +| Optional fields | `project`, `summary`, `status`, `activity`, `model`, `workingDirectory`, `customizationDirectory`, `isRead`, `isArchived`, `diffs`, `_meta` | +| Claude SDK source | **Top-level** `listSessions(options?): Promise` ([sdk.d.ts:729](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L729)) — *not* a `Query` method | +| `SDKSessionInfo` shape | `{ sessionId, summary, lastModified, customTitle?, firstPrompt?, gitBranch?, cwd?, tag?, createdAt }` ([sdk.d.ts:2782-2825](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L2782-L2825)) | + +The SDK reads `~/.claude/projects/**/*.jsonl` under the hood; the +provider does **not** scan disk itself. 
The reference extension +confirms this — every catalog call is a one-line forwarding wrapper +in `ClaudeCodeSdkService` ([claudeCodeSdkService.ts:78-117](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeSdkService.ts#L78-L117)), +and `getAllSessions(token)` ([claudeCodeSessionService.ts:75-110](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/sessionParser/claudeCodeSessionService.ts#L75-L110)) +calls `_sdkService.listSessions()` directly with no fallback to +JSONL parsing for the catalog. (Raw JSONL parsing exists but is +restricted to subagent transcripts the SDK doesn't expose; see +[sessionParser/README.md](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/sessionParser/README.md).) + +##### CopilotAgent's `listSessions` shape + +CopilotAgent ([copilotAgent.ts:525-558](../copilot/copilotAgent.ts#L525-L558)) +joins three sources: + +1. SDK `client.listSessions()` for the canonical session list. +2. Per-session host-side **sidecar metadata** (`_readStoredSessionMetadata`) + for fields the SDK doesn't carry — model, customization + directory, working directory. +3. A `Limiter<>(4)` to bound parallel project-info resolution. + +Two filters apply to the result: + +- **No-sidecar filter (CopilotAgent only).** Sessions without sidecar + metadata are *dropped* — so Copilot's `listSessions` returns only + sessions this host has seen before. Sessions created on another + machine or in another VS Code install are invisible until they've + been re-opened through this host. **Claude does NOT inherit this + filter** — the Claude SDK's session list includes external + Claude-CLI-created sessions that have no host-side sidecar but + must still surface (Phase-5 exit criterion). Claude treats the + sidecar as a best-effort enrichment overlay; missing sidecar + fields fall back to whatever the SDK supplies + ([claudeAgent.ts:761-797](claudeAgent.ts#L761-L797)). 
+- **Provisional sessions are not included** ([copilotAgent.ts:736-742](../copilot/copilotAgent.ts#L736-L742)). + They have no SDK session yet, so `client.listSessions()` doesn't + know about them and there's no sidecar until materialization + (M9). + +##### Claude's `listSessions` differences from CopilotAgent + +The Claude provider should mirror CopilotAgent's join pattern, but +the two SDKs disagree on which fields they carry: + +| Field | CopilotAgent source | Claude source | +|---|---|---| +| `summary` | SDK | SDK (`summary` or `firstPrompt`) | +| `startTime` | SDK | SDK (`createdAt`) | +| `modifiedTime` | SDK | SDK (`lastModified`) | +| `workingDirectory` | sidecar | SDK (`cwd`) — sidecar redundant | +| `model` | sidecar | sidecar (SDK doesn't carry it) | +| `project` | resolved from `cwd` | resolved from `cwd` | +| `customizationDirectory` | sidecar | sidecar | +| `_meta.git` | not populated by `listSessions` | not populated by `listSessions` | +| `isArchived` | host-side archive store, not from SDK | host-side archive store, not from SDK | +| `status` | not populated by `listSessions` | not populated by `listSessions` | + +Archive state and live status are *not* part of the catalog mapping +in either provider — they live in higher layers (the host-side +archive store, the live-session tracker) and are stitched in by +the agent service before the result reaches the client. + +#### `getSessionMetadata?(session)` — single-session fast path + +| | Shape | +|---|---| +| Returns | `Promise` | +| Marker | Optional method (`?`) | +| Claude SDK source | **Top-level** `getSessionInfo(sessionId, options?): Promise` ([sdk.d.ts:581](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L581)) | + +The SDK note on `getSessionInfo` is precise: it *only reads the +single session file rather than every session in the project*. 
So +this is genuinely a single-file probe, not a filtered enumerate — +the right primitive for "show this session in a list of links" or +"check if this session still exists." + +CopilotAgent implements it ([copilotAgent.ts:560-591](../copilot/copilotAgent.ts#L560-L591)) +via `client.getSessionMetadata(sessionId)` joined with the same +sidecar read as `listSessions`. Returns `undefined` if either is +missing. + +The Claude provider should also implement it (the SDK has the +matching primitive) for parity with CopilotAgent. Callers must +still null-check because the method is optional on the interface. + +#### `resolveSessionConfig(params)` — creation-time config schema + +| | Shape | +|---|---| +| Input | `IAgentResolveSessionConfigParams { provider?, workingDirectory?, config? }` ([agentService.ts:237-241](../../common/agentService.ts#L237-L241)) | +| Returns | `ResolveSessionConfigResult { schema: SessionConfigSchema, values: Record }` ([commands.ts:865-870](../../common/state/protocol/commands.ts#L865-L870)) | +| Property type | `SessionConfigPropertySchema` ([state.ts:494-504](../../common/state/protocol/state.ts#L494-L504)) — extends `ConfigPropertySchema` with `enumDynamic?` and `sessionMutable?` | +| Claude SDK source | None — fully synthetic | + +There is no SDK call here. Both providers build the schema +locally from host knowledge. CopilotAgent's resolver +([copilotAgent.ts:819-877](../copilot/copilotAgent.ts#L819-L877)) +follows a fixed sequence: + +1. Probe git state via `IAgentHostGitService` for `defaultBranch / + currentBranch` (so the schema can include a branch picker only + when the cwd is a repo). +2. Build the `isolation` enum (`folder | worktree`, with + `worktree` gated on git presence; default `worktree` in repos). +3. Resolve the *current* `isolationValue` from `params.config` or + the default. +4. Conditionally add a `branch` property when `gitInfo` is present; + set `enumDynamic: true` and seed `enum: [branchDefault]`. +5. 
Merge with `platformSessionSchema.definition` for platform-wide + properties (`autoApprove`, `mode`, …). +6. Run `sessionSchema.validateOrDefault(params.config, defaults)` + to produce `values`. + +The Claude provider can mirror this skeleton. Provider-specific +properties to consider: + +- **`permissionMode`** — six values + (`'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | + 'dontAsk' | 'auto'`); a static enum, no `enumDynamic`. Mark + `sessionMutable: true` (M11 hot-swap; `Query.setPermissionMode()` + is bijective). The full set matches the SDK's `PermissionMode` + type at [sdk.d.ts:1560](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1560). + `'auto'` is the model-classifier-driven approval mode the SDK + exposes; surfacing it on the IAgent enum means the client UI + can pick it without a future schema bump. +- **`isolation` / `branch`** — share with CopilotAgent's pattern; + branch picker uses `enumDynamic` when isolation is `worktree`. +- **`outputStyle`** — `Query.initializationResult().available_output_styles` + exposes the list as a *composed* source if the provider chooses + to surface it. +- **Platform-shared properties** — the same `platformSessionSchema.definition` + CopilotAgent merges in. + +##### Same schema serves creation *and* post-creation display + +The `resolveSessionConfig` schema is the **single source of truth** +read surface for both the create-session form and the live-session +settings UI. The two phases differ only in what the caller passes +as `params.config`: + +1. At create time, the form passes the user's in-progress values; + defaults come from `schema.properties[k].default`, and resolved + values come back as `values[k]`. Submitting the form invokes + `createSession({ config: values })` — + [`IAgentCreateSessionConfig.config`](../../common/agentService.ts#L211) + is the creation-time write bag. +2. 
After creation, the live-session settings UI re-fetches the + same schema by calling `resolveSessionConfig` with + `params.config` reflecting the session's *current* values; only + properties marked `sessionMutable: true` are interactive. + +The write path for a runtime change is *not* `resolveSessionConfig` +itself — that method is read-only. The protocol surface for +routing an arbitrary live config edit back into the running +session is **TBD** — there is no generic `setSessionConfigValues` +on `IAgent` today. The two existing first-class runtime paths, +`changeModel` (atomic id + model-config diff) and +`setCustomizationEnabled` (per-customization toggle), cover the +two cases that have shipped; everything else (including +`permissionMode`) currently has to round-trip as a fresh +`createSession` with the new bag (a restart per M11) until a +generic live-edit method lands. + +The forward-looking flow, once that protocol method exists, is: + +1. Client calls `resolveSessionConfig(params)` to get the schema + + current values for the running session. +2. User edits a `sessionMutable: true` property in the settings UI + (e.g. flips `permissionMode` from `'plan'` to `'default'`). +3. Client routes the edit through the future generic setter + (working name: `setSessionConfigValues(session, values)`) — the + bijective post-creation conduit that M11's hot-swap taxonomy + already classifies on the implementation side. +4. The provider's implementation calls the matching M11 hot-swap + routine (`Query.setPermissionMode()` for `permissionMode`, + `Query.setModel()` plus effort apply for `model` + `effort`, + etc.). +5. The SDK acknowledges, the host re-fetches values, the next + `resolveSessionConfig` call observes the new state. 
+ +So `permissionMode` (and any other M11 hot-swappable property) +lives in **two** IAgent surfaces: the schema declares it as +`sessionMutable: true` (this method), and — once the protocol +surface lands — the runtime mutation flows through the generic +live-edit setter (M11). They MUST agree: if the schema says a +property is mutable but no M11 setter exists, the edit will +round-trip as a no-op or fail. M11's restart-required bucket +(`cwd`, `executable`, `addDirectories`, etc.) MUST NOT be marked +`sessionMutable: true` here. + +The reference Claude extension has no equivalent of +`resolveSessionConfig` (the chat-session UI has no creation form; +sessions inherit settings from `vscode.workspace.getConfiguration` +and `~/.claude/settings.json`). The IAgent Claude provider is the +first place this schema gets explicitly assembled for Claude. + +#### `sessionConfigCompletions(params)` — dynamic enum lookups + +| | Shape | +|---|---| +| Input | `IAgentSessionConfigCompletionsParams extends IAgentResolveSessionConfigParams { property: string, query? }` ([agentService.ts:243-246](../../common/agentService.ts#L243-L246)) | +| Returns | `SessionConfigCompletionsResult { items: SessionConfigValueItem[] }` ([commands.ts:933-936](../../common/state/protocol/commands.ts#L933-L936)) | +| Item shape | `{ value: string, label: string, description? }` ([commands.ts:879-886](../../common/state/protocol/commands.ts#L879-L886)) | +| Claude SDK source | None — synthetic / disk-backed | + +**Out of scope for the Claude provider, at least for the initial +landing.** This method only matters when the schema marks a +property `enumDynamic: true`, signalling that the seed `enum` is +incomplete and the real list must be fetched on user input. 
+CopilotAgent ([copilotAgent.ts:880-887](../copilot/copilotAgent.ts#L880-L887)) +uses it for one property — `branch` — backed by an +`IAgentHostGitService` shell-out and capped at +`_BRANCH_COMPLETION_LIMIT = 25` ([copilotAgent.ts:217](../copilot/copilotAgent.ts#L217)). + +The Claude provider's anticipated schema (Claude-specific +`permissionMode`, `outputStyle`, plus the platform-shared +properties) has **no `enumDynamic` properties on the Claude side** +— `permissionMode` is a static six-value enum, `outputStyle` is a +static list from `Query.initializationResult().available_output_styles`, +and the rest are booleans / fixed enums. Branch and isolation come +from the platform-shared schema, which the agent service handles +uniformly above the provider. + +The practical consequence: the Claude provider's implementation is +likely a one-liner returning `{ items: [] }` until a Claude-specific +dynamic enum surfaces. The interface requires the method to exist, +but a no-op satisfies it. If a future schema property does need +dynamic completions, the provider can opt in property-by-property +(matching CopilotAgent's `if (property === 'branch')` shape) — +branch completions, if they apply, reuse the same +`IAgentHostGitService` shell-out with no Claude-specific code +path. + +#### Schema vs values: the duality + +`ResolveSessionConfigResult` returns *both* a schema and a values +object, and the JSDoc framing is precise: + +> *schema*: JSON Schema describing available configuration +> properties given the current context. +> +> *values*: Current configuration values (echoed back with +> server-resolved defaults applied). + +These are not redundant. 
They encode different kinds of knowledge: + +| Field | Encodes | Owner | +|---|---|---| +| `schema.properties[k].default` | Display-time default — what the form renders if the field is empty | Static (provider's schema-build code) | +| `values[k]` | Resolved current value the server will actually use *right now* | Dynamic (`validateOrDefault(params.config, defaults)`) | + +The duality matters in three places: + +1. **Cross-property resolution.** `values` can encode resolutions + the schema can't. CopilotAgent's branch resolves to + `currentBranch` for `folder` isolation but `defaultBranch` for + `worktree` ([copilotAgent.ts:833](../copilot/copilotAgent.ts#L833)) + — the schema's `default` shows one value, `values` reflects the + live cross-property resolution. +2. **Validation drift.** `validateOrDefault` strips fields that + don't validate against the current schema. So a value the user + supplied but is no longer valid (because some other property + changed) is silently dropped from `values` and re-derived from + defaults. +3. **Intentional gaps.** A property may be in `schema` but not in + `values` — e.g. CopilotAgent omits a `permissions` slot from + `values` so auto-approval falls through ([copilotAgent.ts:867-870](../copilot/copilotAgent.ts#L867-L870)). + Clients must treat missing keys in `values` as "no resolved + value yet," not "value is `undefined`." + +`params.config` does **not** round-trip verbatim — it feeds +`validateOrDefault`, which (a) keeps user-supplied values that +validate, (b) replaces invalid values with defaults, (c) fills +missing keys from defaults. The server is the canonical resolver. 
+ +#### `enumDynamic` and `sessionMutable`: the two session-only flags + +The two extensions to `ConfigPropertySchema` that make +`SessionConfigPropertySchema` distinct ([state.ts:494-504](../../common/state/protocol/state.ts#L494-L504)): + +| Flag | Meaning | Client behaviour | +|---|---|---| +| `enumDynamic?: boolean` | The full set of allowed values is too large to enumerate; `enum` carries seed/recent values only | Call `sessionConfigCompletions(property, query)` on user input | +| `sessionMutable?: boolean` | The user may change this property *after* session creation | Show in post-creation settings UI; otherwise the property is creation-time only | + +`enumDynamic` is opt-in — providers that statically enumerate +(full enum, no flag) are valid. `sessionMutable` is the +creation-vs-mutation toggle: properties without it become +read-only after `createSession`. For Claude, `permissionMode` is a +clean candidate for `sessionMutable: true` because it sits in M11's +hot-swap bucket (`Query.setPermissionMode()` is bijective); +`isolation` and `cwd` are not (no SDK setter; they pin the +subprocess) so they stay creation-time. 
+ +#### Asymmetries between Copilot and Claude + +| Surface | CopilotAgent | Claude provider | +|---|---|---| +| `getDescriptor` | Synthetic literal | Synthetic literal | +| `models` | CAPI via `client.listModels()` (CopilotClient wraps CAPI) | **CAPI** via `ICopilotApiService.models(githubToken)` filtered to `vendor === 'Anthropic'` ∩ `supported_endpoints ∋ '/v1/messages'` ∩ `model_picker_enabled === true` — *not* `Query.supportedModels()` | +| `listSessions` | SDK `client.listSessions()` joined with sidecar; **drops sessions without sidecar** | SDK top-level `listSessions(options?)` joined with sidecar; **does NOT drop** — sidecar is a best-effort enrichment overlay so external Claude-CLI sessions surface unconditionally | +| `getSessionMetadata` | SDK `client.getSessionMetadata(id)` joined with sidecar | SDK top-level `getSessionInfo(id, options?)` joined with sidecar | +| `resolveSessionConfig` | Synthetic; git probe + `platformSessionSchema` merge | Synthetic; same skeleton + Claude-specific (`permissionMode`, `outputStyle`, …) | +| `sessionConfigCompletions` | Branch picker via `IAgentHostGitService` | Likely a no-op (`{ items: [] }`) on initial landing — no Claude-specific `enumDynamic` properties | + +The catalog cluster is the surface where the two SDKs are *most +symmetric* — both expose the same shapes with cosmetic naming +differences, and both providers reduce them through the same +sidecar-join + synthetic-schema pattern. The "every catalog call is +a one-line wrapper" property of the reference Claude extension +([claudeCodeSdkService.ts:78-117](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeSdkService.ts#L78-L117)) +is a strong signal that the IAgent provider's catalog code can be +trivially thin. The one notable divergence is `models`: both +providers run against CAPI, but Claude reaches it through the +shared `ICopilotApiService` rather than a vendor-specific SDK +client. 
+ +#### Invariants + +- **`IAgent.id` ≡ `IAgentDescriptor.provider` ≡ session URI scheme.** + All three carry the same string. Diverging them would route + catalog reads, session URIs, and dispatch through inconsistent + keys. +- **CopilotAgent's `listSessions` returns only host-known sessions; Claude's does not.** + CopilotAgent filters out sessions without sidecar metadata, so + its catalog is *not* a faithful mirror of the SDK's on-disk + session list; it is the intersection of "SDK knows about it" and + "this host has seen it." The Claude provider treats the sidecar + as a best-effort enrichment overlay and surfaces sidecar-less + sessions (e.g. external Claude-CLI sessions) unconditionally. +- **Provisional sessions never appear in `listSessions`.** They + have no SDK session yet, so the SDK's catalog call doesn't + surface them. The host's `sessionAdded` deferred-notification + pattern (M9) is what makes this safe — clients see materialized + sessions only. +- **`isArchived` and `status` are stitched in above the agent.** + Neither field comes from the SDK; the agent service joins them + from host-side stores before the result reaches the client. + Providers must not invent these fields locally. +- **`models` is sourced from CAPI, not from the Claude SDK.** + `Query.supportedModels()` would return Anthropic's catalog + ignoring CAPI gating; the provider MUST go through + `ICopilotApiService.models()` and apply the five-predicate + filter (`vendor === 'Anthropic'`, `supported_endpoints ∋ + '/v1/messages'`, `model_picker_enabled === true`, + `capabilities.supports.tool_calls === true`, and a defined + `tryParseClaudeModelId(m.id)`) to match the + reference extension's picker. +- **`getSessionMetadata?` is a parity surface.** Where the SDK + has a single-file probe (`getSessionInfo` on Claude, + `getSessionMetadata` on Copilot), the provider should implement + this method. Providers that lack the primitive omit the method + rather than fall back to filtering `listSessions()`. +- **`resolveSessionConfig` is synthetic and provider-owned.** No + SDK consultation. The provider holds the schema; the host + contributes only `IAgentHostGitService` and platform-shared + properties via `platformSessionSchema.definition`. 
+- **`values` is server-canonical.** Clients must not assume + `params.config` round-trips verbatim. Round-tripping happens + only when `validateOrDefault` finds every input field valid + against the current schema; any drift is silently corrected. +- **`enumDynamic` does not imply non-empty `enum`.** Providers may + return an empty `enum` with `enumDynamic: true` if no seed + values are warranted. Clients must call + `sessionConfigCompletions` rather than rendering the seed list + as authoritative. +- **`sessionMutable` is the only post-creation mutability signal in + the schema.** A property without it is creation-time only, + regardless of whether the underlying SDK setter exists. The + schema is the contract the client renders; M11's hot-swap + taxonomy is the *implementation's* answer to runtime mutation. + These layers must agree: `sessionMutable: true` on a property + whose SDK write requires a restart would mislead the client. +- **`resolveSessionConfig` is the schema source for runtime + mutations too.** The settings UI for a running session re-reads + this method to render `sessionMutable: true` properties; the + edit then routes through the future generic live-edit setter + (working name: `setSessionConfigValues`; protocol surface TBD — + no method on `IAgent` today). The two paths that have shipped + — `changeModel` and `setCustomizationEnabled` — carry the + matching M11 hot-swap routines for those specific properties; a + property that is mutable in the schema MUST have a matching M11 + hot-swap path, and a property that has no M11 path MUST NOT be + marked `sessionMutable: true`. Until the generic setter lands, + any `sessionMutable: true` property whose edit isn't covered by + `changeModel`/`setCustomizationEnabled` is implicitly + restart-required: clients must round-trip the change as a fresh + `createSession`. 
+- **`sessionConfigCompletions` is required-but-may-be-empty.** The + Claude provider's expected initial implementation is a no-op + returning `{ items: [] }` because none of its anticipated + schema properties carry `enumDynamic: true`. Implementing it as + a no-op is correct and idiomatic; the method exists to back + `enumDynamic` properties when (and only when) they appear. + +### M13 — Authentication: `authenticate`, `getProtectedResources` + +Auth is **mostly abstracted away from this mapping exercise**. The +Claude SDK ships a substantial OAuth surface — phantom +`SDKControlClaude*OAuth*` types in the +`SDKControlRequestInner` union ([sdk.d.ts:2373](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L2373)), +`accountInfo()`, `SDKAuthStatusMessage`, `'authentication_failed'` +error reason — but the host pre-empts all of it via a localhost +proxy. The SDK believes it is talking to Anthropic; we never let +it find out otherwise. + +#### The only mapping that matters + +`authenticate(resource, token)` accepts a GitHub OAuth token. That +token is fed into `IClaudeProxyService.start(token)` to obtain an +`IClaudeProxyHandle { baseUrl, nonce }`. The handle's two fields +become two entries in `Options.settings.env`: + +```ts +// claudeAgent.ts:429-434 +const settingsEnv: Record = { + ANTHROPIC_BASE_URL: proxyHandle.baseUrl, + ANTHROPIC_AUTH_TOKEN: `${proxyHandle.nonce}.${sessionId}`, + // ... +}; +``` + +That's the whole mapping. `ANTHROPIC_BASE_URL` points the SDK at +the local proxy; `ANTHROPIC_AUTH_TOKEN` is the per-session bearer +the proxy validates ([claudeProxyAuth.ts:39-60](claudeProxyAuth.ts#L39-L60)). +Outbound CAPI calls from the proxy use the GitHub token captured +at `start(token)` time. Everything downstream — token minting, +401/403 handling, model listing — is `ICopilotApiService`'s job +(M12), not the agent's. 
+ +#### IAgent surface — the bare minimum + +| Surface | What the provider does | +|---|---| +| `getProtectedResources()` ([agentService.ts:480](../../common/agentService.ts#L480)) | Return `[GITHUB_COPILOT_PROTECTED_RESOURCE]` ([agentService.ts:200-206](../../common/agentService.ts#L200-L206)). Synchronous, hardcoded, identical to CopilotAgent. | +| `authenticate(resource, token)` ([agentService.ts:502-506](../../common/agentService.ts#L502-L506)) | Reject unknown `resource` with `false`. On a new GitHub token, call `_claudeProxyService.start(token)`, swap `_proxyHandle`, dispose the old handle. Cited at [claudeAgent.ts:238-265](claudeAgent.ts#L238-L265). | +| `AHP_AUTH_REQUIRED` throw | Any session lifecycle method that runs before `authenticate()` has landed must throw `ProtocolError(AHP_AUTH_REQUIRED, msg, this.getProtectedResources())`. CopilotAgent does this at [copilotAgent.ts:382-385](../copilot/copilotAgent.ts#L382-L385); ClaudeAgent currently throws a plain `Error` at [claudeAgent.ts:413-416](claudeAgent.ts#L413-L416) — to be corrected in Phase 6.1 Cycle B (see [phase6.1-plan.md](phase6.1-plan.md)). | + +#### Why ClaudeAgent's `authenticate` ordering differs from CopilotAgent + +CopilotAgent commits `_githubToken` first, then runs `_stopClient()` +and `_refreshModels()` — both local, infallible side effects. +ClaudeAgent's side effect is `_claudeProxyService.start(token)`, +which can fail (port bind, network probe). So the Claude path +acquires the new handle *first*, only commits `_githubToken` and +`_proxyHandle` after `start()` resolves, then disposes the old +handle. This keeps retry semantics correct: a failed `start()` +leaves token state untouched, so the next `authenticate()` call +sees the same token as still-new and retries instead of short- +circuiting on "unchanged." + +That's the only structural divergence worth recording in this +exercise. 
+ +#### Invariants + +- **Resource id MUST match exactly.** `authenticate()` MUST + early-out with `false` for unknown `resource` strings; the agent + service relies on the `false` return to OR-collapse provider + responses. +- **Token state and side-effect state MUST commit together, in an + order that lets retry succeed.** Acquire fallible side-effect + first, commit fields together after success, release the previous + side-effect last. +- **The SDK's OAuth surface MUST stay dark.** The proxy + substitution is the contract: `ANTHROPIC_BASE_URL` + + `ANTHROPIC_AUTH_TOKEN` cover everything; `accountInfo()`, + `SDKAuthStatusMessage`, and the phantom `SDKControlClaude*OAuth*` + control types are not surfaced to clients. +- **`ANTHROPIC_API_KEY` MUST be scrubbed on both sides.** Subprocess + env strips it ([claudeAgent.ts:533](claudeAgent.ts#L533)); the + proxy refuses inbound `x-api-key` ([claudeProxyAuth.ts:30-32](claudeProxyAuth.ts#L30-L32)). + Either alone leaves a bypass. +- **`AHP_AUTH_REQUIRED` MUST carry the resource manifest in `data`.** + Shape is `AuthRequiredErrorData { resources: ProtectedResourceMetadata[] }` + ([errors.ts:107-110](../../common/state/protocol/errors.ts#L107-L110)). + Plain `Error` throws break the client's auth UI driver. + +### Open mapping questions + + +Things this catalogue has not yet fully resolved. Captured here so +they aren't lost; not blockers to extending the catalogue further. + +- **Tail-Turn state heuristic on replay.** When the JSONL ends with + an open `tool_use` (no matching `tool_result`), the protocol Turn + state is genuinely ambiguous. Need a documented rule. +- **`SDKUserMessageReplay` semantics.** The SDK distinguishes + `SDKUserMessage` (live input) from `SDKUserMessageReplay` (echoed + on resume) at the type level. The mapper currently treats them + identically on the live path; verify that's correct under all + resume paths. +- **System-message allowlist evolution.** The list above is + conservative. 
As the agent host gains UI for hook progress, plugin + install, rate limits, etc., some currently-dropped subtypes may + promote to `SystemNotification` parts. Track decisions by subtype. diff --git a/src/vs/platform/agentHost/node/claude/claudeAgent.ts b/src/vs/platform/agentHost/node/claude/claudeAgent.ts index 3eee2d159c1ff..55f04f674c83f 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgent.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgent.ts @@ -19,11 +19,13 @@ import { ILogService } from '../../../log/common/log.js'; import { ISyncedCustomization } from '../../common/agentPluginManager.js'; import { createSchema, platformSessionSchema, schemaProperty } from '../../common/agentHostSchema.js'; import { ClaudePermissionMode, ClaudeSessionConfigKey } from '../../common/claudeSessionConfigKeys.js'; +import { createClaudeThinkingLevelSchema, isClaudeEffortLevel, resolveClaudeEffort } from '../../common/claudeModelConfig.js'; import { SessionConfigKey } from '../../common/sessionConfigKeys.js'; import { AgentProvider, AgentSession, AgentSignal, GITHUB_COPILOT_PROTECTED_RESOURCE, IAgent, IAgentCreateSessionConfig, IAgentCreateSessionResult, IAgentDescriptor, IAgentMaterializeSessionEvent, IAgentModelInfo, IAgentResolveSessionConfigParams, IAgentSessionConfigCompletionsParams, IAgentSessionMetadata, IAgentSessionProjectInfo } from '../../common/agentService.js'; import { ISessionDataService } from '../../common/sessionDataService.js'; import type { ResolveSessionConfigResult, SessionConfigCompletionsResult } from '../../common/state/protocol/commands.js'; -import { ProtectedResourceMetadata, type ModelSelection, type ToolDefinition } from '../../common/state/protocol/state.js'; +import { AHP_AUTH_REQUIRED, ProtocolError } from '../../common/state/sessionProtocol.js'; +import { PolicyState, ProtectedResourceMetadata, type ModelSelection, type ToolDefinition } from '../../common/state/protocol/state.js'; import { CustomizationRef, 
SessionInputResponseKind, type MessageAttachment, type SessionInputAnswer, type ToolCallResult, type Turn } from '../../common/state/sessionState.js'; import { IAgentHostGitService } from '../agentHostGitService.js'; import { projectFromCopilotContext } from '../copilot/copilotGitProject.js'; @@ -53,6 +55,20 @@ function isClaudeModel(m: CCAModel): boolean { ); } +/** + * Augments the published `@vscode/copilot-api` `CCAModelSupports` with the + * per-model `adaptive_thinking` / `reasoning_effort` fields the runtime + * CAPI `/models` payload already carries but the SDK type doesn't yet + * declare. Tracked at microsoft/vscode-capi#85; remove this when the SDK + * catches up. Mirror of the same pattern at + * `extensions/copilot/src/platform/endpoint/common/endpointProvider.ts` + * (its locally-declared `IChatModelCapabilities`). + */ +interface IClaudeModelSupports { + readonly adaptive_thinking?: boolean; + readonly reasoning_effort?: readonly string[]; +} + /** * Project a {@link CCAModel} into the agent host's * {@link IAgentModelInfo} surface. The returned `provider` is the @@ -61,12 +77,20 @@ function isClaudeModel(m: CCAModel): boolean { * upstream `vendor: 'Anthropic'` field. */ function toAgentModelInfo(m: CCAModel, provider: AgentProvider): IAgentModelInfo { + const supports = m.capabilities?.supports; + const supportedEfforts = ((supports as IClaudeModelSupports | undefined)?.reasoning_effort ?? []).filter(isClaudeEffortLevel); + const configSchema = createClaudeThinkingLevelSchema(supportedEfforts); + const policyState = m.policy?.state as PolicyState | undefined; + const multiplier = m.billing?.multiplier; return { provider, id: m.id, name: m.name, maxContextWindow: m.capabilities?.limits?.max_context_window_tokens, - supportsVision: !!m.capabilities?.supports?.vision, + supportsVision: !!supports?.vision, + ...(configSchema ? { configSchema } : {}), + ...(policyState ? { policyState } : {}), + ...(typeof multiplier === 'number' ? 
{ _meta: { multiplierNumeric: multiplier } } : {}), }; } @@ -89,6 +113,13 @@ function toAgentModelInfo(m: CCAModel, provider: AgentProvider): IAgentModelInfo * - `project`: the resolved {@link IAgentSessionProjectInfo} (if any), * computed once at create time so duplicate `createSession` calls * for the same URI return identical project metadata. + * - `model` / `config`: the `IAgentCreateSessionConfig.model` and + * `IAgentCreateSessionConfig.config` bag from `createSession`. + * Carried verbatim through to materialize so the first `query()`'s + * `Options.*` reflect the user's choices instead of SDK defaults + * (M11 / Phase 6.1 C2). The bag is `Record` because + * schema validation already happened at `resolveSessionConfig`; this + * is the post-validation runtime payload. */ interface IClaudeProvisionalSession { readonly sessionId: string; @@ -96,6 +127,8 @@ interface IClaudeProvisionalSession { readonly workingDirectory: URI | undefined; readonly abortController: AbortController; readonly project: IAgentSessionProjectInfo | undefined; + readonly model: ModelSelection | undefined; + readonly config: Record | undefined; } /** @@ -209,6 +242,8 @@ export class ClaudeAgent extends Disposable implements IAgent { * writes it on first turn and fork's `vacuumInto` carries it forward. 
*/ private static readonly _META_CUSTOMIZATION_DIRECTORY = 'claude.customizationDirectory'; + private static readonly _META_MODEL = 'claude.model'; + private static readonly _META_PERMISSION_MODE = 'claude.permissionMode'; constructor( @ILogService private readonly _logService: ILogService, @@ -235,6 +270,18 @@ export class ClaudeAgent extends Disposable implements IAgent { return [GITHUB_COPILOT_PROTECTED_RESOURCE]; } + private _ensureAuthenticated(): IClaudeProxyHandle { + const handle = this._proxyHandle; + if (!handle) { + throw new ProtocolError( + AHP_AUTH_REQUIRED, + 'Authentication is required to use Claude', + this.getProtectedResources(), + ); + } + return handle; + } + async authenticate(resource: string, token: string): Promise { if (resource !== GITHUB_COPILOT_PROTECTED_RESOURCE.resource) { return false; @@ -280,7 +327,16 @@ export class ClaudeAgent extends Disposable implements IAgent { if (this._githubToken !== tokenAtStart) { return; } - const filtered = all.filter(isClaudeModel).map(m => toAgentModelInfo(m, this.id)); + // Stable sort surfaces the CAPI-flagged chat-default model + // first. The picker treats `models[0]` as the de facto + // default (modelPicker.ts:144 — `_selectedModel ?? models[0]`) + // since `IAgentModelInfo` carries no explicit `isDefault` + // bit. Stable comparator returns 0 for equal-priority models + // so CAPI's ordering wins on ties. 
+ const filtered = all + .filter(isClaudeModel) + .sort((a, b) => Number(b.is_chat_default) - Number(a.is_chat_default)) + .map(m => toAgentModelInfo(m, this.id)); this._models.set(filtered, undefined); } catch (err) { this._logService.error(err, '[Claude] Failed to refresh models'); @@ -295,6 +351,7 @@ export class ClaudeAgent extends Disposable implements IAgent { // #region Stubs — implemented in later phases async createSession(config: IAgentCreateSessionConfig = {}): Promise { + this._ensureAuthenticated(); if (config.fork) { // Fork moved to Phase 6.5: requires translating // `config.fork.turnId` (a protocol turn ID) to an SDK message UUID @@ -353,6 +410,8 @@ export class ClaudeAgent extends Disposable implements IAgent { workingDirectory: config.workingDirectory, abortController: new AbortController(), project, + model: config.model, + config: config.config, }); return { @@ -410,10 +469,7 @@ export class ClaudeAgent extends Disposable implements IAgent { if (!provisional.workingDirectory) { throw new Error(`Cannot materialize Claude session ${sessionId}: workingDirectory is required`); } - const proxyHandle = this._proxyHandle; - if (!proxyHandle) { - throw new Error('Claude proxy is not running; agent must be authenticated first'); - } + const proxyHandle = this._ensureAuthenticated(); const subprocessEnv = this._buildSubprocessEnv(); // Settings env: forwarded to the Claude subprocess via the SDK's @@ -446,7 +502,21 @@ export class ClaudeAgent extends Disposable implements IAgent { disallowedTools: ['WebSearch'], includeHookEvents: true, includePartialMessages: true, - permissionMode: 'default', + // M11 / Phase 6.1 C2 + I2: surface the user's createSession choices + // to the SDK. `Options.permissionMode` accepts the SDK's six-value + // `PermissionMode` union (sdk.d.ts:1560); our schema mirrors it, + // so the validated string flows through with no translation. 
+ // + // The latest model lives on the provisional record (kept in + // sync via `changeModel` once Phase 9 ships). The latest + // session config bag lives there too — no sidecar re-read + // here because the in-memory record is already authoritative + // for the create-time → first-send window. Mirrors + // CopilotAgent's pattern at `copilotAgent.ts:777` where + // `provisional.model` is the source of truth at materialize. + model: provisional.model?.id, + effort: resolveClaudeEffort(provisional.model), + permissionMode: this._resolvePermissionMode(provisional.config), sessionId, settingSources: ['user', 'project', 'local'], settings: { env: settingsEnv }, @@ -476,7 +546,11 @@ export class ClaudeAgent extends Disposable implements IAgent { // Persist customization-directory metadata BEFORE firing the // materialize event — see plan section 3.4 ordering rationale. try { - await this._writeCustomizationDirectory(provisional.sessionUri, provisional.workingDirectory); + await this._writeSessionMetadata(provisional.sessionUri, { + customizationDirectory: provisional.workingDirectory, + model: provisional.model, + permissionMode: this._resolvePermissionMode(provisional.config), + }); } catch (err) { session.dispose(); this._provisionalSessions.delete(sessionId); @@ -542,23 +616,133 @@ export class ClaudeAgent extends Disposable implements IAgent { } /** - * Persist the user's customization-directory pick to the per-session - * DB so {@link listSessions} can surface it (and Phase 6+ worktree - * materialization can still find the original folder). Mirrors - * CopilotAgent's `_storeSessionMetadata` pattern. + * Pull `permissionMode` out of the post-validation `IAgentCreateSessionConfig.config` + * bag, narrowing the runtime `unknown` value to the SDK's six-value + * `PermissionMode` union (sdk.d.ts:1560). Falls back to `'default'` + * when the bag is absent or carries something the schema validator + * shouldn't have accepted (defense-in-depth). 
+ */ + private _resolvePermissionMode(config: Record | undefined): ClaudePermissionMode { + const raw = config?.[ClaudeSessionConfigKey.PermissionMode]; + switch (raw) { + case 'default': + case 'acceptEdits': + case 'bypassPermissions': + case 'plan': + case 'dontAsk': + case 'auto': + return raw; + default: + return 'default'; + } + } + + /** + * Persist Claude-namespaced session metadata (customizationDirectory, + * `ModelSelection`, `permissionMode`) to the per-session DB so + * {@link listSessions} can surface it (and Phase 6+ worktree + * materialization can find the original folder). Mirrors + * CopilotAgent's `_storeSessionMetadata` pattern + * (`copilotAgent.ts:1532`): single `openDatabase` ref, `Promise.all` + * batching, only-write-on-defined. + * + * `model` is JSON-encoded via {@link _serializeModelSelection} so the + * parallel `{ id, config }` shape round-trips. `permissionMode` is + * stored verbatim (single string from a closed enum). */ - private async _writeCustomizationDirectory(session: URI, workingDirectory: URI): Promise { + private async _writeSessionMetadata(session: URI, fields: { customizationDirectory?: URI; model?: ModelSelection; permissionMode?: ClaudePermissionMode }): Promise { const dbRef = this._sessionDataService.openDatabase(session); + const db = dbRef.object; try { - await dbRef.object.setMetadata( - ClaudeAgent._META_CUSTOMIZATION_DIRECTORY, - workingDirectory.toString(), - ); + const work: Promise[] = []; + if (fields.customizationDirectory) { + work.push(db.setMetadata(ClaudeAgent._META_CUSTOMIZATION_DIRECTORY, fields.customizationDirectory.toString())); + } + if (fields.model) { + work.push(db.setMetadata(ClaudeAgent._META_MODEL, this._serializeModelSelection(fields.model))); + } + if (fields.permissionMode) { + work.push(db.setMetadata(ClaudeAgent._META_PERMISSION_MODE, fields.permissionMode)); + } + await Promise.all(work); } finally { dbRef.dispose(); } } + /** + * Read all Claude-namespaced session metadata from the 
per-session DB. + * Returns `{}` when no DB is present (external Claude CLI session, + * fresh install). Mirrors CopilotAgent's `_readSessionMetadata` + * (`copilotAgent.ts:1559`) — `tryOpenDatabase` so absence is not an + * error, single `Promise.all` for the parallel reads. + */ + private async _readSessionMetadata(session: URI): Promise<{ customizationDirectory?: URI; model?: ModelSelection; permissionMode?: ClaudePermissionMode }> { + const ref = await this._sessionDataService.tryOpenDatabase(session); + if (!ref) { + return {}; + } + try { + const [customizationDirectoryRaw, modelRaw, permissionModeRaw] = await Promise.all([ + ref.object.getMetadata(ClaudeAgent._META_CUSTOMIZATION_DIRECTORY), + ref.object.getMetadata(ClaudeAgent._META_MODEL), + ref.object.getMetadata(ClaudeAgent._META_PERMISSION_MODE), + ]); + return { + customizationDirectory: customizationDirectoryRaw ? URI.parse(customizationDirectoryRaw) : undefined, + model: this._parseModelSelection(modelRaw), + permissionMode: this._narrowPermissionMode(permissionModeRaw), + }; + } finally { + ref.dispose(); + } + } + + private _serializeModelSelection(model: ModelSelection): string { + return JSON.stringify(model); + } + + private _parseModelSelection(raw: string | undefined): ModelSelection | undefined { + if (!raw) { + return undefined; + } + try { + const value: { id?: unknown; config?: unknown } | string | number | boolean | null = JSON.parse(raw); + if (value && typeof value === 'object' && typeof value.id === 'string') { + const result: ModelSelection = { id: value.id }; + if (value.config && typeof value.config === 'object') { + const config: Record = {}; + for (const [key, configValue] of Object.entries(value.config)) { + if (typeof configValue === 'string') { + config[key] = configValue; + } + } + if (Object.keys(config).length > 0) { + result.config = config; + } + } + return result; + } + } catch { + // Older session metadata stored the raw model id as a plain string. 
+ } + return { id: raw }; + } + + private _narrowPermissionMode(raw: string | undefined): ClaudePermissionMode | undefined { + switch (raw) { + case 'default': + case 'acceptEdits': + case 'bypassPermissions': + case 'plan': + case 'dontAsk': + case 'auto': + return raw; + default: + return undefined; + } + } + disposeSession(session: URI): Promise { // Routed through {@link _disposeSequencer} so a concurrent // {@link shutdown} already serializing teardown for this same @@ -633,17 +817,8 @@ export class ClaudeAgent extends Disposable implements IAgent { return Promise.all(sdkEntries.map(async entry => { try { const sessionUri = AgentSession.uri(this.id, entry.sessionId); - const dbRef = await this._sessionDataService.tryOpenDatabase(sessionUri); - if (dbRef) { - try { - const raw = await dbRef.object.getMetadata(ClaudeAgent._META_CUSTOMIZATION_DIRECTORY); - return this._toAgentSessionMetadata(entry, { - customizationDirectory: raw ? URI.parse(raw) : undefined, - }); - } finally { - dbRef.dispose(); - } - } + const overlay = await this._readSessionMetadata(sessionUri); + return this._toAgentSessionMetadata(entry, overlay); } catch (err) { this._logService.warn(`[Claude] Overlay read failed for session ${entry.sessionId}`, err); } @@ -652,7 +827,35 @@ export class ClaudeAgent extends Disposable implements IAgent { })); } - private _toAgentSessionMetadata(entry: SDKSessionInfo, overlay: { customizationDirectory?: URI }): IAgentSessionMetadata { + /** + * Phase 6.1 / Cycle D4 — per-session lookup. Mirrors + * {@link CopilotAgent.getSessionMetadata} but accepts the + * external-CLI case: a session that exists on disk via the raw + * Anthropic CLI has no per-session DB, so we MUST NOT gate on the + * sidecar (the way Copilot's variant does). The SDK is the source + * of truth for existence; the overlay merely decorates. + * + * Failures in the overlay read are swallowed — a corrupt DB on one + * session must not lose the SDK-supplied summary/cwd. 
Failures in + * the SDK lookup propagate (the caller is doing a single targeted + * fetch and should learn that the SDK module is broken). + */ + async getSessionMetadata(session: URI): Promise { + const sessionId = AgentSession.id(session); + const sdkInfo = await this._sdkService.getSessionInfo(sessionId); + if (!sdkInfo) { + return undefined; + } + let overlay: { customizationDirectory?: URI; model?: ModelSelection } = {}; + try { + overlay = await this._readSessionMetadata(session); + } catch (err) { + this._logService.warn(`[Claude] Overlay read failed for session ${sessionId}`, err); + } + return this._toAgentSessionMetadata(sdkInfo, overlay); + } + + private _toAgentSessionMetadata(entry: SDKSessionInfo, overlay: { customizationDirectory?: URI; model?: ModelSelection }): IAgentSessionMetadata { return { session: AgentSession.uri(this.id, entry.sessionId), startTime: entry.createdAt ?? entry.lastModified, @@ -660,6 +863,7 @@ export class ClaudeAgent extends Disposable implements IAgent { summary: entry.customTitle ?? entry.summary, workingDirectory: entry.cwd ? 
URI.file(entry.cwd) : undefined, customizationDirectory: overlay.customizationDirectory, + model: overlay.model, }; } @@ -678,18 +882,22 @@ export class ClaudeAgent extends Disposable implements IAgent { type: 'string', title: localize('claude.sessionConfig.permissionMode', "Approvals"), description: localize('claude.sessionConfig.permissionModeDescription', "How Claude handles tool approvals."), - enum: ['default', 'acceptEdits', 'bypassPermissions', 'plan'], + enum: ['default', 'acceptEdits', 'bypassPermissions', 'plan', 'dontAsk', 'auto'], enumLabels: [ localize('claude.sessionConfig.permissionMode.default', "Ask Each Time"), localize('claude.sessionConfig.permissionMode.acceptEdits', "Auto-Approve Edits"), localize('claude.sessionConfig.permissionMode.bypassPermissions', "Bypass Approvals"), localize('claude.sessionConfig.permissionMode.plan', "Plan Only (Read-Only)"), + localize('claude.sessionConfig.permissionMode.dontAsk', "Don't Ask"), + localize('claude.sessionConfig.permissionMode.auto', "Auto"), ], enumDescriptions: [ localize('claude.sessionConfig.permissionMode.defaultDescription', "Prompt for every tool call."), localize('claude.sessionConfig.permissionMode.acceptEditsDescription', "Auto-approve file edits; prompt for shell and other tools."), localize('claude.sessionConfig.permissionMode.bypassPermissionsDescription', "Auto-approve every tool call."), localize('claude.sessionConfig.permissionMode.planDescription', "Read-only research mode; no tool calls executed."), + localize('claude.sessionConfig.permissionMode.dontAskDescription', "Auto-approve every tool call without prompting."), + localize('claude.sessionConfig.permissionMode.autoDescription', "Let the model classifier choose between approve and prompt per call."), ], default: 'default', sessionMutable: true, @@ -781,6 +989,13 @@ export class ClaudeAgent extends Disposable implements IAgent { message: { role: 'user', content: contentBlocks }, session_id: sessionId, parent_tool_use_id: null, + 
// M1 / Glossary: `Turn.id ↔ SDKUserMessage.uuid`. The SDK + // types this as a branded `${string}-…` template-literal + // alias of Node's `crypto.UUID`; cast at the boundary + // rather than threading the brand up to every caller. + // Mirrors the reference extension at + // `extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts:585`. + uuid: effectiveTurnId as `${string}-${string}-${string}-${string}-${string}`, }; await entry.send(sdkPrompt, effectiveTurnId); diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts index d3838335f6c32..bc410cfa68afa 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSdkService.ts @@ -35,6 +35,16 @@ export interface IClaudeAgentSdkService { */ listSessions(): Promise; + /** + * Looks up a single session's metadata by id. Resolves to `undefined` + * when the SDK has no record of it (deleted from disk, never created, + * or just outside the searched project tree). Used by + * {@link import('./claudeAgent.js').ClaudeAgent.getSessionMetadata} + * to compose SDK-supplied fields (summary, cwd, timestamps) with the + * per-session DB overlay. Phase 6.1 / Cycle D4. + */ + getSessionInfo(sessionId: string): Promise; + /** * Pre-warms the SDK subprocess and runs the init handshake. 
Returns * a {@link WarmQuery} whose `.query(promptIterable)` binds the @@ -61,6 +71,7 @@ export interface IClaudeAgentSdkService { */ export interface IClaudeSdkBindings { listSessions(options?: ListSessionsOptions): Promise; + getSessionInfo(sessionId: string): Promise; startup(params: { options: Options; initializeTimeoutMs?: number }): Promise; } @@ -101,6 +112,11 @@ export class ClaudeAgentSdkService implements IClaudeAgentSdkService { return sdk.listSessions(undefined); } + async getSessionInfo(sessionId: string): Promise { + const sdk = await this._getSdk(); + return sdk.getSessionInfo(sessionId); + } + async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { const sdk = await this._getSdk(); return sdk.startup(params); diff --git a/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts b/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts index 92939db34ed2b..c90b056adb535 100644 --- a/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts +++ b/src/vs/platform/agentHost/node/claude/claudeAgentSession.ts @@ -11,7 +11,7 @@ import { Disposable, toDisposable } from '../../../../base/common/lifecycle.js'; import { URI } from '../../../../base/common/uri.js'; import { ILogService } from '../../../log/common/log.js'; import { AgentSignal } from '../../common/agentService.js'; -import { IClaudeMapperState, mapSDKMessageToAgentSignals } from './claudeMapSessionEvents.js'; +import { mapSDKMessageToAgentSignals } from './claudeMapSessionEvents.js'; /** * One in-flight {@link send} request. Length of {@link ClaudeAgentSession._inFlightRequests} @@ -80,13 +80,6 @@ export class ClaudeAgentSession extends Disposable { */ private _queuedPrompts: SDKUserMessage[] = []; - /** - * Mutable state threaded into {@link mapSDKMessageToAgentSignals}. - * Lives on the session (not the mapper module) so that concurrent - * sessions don't cross-contaminate part-id allocations. 
- */ - private readonly _mapperState: IClaudeMapperState = { currentBlockParts: new Map() }; - /** * Flips to `true` on the first `system:init` SDK message. Phase 7+ * teardown+recreate flows pass `Options.resume = sessionId` to the @@ -229,7 +222,6 @@ export class ClaudeAgentSession extends Disposable { message, this.sessionUri, turnId, - this._mapperState, this._logService, ); for (const signal of signals) { diff --git a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts index 5fe2bf549a973..72b261bd283d7 100644 --- a/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts +++ b/src/vs/platform/agentHost/node/claude/claudeMapSessionEvents.ts @@ -4,35 +4,23 @@ *--------------------------------------------------------------------------------------------*/ import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk'; -import { generateUuid } from '../../../../base/common/uuid.js'; import type { URI } from '../../../../base/common/uri.js'; import type { ILogService } from '../../../log/common/log.js'; import type { AgentSignal } from '../../common/agentService.js'; import { ActionType } from '../../common/state/sessionActions.js'; import { ResponsePartKind } from '../../common/state/sessionState.js'; -/** - * Mutable mapping state owned by `ClaudeAgentSession` and threaded into - * {@link mapSDKMessageToAgentSignals}. Kept on the session — not in this - * module — so multiple sessions don't share state and the mapper itself - * stays a pure function. - */ -export interface IClaudeMapperState { - /** - * Maps content_block index → response part id. Populated on - * `content_block_start`, drained on `content_block_stop`, cleared on - * `message_start`. Used to route `content_block_delta` events to - * the right `SessionDelta` / `SessionReasoning` partId. - */ - readonly currentBlockParts: Map; -} - /** * Map one SDK message to zero or more agent signals. * - * Pure function. 
All state is in {@link IClaudeMapperState}, which the - * caller owns. Tests can therefore exercise the mapper directly with a - * fake state object. + * Pure function — no state. Phase 6 sets `canUseTool: deny`, so a turn + * is exactly one assistant message; `BetaRawContentBlockStartEvent.index` + * is therefore monotonic within a turn and can be used directly as a + * partId disambiguator: `${turnId}#${index}`. Phase 7 introduces + * multi-message turns (text → tool_use → tool_result → text) where the + * SDK resets `index` per message; the mapper will then need to mix in + * `message.id` (or an equivalent per-message counter) to keep partIds + * collision-free. * * Phase 6 emits: * - {@link ActionType.SessionResponsePart} (Markdown) on @@ -47,27 +35,61 @@ export interface IClaudeMapperState { * * Reducer ordering invariant: `SessionResponsePart` MUST precede the * first `SessionDelta` / `SessionReasoning` for that part id (see - * `actions.ts:233, 540`). This mapper allocates the part on - * `content_block_start` BEFORE any delta can arrive — deltas are - * SDK-ordered after the start — so the invariant holds by construction. + * `actions.ts:233, 540`). The SDK protocol orders `content_block_start` + * before any delta at the same index, so the invariant holds by + * construction. An out-of-protocol delta with no preceding start is a + * reducer no-op (`reducers.ts:240`), so no defensive guard is needed. 
*/ export function mapSDKMessageToAgentSignals( message: SDKMessage, session: URI, turnId: string, - state: IClaudeMapperState, logService: ILogService, ): AgentSignal[] { switch (message.type) { case 'stream_event': - return mapStreamEvent(message.event, session, turnId, state, logService); + return mapStreamEvent(message.event, session, turnId, logService); case 'result': return mapResult(message, session, turnId); + case 'assistant': + return mapAssistantCanonical(message, logService); default: return []; } } +/** + * Handle the canonical {@link SDKAssistantMessage} (`type: 'assistant'`) + * the SDK delivers as the final, authoritative message for a turn, + * alongside its `'stream_event'` partials. CONTEXT.md M8:875 names this + * envelope canonical: in principle the host could replace whatever the + * partial accumulator built. In practice the protocol reducer is + * append-only — there is no `SessionResponsePart` replacement action — + * so re-emitting `SessionResponsePart` / `SessionDelta` / + * `SessionReasoning` here would duplicate, not reconcile, the activeTurn + * content. With `Options.includePartialMessages: true` (Phase 6 sec. 3.4), + * partials produce the same content the canonical message carries, so + * dropping is the correct behavior. + * + * The remaining job is defense-in-depth: Phase 6 sets `canUseTool: + * deny`, so the canonical message should never carry `tool_use` blocks. + * If one arrives anyway (SDK race, future change, transport bug), warn + * and drop — mirrors the {@link mapStreamEvent} `content_block_start` + * `tool_use` warn-and-drop. Phase 7 lifts both warn-and-drop guards + * once tool calls are wired through. 
+ */ +function mapAssistantCanonical( + message: Extract, + logService: ILogService, +): AgentSignal[] { + for (const block of message.message.content) { + if (block.type === 'tool_use') { + logService.warn(`[claudeMapSessionEvents] dropped tool_use block on canonical SDKAssistantMessage (id=${block.id}, name=${block.name})`); + } + } + return []; +} + function mapResult( message: Extract, session: URI, @@ -112,20 +134,13 @@ function mapStreamEvent( event: Extract['event'], session: URI, turnId: string, - state: IClaudeMapperState, logService: ILogService, ): AgentSignal[] { const sessionStr = session.toString(); switch (event.type) { - case 'message_start': - state.currentBlockParts.clear(); - return []; - case 'content_block_start': { const block = event.content_block; if (block.type === 'text') { - const partId = generateUuid(); - state.currentBlockParts.set(event.index, partId); return [{ kind: 'action', session, @@ -135,15 +150,13 @@ function mapStreamEvent( turnId, part: { kind: ResponsePartKind.Markdown, - id: partId, + id: `${turnId}#${event.index}`, content: '', }, }, }]; } if (block.type === 'thinking') { - const partId = generateUuid(); - state.currentBlockParts.set(event.index, partId); return [{ kind: 'action', session, @@ -153,7 +166,7 @@ function mapStreamEvent( turnId, part: { kind: ResponsePartKind.Reasoning, - id: partId, + id: `${turnId}#${event.index}`, content: '', }, }, @@ -170,10 +183,7 @@ function mapStreamEvent( } case 'content_block_delta': { - const partId = state.currentBlockParts.get(event.index); - if (partId === undefined) { - return []; - } + const partId = `${turnId}#${event.index}`; if (event.delta.type === 'text_delta') { return [{ kind: 'action', @@ -203,10 +213,8 @@ function mapStreamEvent( return []; } + case 'message_start': case 'content_block_stop': - state.currentBlockParts.delete(event.index); - return []; - case 'message_delta': case 'message_stop': return []; diff --git 
a/src/vs/platform/agentHost/node/claude/phase5-plan.md b/src/vs/platform/agentHost/node/claude/phase5-plan.md index 9478ed302d6bb..ef320fa1105f6 100644 --- a/src/vs/platform/agentHost/node/claude/phase5-plan.md +++ b/src/vs/platform/agentHost/node/claude/phase5-plan.md @@ -2,6 +2,11 @@ > **Handoff plan** — written to be executed by an agent with no prior conversation context. All file paths and line citations are verified against the workspace at synthesis time. Cross-reference [roadmap.md](./roadmap.md) before committing exact phase numbers. +> **Status note (post-Phase 6.5 design — Phase 6.1 Cycle G).** Two pieces of this plan are now historical and superseded: +> +> 1. **Fork narrative.** The fork narrative below was drafted under the working assumption that the agent host would translate `protocolTurnId → SDK-event-uuid` *at fork time* via a live SDK session handle (e.g. `ClaudeAgentSession.getNextTurnEventId(…)` or a JSONL walk through `sdk.getSessionMessages`). That approach was attempted and reverted; **Phase 6.5 ships the contract-based persisted-mapping approach instead** — the mapping is captured on every `type:'result'` ingest by Phase 13's result-message mapper and stored in the session-data DB, so fork is an O(1) DB lookup with no JSONL inference. See [roadmap.md §"Phase 6.5 — Fork"](./roadmap.md) and [CONTEXT.md M9 fork sub-flow](./CONTEXT.md) for the canonical contract. The references to `getNextTurnEventId`, JSONL walks, and `sdk.getSessionMessages` lookups in the body of this plan are historical — read them as the design that was *replaced*, not the design that shipped. +> 2. **`permissionMode` enum width.** The §B5 schema example shows the original 4-value enum `['default', 'acceptEdits', 'bypassPermissions', 'plan']`. 
The canonical enum is the 6-value form `['default', 'acceptEdits', 'bypassPermissions', 'plan', 'dontAsk', 'auto']` (matching `ClaudePermissionMode` in [`claudeSessionConfigKeys.ts`](../../common/claudeSessionConfigKeys.ts) and the SDK's `PermissionMode` typedef), expanded in Phase 6.1 Cycle E1. See [CONTEXT.md M11/M12](./CONTEXT.md) and Cycle E1 in [phase6.1-plan.md](./phase6.1-plan.md) for the live code. + ## 1. Goal Replace the seven Phase-5 stubs in [claudeAgent.ts](claudeAgent.ts) (`createSession`, `disposeSession`, `getSessionMessages`, `listSessions`, `resolveSessionConfig`, `sessionConfigCompletions`, `shutdown`) with real implementations. **No live LLM traffic** in this phase — `sendMessage` stays a Phase-6 stub. The SDK's `query()` is **not** spawned in `createSession`. diff --git a/src/vs/platform/agentHost/node/claude/phase6-plan.md b/src/vs/platform/agentHost/node/claude/phase6-plan.md index 4ee8abb2ec5cd..34dcb4b3f8f82 100644 --- a/src/vs/platform/agentHost/node/claude/phase6-plan.md +++ b/src/vs/platform/agentHost/node/claude/phase6-plan.md @@ -2,6 +2,8 @@ > **Handoff plan** — written to be executed by an agent with no prior conversation context. All file paths and line citations are verified against the workspace at synthesis time. Cross-reference [roadmap.md](./roadmap.md) before committing exact phase numbers. +> **Status note (post-Phase 6.5 design — Phase 6.1 Cycle G).** The throw inside `createSession({ fork })` references `sdk.getSessionMessages` as the lookup mechanism, reflecting the originally-planned lazy-walk approach. **Phase 6.5 ships the contract-based persisted-mapping approach instead** — the `protocolTurnId → lastSdkMessageUuid` mapping is captured by Phase 13's result-message mapper on every `type:'result'` ingest and stored in the session-data DB; fork performs an O(1) DB lookup, not a JSONL walk. See [roadmap.md §"Phase 6.5 — Fork"](./roadmap.md) and [CONTEXT.md M9 fork sub-flow](./CONTEXT.md) for the canonical contract. + ## 1. 
Goal Replace [claudeAgent.ts](claudeAgent.ts)'s `sendMessage` stub with a real implementation that streams a single assistant turn (no tool execution) from the Claude SDK back to the workbench client as `AgentSignal`s. Introduce the **provisional / materialize** lifecycle pattern that Phase 5 deliberately deferred: `createSession` returns immediately with `provisional: true`, the SDK subprocess fork happens lazily on the first `sendMessage`, and `onDidMaterializeSession` fires once the SDK init handshake completes. @@ -867,6 +869,16 @@ These are decisions Phase 6 locks down so later phases are pure-additive. ### 8.1 Phase 6.5 — fork +> **Status update (post-Phase-6):** Phase 6.5 was attempted on top of +> Phase 6 and **fully reverted**. The implementation outline below is +> preserved as a historical record of the design at Phase-6 lock-down +> time; the **current source of truth** is `roadmap.md` § "Phase 6.5 — +> Fork (deferred)". The reverted attempt used a JSONL forward-scan +> heuristic to infer turn boundaries; the new approach persists +> `turnId → lastSdkMessageUuid` on result-message ingest, anchored on +> Phase 13's mapper. Phase 6.5 is no longer a stacked PR on Phase 6 — +> it sequences after Phase 13. + **Critical SDK divergence from CopilotAgent**: Claude SDK's `forkSession(sessionId, { upToMessageId, title })` at [sdk.d.ts:540-565](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) takes a **message UUID**, not an event id. This is structurally different from CopilotAgent's `getNextTurnEventId(turnId) → toEventId` pattern. Mirroring CopilotAgent's pattern would have been wrong. 
**Phase 6.5 implementation outline**: diff --git a/src/vs/platform/agentHost/node/claude/phase6.1-plan.md b/src/vs/platform/agentHost/node/claude/phase6.1-plan.md new file mode 100644 index 0000000000000..ecfe47f3c37b4 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/phase6.1-plan.md @@ -0,0 +1,582 @@ +# Phase 6.1 Implementation Plan — Mapping Conformance Pass + +> **Status:** Cycles A–G + post-Cycle-F architectural cleanup (including the stateless-mapper follow-up) complete. +> +> **Handoff plan** — written to be executed by an agent with no prior conversation context. All file paths and line citations are verified against the workspace at synthesis time. Cross-reference [roadmap.md](./roadmap.md) before committing exact phase numbers. + +## 1. Goal + +Bring the shipped Claude IAgent surface (Phases 4–6) back into M1–M13 mapping conformance with [CONTEXT.md](./CONTEXT.md). This is a **drift-correction phase**, not a feature phase — every change is a fix to existing code or to a doc bug in `CONTEXT.md`. + +**Phase 6.1 deliverable:** the shipped Phase 4–6 surface (`createSession`, `_materializeProvisional`, `sendMessage`, mapper, descriptor, models observable, listSessions, sidecar) agrees with the M1, M8, M11, M12, and M13 portraits — verified by per-cycle unit tests and a single live smoke run. + +**Out of scope (deferred):** + +- `abortSession` (`claudeAgent.ts:801` throws `TODO: Phase 9`) — Phase 9. +- `changeModel` — Phase 9. +- M9 fork `resume: sessionId` — Phase 6.5 (separate stacked PR). +- M1 yield-boundary mutation barrier — Phase 9 (no hot-swap state shipped yet, so the barrier has nothing to coordinate). +- Phase 7 tool calls, Phase 8 edits, Phase 10+ — all unaffected. + +**Exit criteria:** + +1. `CONTEXT.md` cites only methods that exist on `IAgent` (no `setSessionConfigValues` references). M11 and M12 invariants are internally consistent. +2. 
Both `createSession` and `_materializeProvisional` throw `ProtocolError(AHP_AUTH_REQUIRED, …, this.getProtectedResources())` when called pre-auth — never plain `Error`. +3. The outbound `SDKUserMessage` from `sendMessage` carries `uuid: effectiveTurnId`. Turn.id ↔ SDKUserMessage.uuid invariant holds for every turn. +4. `IAgentCreateSessionConfig.config` flows from `createSession` → provisional record → sidecar → `Options.*` on the first `query()` call. First turn uses the requested `model`, `permissionMode`, and `effort` — SDK defaults never silently win. +5. `permissionMode` enum carries 6 values (`'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | 'dontAsk' | 'auto'`), matching the SDK's `PermissionMode` type and the M11 portrait. +6. `IAgentDescriptor.displayName === 'Claude'` (the user-facing brand for this provider; "Claude Code" is forbidden as a UI string). `isClaudeModel` matches the M12-ratified predicate set. `toAgentModelInfo` surfaces `configSchema` (synthesized from CCAModel thinking capability), `policyState`, and `_meta`. `getSessionMetadata?` is implemented. `listSessions` surfaces ALL SDK-known sessions; the per-session sidecar is a best-effort enrichment overlay (deviation from CopilotAgent's drop-without-sidecar pattern, justified by Claude SDK's external-CLI session model). +7. `claudeMapSessionEvents.ts` handles the `'assistant'` (final canonical) `SDKMessage` envelope alongside `'stream_event'` and `'result'`. Final assistant content reconciles with prior partials per the M8:875 invariant. +8. Stale fork narratives in `phase5-plan.md` and `phase6-plan.md` agree with the Phase 6.5 contract-based fork model in `roadmap.md`. + +## 2. Verified drift (audit table) + +Findings cross-checked against `CONTEXT.md` and source by three independent reviewers (Opus, GPT, Gemini) plus a debate pass. + +### CRITICAL + +- **C1 (M1)** — [claudeAgent.ts:780](claudeAgent.ts#L780) builds `SDKUserMessage` without `uuid`. 
M1 + Glossary mandate `uuid = effectiveTurnId`. Breaks Turn.id ↔ SDKUserMessage.uuid invariant; Phase 6.5 fork and Phase 13 replay cannot function. +- **C2 (M11)** — [claudeAgent.ts:422-469](claudeAgent.ts#L422-L469) builds `Options` with no `model`, hardcoded `permissionMode: 'default'`. M11 says `IAgentCreateSessionConfig.config` MUST flow into `Options.*` on the first `query()` call. The provisional record `IClaudeProvisionalSession` (around line 92) **also** has no `config` field to carry the bag forward — structural gap. +- **C3 (M13)** — [claudeAgent.ts:415](claudeAgent.ts#L415) (`_materializeProvisional`) throws plain `Error('Claude proxy is not running...')`. [CONTEXT.md:2247](CONTEXT.md#L2247) says any lifecycle method that runs before `authenticate()` must throw `ProtocolError(AHP_AUTH_REQUIRED, msg, this.getProtectedResources())`. +- **C4 (M13)** — `createSession` at [claudeAgent.ts:297](claudeAgent.ts#L297) does NOT guard against unauthenticated state. CopilotAgent guards `createSession` directly at [copilotAgent.ts:382-385](../copilot/copilotAgent.ts#L382-L385). A client can call `createSession` pre-auth, get a provisional handle, then trip `_materializeProvisional`'s plain `Error` on first `sendMessage`. + +### IMPORTANT + +- **I1 (M8)** — [claudeMapSessionEvents.ts:54-66](claudeMapSessionEvents.ts#L54-L66) switches on `'stream_event' | 'result' | default`. The `'assistant'` envelope (final canonical) is dropped. [CONTEXT.md:875](CONTEXT.md#L875): "Partials are advisory; final `SDKAssistantMessage` is canonical." Phase 7 directly depends on this. +- **I2 (M11)** — [claudeSessionConfigKeys.ts:30](../../common/claudeSessionConfigKeys.ts#L30) and [claudeAgent.ts:681-701](claudeAgent.ts#L681-L701) declare 4-value `permissionMode` enum. [CONTEXT.md:1962](CONTEXT.md#L1962) specifies 5 (adds `'dontAsk'`). 
The SDK typedef at [sdk.d.ts:1560](../../../../../../extensions/copilot/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1560) actually exposes 6 (adds `'auto'`). Cycle A ratifies whether `'auto'` belongs in M11. +- **I3 (M12)** — ~~[claudeAgent.ts:229](claudeAgent.ts#L229) `displayName: 'Claude'`. [CONTEXT.md:1741](CONTEXT.md#L1741) specifies `'Claude Code'`.~~ **Withdrawn during Cycle D** — re-reading [CONTEXT.md:1741](CONTEXT.md#L1741) shows the doc already specifies `'Claude'`; no drift exists. "Claude Code" is also forbidden as a user-facing brand string. Cycle D1 is therefore a no-op (no code change). +- **I4 (M12)** — [claudeAgent.ts:47-53](claudeAgent.ts#L47-L53) `isClaudeModel` has 5 predicates. [CONTEXT.md:1789-1795](CONTEXT.md#L1789-L1795) specifies 3 (vendor, supported_endpoints, model_picker_enabled). Direction unratified — narrow impl OR document additions. +- **I5 (M12)** — [claudeAgent.ts:63-71](claudeAgent.ts#L63-L71) `toAgentModelInfo` returns 5 fields. [CONTEXT.md:1802-1812](CONTEXT.md#L1802-L1812) says it must also surface `configSchema` (synth from `CCAModel.capabilities.supports.thinking`), `policyState`, `_meta` (billing multiplier). +- **I6 (M12)** — `getSessionMetadata?` not implemented on `claudeAgent.ts`. [CONTEXT.md:1880-1900](CONTEXT.md#L1880-L1900): "should also implement for parity with CopilotAgent." +- **I7 (M12)** — Sidecar at lines 211/553/639 only persists `claude.customizationDirectory`. CopilotAgent's sidecar at [copilotAgent.ts:1532-1580](../copilot/copilotAgent.ts#L1532-L1580) carries `model`. Without sidecar persistence of `model`, `listSessions`/`getSessionMetadata` cannot satisfy [CONTEXT.md:1896](CONTEXT.md#L1896). +- **I8 (M12)** — [claudeAgent.ts:655-662](claudeAgent.ts#L655-L662) `_toAgentSessionMetadata` doesn't include `model` field at all. +- **I9 (M12)** — ~~[claudeAgent.ts:632-651](claudeAgent.ts#L632-L651) `listSessions` returns SDK-only sessions when no sidecar exists. 
[CONTEXT.md:2124](CONTEXT.md#L2124) specifies Claude should follow Copilot's "drop sessions without sidecar" filter pattern.~~ **Withdrawn during Cycle D** — the directive itself is wrong: dropping sidecar-less sessions in Claude erases sessions created by the external Claude CLI (an explicit Phase-5 exit criterion). The SDK is the source of truth for the session list; the sidecar is an enrichment overlay. CONTEXT.md is the doc bug, not `claudeAgent.ts`. Plan correction lands in Cycle D; D5 becomes a no-op (no code change). + +### Mapping doc bugs (Cycle A scope) + +- **Q1** — `CONTEXT.md` references `setSessionConfigValues` 4× (lines 1995, 1998, 2008, 2195). Method does NOT exist on `IAgent` (verified by grep across `src/vs/platform/agentHost/common/`). Definitive doc bug. +- **Q2** — M11 implies `permissionMode` should be `sessionMutable: true`, but Q1's missing protocol method means there's no mechanism to perform the mutation post-creation. Self-contradiction. Resolution must accompany Q1. + +### NIT + +- **N1 (M9)** — `phase5-plan.md` and `phase6-plan.md` describe pre-Phase-6.5 fork model. +- **N2 (M8)** — `claudeMapSessionEvents.ts` doesn't differentiate `result.subtype` error states for telemetry. Defer to Phase 14. + +### Documented exclusions (correctly stubbed; defer per roadmap) + +- `abortSession` at [claudeAgent.ts:801](claudeAgent.ts#L801) throws `TODO: Phase 9` — defer. +- `changeModel` unimplemented — Phase 9. +- M9 `resume: sessionId` for fork path — Phase 6.5. +- M1 yield-boundary mutation barrier — Phase 9 (no hot-swap state shipped yet). + +## 3. Cycle structure + +Order: **A → B → C → E → D → F → G**. 
+ +| Cycle | Scope | Files | +|---|---|---| +| A ✓ | Doc corrections (zero code) | [CONTEXT.md](./CONTEXT.md) | +| B ✓ | Auth conformance (C3 + C4) | [claudeAgent.ts](./claudeAgent.ts) | +| C ✓ | Send-seam uuid (C1) | [claudeAgent.ts](./claudeAgent.ts) | +| E ✓ | Materialize + metadata coherence (C2 + I2 + I7 + I8) | [claudeAgent.ts](./claudeAgent.ts), [claudeSessionConfigKeys.ts](../../common/claudeSessionConfigKeys.ts) | +| D ✓ | Catalog completeness (I3 + I4 + I5 + I6 + I9) | [claudeAgent.ts](./claudeAgent.ts) | +| F ✓ | Mapper widening: 'assistant' canonical (I1) | [claudeMapSessionEvents.ts](./claudeMapSessionEvents.ts) | +| G ✓ | Stale phase-plan + roadmap refresh (N1) | [phase5-plan.md](./phase5-plan.md), [phase6-plan.md](./phase6-plan.md), [roadmap.md](./roadmap.md) | + +### Cycle A ✓ — Mapping doc corrections (CONTEXT.md only) + +*Completed. See [Implementation Notes](#implementation-notes) for the decisions taken and the resulting CONTEXT.md changes.* + +- Replace 4× `setSessionConfigValues` (lines 1995, 1998, 2008, 2195) with the correct routing. Two options to choose from: + 1. Reference `IAgentCreateSessionConfig.config` (create-time only) and remove `sessionMutable: true` from M11 (resolves Q2). + 2. Mark as `TODO: protocol surface for live config edits not yet defined` and keep `sessionMutable: true` as a forward-looking marker. +- Resolve Q2 sessionMutable contradiction. +- Ratify I4 direction (narrow impl to 3 predicates OR document the 2 extras). +- Ratify I2 SDK 6-value vs CONTEXT 5-value (does `'auto'` belong in M11?). +- Update M13 OPEN_Q ("plain Error vs ProtocolError") status to "Cycle B fixes". + +**Acceptance:** `CONTEXT.md` cites only methods that exist on `IAgent`; sessionMutable invariants are internally consistent. + +**Risk:** None (doc-only). Decisions in this cycle constrain Cycles D and E. + +### Cycle B ✓ — Auth conformance (M13 / C3 + C4) + +*Completed. 
See [Implementation Notes](#implementation-notes) for the resulting code changes.* + +**Files:** [claudeAgent.ts](./claudeAgent.ts), [../../test/node/claudeAgent.test.ts](../../test/node/claudeAgent.test.ts) + +- Import `ProtocolError`, `AHP_AUTH_REQUIRED` (mirror imports already used by [copilotAgent.ts](../copilot/copilotAgent.ts)). +- **C3 fix:** replace plain `Error` in `_materializeProvisional` (line 415) with `ProtocolError(AHP_AUTH_REQUIRED, msg, this.getProtectedResources())`. +- **C4 fix:** add the same guard at the head of `createSession` (line 297). Mirror [copilotAgent.ts:382-385](../copilot/copilotAgent.ts#L382-L385). +- Update existing tests asserting old error message; add new tests for `createSession`-while-unauth. + +**Acceptance:** Both `createSession` and `_materializeProvisional` throw `ProtocolError` with code `-32007` and `data.resources` populated when called pre-auth. + +**Risk:** Test breakage (grep `'Claude proxy is not running'`); workbench-side handlers matching the message. + +### Cycle C ✓ — Send-seam uuid (M1 / C1) + +*Completed. See [Implementation Notes](#implementation-notes) for the resulting code changes.* + +**Files:** [claudeAgent.ts](./claudeAgent.ts), [../../test/node/claudeAgent.test.ts](../../test/node/claudeAgent.test.ts) + +- Add `uuid: effectiveTurnId as branded` to the `SDKUserMessage` literal at lines 780–785. Branded type is the SDK's `` `${string}-${string}-${string}-${string}-${string}` ``. + +**Acceptance:** Yielded `SDKUserMessage.uuid === effectiveTurnId`. + +**Risk:** Pre-existing transcripts have SDK-random uuids; one-time discontinuity. No callers depend on this yet (Phase 6.5 deferred). + +### Cycle E ✓ — Materialize + metadata coherence (M11 / C2 + I2 + I7 + I8) + +*Completed. See [Implementation Notes](#implementation-notes) for the resulting code changes.* + +The largest cycle. 
Unifying insight: provisional state, materialize-time config read, startup `Options`, and persisted session metadata all share the same data path. + +**Files:** [claudeAgent.ts](./claudeAgent.ts), [../../common/claudeSessionConfigKeys.ts](../../common/claudeSessionConfigKeys.ts), [../../test/node/claudeAgent.test.ts](../../test/node/claudeAgent.test.ts) + +- **E1.** Add `'dontAsk'` (and per Cycle A ratification, possibly `'auto'`) to `permissionMode` enum + label/description. +- **E2.** Add structural `config?: Record` field to `IClaudeProvisionalSession` (around line 92) so the `createSession` config bag actually survives until materialize. Currently dropped on the floor. +- **E3.** Persist `model` (and per Cycle A ratification, `permissionMode`/`effort`) to sidecar at create + materialize. Mirror CopilotAgent's pattern at [copilotAgent.ts:549-551](../copilot/copilotAgent.ts#L549-L551), [copilotAgent.ts:1532-1580](../copilot/copilotAgent.ts#L1532-L1580). Update `_writeSessionMetadata` and `_readSessionMetadata` accordingly. +- **E4.** At `_materializeProvisional`, **read from sidecar** for the latest config (not just from `provisional.config`). This matches CopilotAgent's pattern at [copilotAgent.ts:771](../copilot/copilotAgent.ts#L771) and handles the case where session config was edited after `createSession` but before first `sendMessage`. +- **E5.** Build `Options.model` from the resolved `CCAModel.id`. Verify SDK's `Options.model` accepts the CCAModel.id format (vs needing translation). Cite `sdk.d.ts:Options` for the type. +- **E6.** Replace hardcoded `permissionMode: 'default'` at line 449 with the resolved value. +- **E7.** Expand `_toAgentSessionMetadata` (lines 655–662) to include `model` field from sidecar (I8 fix). +- **E8** (optional, M11 invariant). Store `_currentModel`, `_currentPermissionMode`, `_currentEffort` on `ClaudeAgentSession` for "Restart preserves bijective state." Defer if `changeModel` (Phase 9) isn't shipping yet. 
+ +**Acceptance:** First turn uses requested `model` + `permissionMode` + `effort` (not SDK default). Round-trip: `createSession({ model: 'claude-sonnet-4-5' })` → first turn runs that model → `listSessions`/`getSessionMetadata` (after Cycle D) reflect it. + +**Risk:** + +1. First-turn behavior change is user-visible (correct, but flag in PR). +2. Sidecar schema change requires migration story — set `model: undefined` for legacy sidecars; impl tolerates undefined. +3. Must verify `CCAModel.id` ↔ SDK `Options.model` format compatibility before landing. +4. Tests on the materialize `Options` shape break. + +### Cycle D ✓ — Catalog completeness (M12 / I3 + I4 + I5 + I6 + I9) + +**Files:** [claudeAgent.ts](./claudeAgent.ts), [../../test/node/claudeAgent.test.ts](../../test/node/claudeAgent.test.ts), [CONTEXT.md](./CONTEXT.md) + +- **D1.** ~~`getDescriptor.displayName: 'Claude Code'` (was `'Claude'`).~~ **No-op.** Re-reading CONTEXT.md shows `'Claude'` is already canonical; "Claude Code" is also forbidden as a user-facing brand. I3 was a misread of the doc; no code change needed. Cycle A's audit-row revision (in this Cycle D pass) corrects the audit table. +- **D2.** No-op per Cycle A ratification — the impl's 5 predicates are canonical; M12 documents all five with rationale. +- **D3.** Expand `toAgentModelInfo` with `configSchema` (synth from `CCAModel.capabilities.supports.{adaptive_thinking,min_thinking_budget,max_thinking_budget}` — Anthropic models expose adaptive-thinking capability rather than `supportedReasoningEfforts`; mirror CopilotAgent's `_createThinkingLevelConfigSchema` shape but source the enum from the 5-value `ClaudeEffortLevel` union when the model supports adaptive thinking), `policyState` (from `CCAModelPolicy.state`), `_meta.multiplierNumeric` (from `CCAModelBilling.multiplier`). 
+- **D4.** Implement `getSessionMetadata?(session)` using SDK top-level `getSessionInfo(sessionId)` (sdk.d.ts:581 — verified to exist) joined with the `_readSessionMetadata` sidecar reader from Cycle E. Add a thin wrapper to `IClaudeAgentSdkService` so the SDK module stays behind the service boundary. +- **D5.** ~~Filter `listSessions` to drop sessions without sidecar, mirroring CopilotAgent's pattern.~~ **No-op.** External Claude CLI sessions have no sidecar and MUST surface (Phase-5 exit criterion); the shipped code at [claudeAgent.ts:761-797](claudeAgent.ts#L761-L797) already does the right thing. The plan's directive was a misread of conformance — Copilot can drop because every Copilot session is born inside VS Code; Claude cannot drop because external CLI sessions are first-class. CONTEXT.md correction (line ~1888 + line ~2159 table cell) lands in this cycle. + +**Acceptance:** Models observable carries `configSchema` for adaptive-thinking models, `policyState`, and `_meta.multiplierNumeric`; `getSessionMetadata` returns metadata for known sessions with `model`/`customizationDirectory` populated from sidecar and `summary`/`cwd`/timestamps from SDK; `listSessions` continues to surface all SDK-known sessions (sidecar-less ones too). + +*Completed. See [Implementation Notes](#implementation-notes) for the resulting code changes.* + +**Risk:** + +1. ~~Verify `IClaudeAgentSdkService.getSessionInfo` exists (interface inspection).~~ Verified — sdk.d.ts:581 exposes `getSessionInfo(sessionId, options?)`. Service interface needs a one-line wrapper addition. +2. ~~Verify CCAModel exposes thinking-capability data.~~ Verified — `CCAModelSupports` exposes `min_thinking_budget`/`max_thinking_budget` (types.d.ts:208-214). No `supportedReasoningEfforts` field on Anthropic CCAModel rows; D3 must synth the 5-value `ClaudeEffortLevel` enum directly when the model supports adaptive thinking. +3. Tests on the model filter shape may break (D2). 
— *No code change in D2; ignore.* + +### Cycle F ✓ — Mapper widening: 'assistant' canonical (M8 / I1) + +*Completed. See [Implementation Notes](#implementation-notes) for the resulting code changes.* + +**Files:** [claudeMapSessionEvents.ts](./claudeMapSessionEvents.ts), [claudeAgentSession.ts](./claudeAgentSession.ts) (state shape only), [../../test/node/claudeMapSessionEvents.test.ts](../../test/node/claudeMapSessionEvents.test.ts) + +- Add `case 'assistant'` to `mapSDKMessageToAgentSignals` (line 54–66 switch). +- Implement final-canonical reconciliation: emit final response parts that override partial-accumulated content. Extend `IClaudeMapperState` to track per-block content if reconciliation requires it. +- Verify reducer ordering: `SessionResponsePart` MUST precede first delta for that part id (existing invariant). +- **Scope discipline:** limit to verified live-envelope drift; do NOT pre-commit to replay-style reconciliation since replay is still stubbed at [claudeAgent.ts:598-605](claudeAgent.ts#L598-L605). + +**Acceptance:** synthetic `SDKAssistantMessage` produces canonical response parts; deltas-followed-by-final test passes. + +**Risk:** UI render glitches if reconciliation order is wrong. Phase 6's `canUseTool: deny` keeps `'assistant'` messages text-only in practice — safer landing zone than Phase 7. Phase 7 directly depends on this fix. + +### Cycle G ✓ — Stale phase-plan + roadmap refresh (NIT, doc-only) + +**Files:** [phase5-plan.md](./phase5-plan.md), [phase6-plan.md](./phase6-plan.md), [roadmap.md](./roadmap.md) + +Drift surface verified by a 3-reviewer council pass (Councillor-GPT / Opus / Gemini) on roadmap.md vs CONTEXT.md M1–M13. All three converged tightly on the same findings. Synthesized findings live at `/memories/session/review.md`; this cycle is the place they land. 
+ +#### G1 — Phase-plan stale fork narrative (original Cycle G scope) + +- Replace lazy-fork-time-lookup language in [phase5-plan.md](./phase5-plan.md) and [phase6-plan.md](./phase6-plan.md) with persisted-mapping language per the Phase 6.5 contract-based approach (already in [roadmap.md](./roadmap.md)). + +#### G2 — Roadmap drift fixes (added from council review) + +Each item below is a confirmed contradiction or staleness in [roadmap.md](./roadmap.md) relative to CONTEXT.md's locked M-mapping. Severity reflects implementation risk if a future phase consumes the roadmap as written. + +| # | Phase | roadmap.md says | CONTEXT.md says | Fix in roadmap.md | Severity | +|---|---|---|---|---|---| +| G2.1 | 9 steering | `Query.streamInput()` for mid-turn injection ([roadmap.md:687](./roadmap.md#L687)) | M10: zero callers of `streamInput` in reference; primitive is yielding `SDKUserMessage` with `priority: 'now'` into the existing prompt iterable ([CONTEXT.md:1180-1257](./CONTEXT.md#L1180-L1257)) | Replace `streamInput` language with prompt-iterable + `priority: 'now'` | HIGH | +| G2.2 | 9 changeModel | `Query.setModel()` only ([roadmap.md:689](./roadmap.md#L689)) | M11: bundle-atomic — `ModelSelection.id` + `config.effort` ⇒ `setModel` + `applyFlagSettings({ effortLevel })` with `'max' → 'xhigh'` clamp ([CONTEXT.md:1614-1665](./CONTEXT.md#L1614-L1665)) | Add effort fan-out, document the clamp | MED-HIGH | +| G2.3 | 11 customizations | `_pendingRestart` (restart-on-toggle) ([roadmap.md:735-737](./roadmap.md#L735-L737)) | M11: `reloadPlugins` is **defer-and-coalesce**, not restart ([CONTEXT.md:487](./CONTEXT.md#L487), [710](./CONTEXT.md#L710), [1544](./CONTEXT.md#L1544), [1585](./CONTEXT.md#L1585)) | Reclassify as defer-and-coalesce with `_pendingPluginReload` flag drained at next yield boundary | HIGH | +| G2.4 | 5 lifecycle | Allocate session + persist metadata immediately ([roadmap.md:407-424](./roadmap.md#L407-L424)) | M9: `IAgentCreateSessionResult.provisional`, 
`onDidMaterializeSession`, deferred `sessionAdded`. Provisional sessions own no SDK resources, no sidecar until materialization ([CONTEXT.md:920-1000](./CONTEXT.md#L920-L1000)) | Add provisional/materialize vocabulary; reference `onDidMaterializeSession` event; note no on-disk sidecar before materialization | MEDIUM | +| G2.5 | 5 listSessions | Generic SDK → IAgentSessionMetadata mapping ([roadmap.md:430](./roadmap.md#L430)) | M12: Claude does NOT drop sessions without sidecar (Copilot does); SHOULD implement `getSessionMetadata?` ([CONTEXT.md:1869-1878](./CONTEXT.md#L1869-L1878), [1904-1915](./CONTEXT.md#L1904-L1915), [2189-2212](./CONTEXT.md#L2189-L2212)) | Spell out the sidecar policy (best-effort enrichment, NOT filter), name `getSessionMetadata?` as in-scope for Phase 5 | MEDIUM | +| G2.6 | 7 tools | Generic permission/user-input wiring ([roadmap.md:632-651](./roadmap.md#L632-L651)) | M7: per-session `Map` cross-message attribution ([CONTEXT.md:503-512](./CONTEXT.md#L503-L512), [880](./CONTEXT.md#L880)). 
M2/M3: dual routing in `canUseTool` — arbitrary tools → `respondToPermissionRequest`, `INTERACTIVE_CLAUDE_TOOLS` (`'AskUserQuestion' \| 'ExitPlanMode'`) → `respondToUserInputRequest`; plus `Options.onElicitation` for MCP user input ([CONTEXT.md:597-612](./CONTEXT.md#L597-L612)) | Add the attribution map and the dual-routing requirement to Phase 7 scope | HIGH | +| G2.7 | 10 MCP | Per-query MCP recreation + yield-restart on tool diff ([roadmap.md:716-721](./roadmap.md#L716-L721)) | M11: `setMcpServers` is bijective (cheap runtime via `SDKControlMcpSetServersRequest`) ([CONTEXT.md:1488-1548](./CONTEXT.md#L1488-L1548)) | Distinguish in-process tool path (`createSdkMcpServer`) from external `setMcpServers` (runtime-mutable); reserve restart for the in-process diff case only | MEDIUM | +| G2.8 | 9–11 | Per-write paths described without taxonomy | M11 hot-swap / defer-and-coalesce / restart-required taxonomy ([CONTEXT.md:1576-1611](./CONTEXT.md#L1576-L1611)) | Adopt the M11 three-bucket taxonomy as the organizing framework for Phase 9/10/11 config-write paragraphs | MEDIUM (systemic) | +| G2.9 | 5/9/12 | `setPermissionMode` "internal SDK concern, not a protocol method" ([roadmap.md:691-694](./roadmap.md#L691-L694)) | M12: `permissionMode` is `sessionMutable: true` (M11 hot-swap, bijective) but the generic live-edit setter is TBD ([CONTEXT.md:1920-1935](./CONTEXT.md#L1920-L1935), [1958-1984](./CONTEXT.md#L1958-L1984)) | Acknowledge the protocol-evolution gap; note that until the generic setter lands, schema-mutable `permissionMode` round-trips as a `createSession` restart | LOW-MEDIUM | +| G2.10 | 6.5 fork | Correctly persisted-mapping + `Options.resume` ([roadmap.md:554-559](./roadmap.md#L554-L559)) | M9: also fires `onDidMaterializeSession` immediately because `forkSession` writes the session file synchronously ([CONTEXT.md:942-947](./CONTEXT.md#L942-L947)) | Add a one-line note that the fork path fires the materialize event eagerly (sketchy not wrong; documentation 
gap) | LOW | + +#### G3 — Compatible / no-change (council confirmed alignment) + +- Phase 13 "do not implement `truncateSession`" — aligned with M10. +- Phase 6 `abortSession` mechanism (`AbortController.abort()`, not `Query.interrupt()`) — aligned with M4. +- Phase 6.5 fork core flow — turn-id mapping + `Options.resume` aligned with M9 (apart from G2.10 documentation gap). + +**Acceptance:** roadmap.md and the two phase-plans agree with the M1–M13 portraits in CONTEXT.md. The 10 council-confirmed drift items above are resolved by edits on the roadmap side (none require CONTEXT.md changes; CONTEXT is already correct). + +**Risk:** None — doc-only. + +#### Implementation notes (Cycle G — what landed) + +Applied as in-place edits in this PR; no code changes. + +- **G1 (phase plans).** Rather than rewriting the bodies of two completed-phase handoff plans, added a *Status note (post-Phase 6.5 design — Phase 6.1 Cycle G)* block at the top of [phase5-plan.md](./phase5-plan.md) and [phase6-plan.md](./phase6-plan.md). The phase5-plan note covers two drifts: (a) the lazy-fork-time-lookup language (`getNextTurnEventId`, JSONL walks, `sdk.getSessionMessages`) is marked historical and superseded by Phase 6.5's persisted-mapping contract; (b) the §B5 4-value `permissionMode` enum example is marked superseded by the 6-value canonical form expanded in Cycle E1. The phase6-plan note covers the same fork-language drift in §1's `createSession({ fork })` description. Body text is preserved as historical record; readers are pointed at [roadmap.md](./roadmap.md), [CONTEXT.md](./CONTEXT.md), and [`claudeSessionConfigKeys.ts`](../../common/claudeSessionConfigKeys.ts) for current contracts. 
+- **G2.1–G2.10 (roadmap.md).** All 10 drift items resolved by in-place edits to the relevant phase sections: + - Phase 5 lifecycle rewritten around `IAgentCreateSessionResult.provisional` / `onDidMaterializeSession` (G2.4); `listSessions` sidecar policy spelled out as best-effort enrichment, `getSessionMetadata?` named in scope (G2.5); `resolveSessionConfig` notes the M12 generic-live-edit-setter gap (G2.9 partial). + - Phase 6.5 "Architectural model" sub-section gained a one-line note that fork fires `onDidMaterializeSession` eagerly because `forkSession` writes the file synchronously (G2.10). + - Phase 7 gained the `Map` attribution model and the `INTERACTIVE_CLAUDE_TOOLS` dual-routing branch in `canUseTool` (G2.6). + - Phase 9 rewritten with M11 hot-swap / defer-and-coalesce / restart-required taxonomy as opening framing (G2.8 partial); steering replaced `Query.streamInput` with prompt-iterable `priority: 'now'` yield + `IAgentSteeringConsumedSignal` semantics (G2.1); `changeModel` made bundle-atomic with `setModel` + `applyFlagSettings({ effortLevel })` fan-out and the `'max' → 'xhigh'` clamp documented (G2.2); `setPermissionMode` paragraph notes the protocol-surface gap (G2.9). + - Phase 10 reorganized to distinguish in-process tools (`createSdkMcpServer` + `Options.mcpServers`, restart-required) from external MCP servers (`Query.setMcpServers`, hot-swap) (G2.7). + - Phase 11 reclassified `setCustomizationEnabled` from `_pendingRestart` to defer-and-coalesce via `_pendingPluginReload` + `Query.reloadPlugins`, with the tool-set-divergence case kept as the narrow restart-required fallback (G2.3). +- **G3.** No edits required; council confirmed alignment. + +All drift items are resolved by edits on the roadmap / phase-plan side. CONTEXT.md was treated as the truth oracle and is unchanged. + +## 4. 
Files to modify (consolidated) + +| File | Cycles | What | +|---|---|---| +| [CONTEXT.md](./CONTEXT.md) | A | Mapping doc fixes; status updates | +| [claudeAgent.ts](./claudeAgent.ts) | B, C, D, E | Auth (createSession + materialize); uuid; toAgentModelInfo expansion; getSessionMetadata; permissionMode 5/6-value; sidecar with model; Options.model + permissionMode seeding; provisional.config; _toAgentSessionMetadata.model. **D1 (displayName) and D5 (listSessions filter) reversed during Cycle D — no code change for those steps.** | +| [claudeAgentSession.ts](./claudeAgentSession.ts) | F (state), E (optional E8) | Mapper state for reconciliation; bijective state for restart preservation | +| [claudeMapSessionEvents.ts](./claudeMapSessionEvents.ts) | F | `'assistant'` case + reconciliation | +| [../../common/claudeSessionConfigKeys.ts](../../common/claudeSessionConfigKeys.ts) | E1 | permissionMode enum expansion | +| [phase5-plan.md](./phase5-plan.md), [phase6-plan.md](./phase6-plan.md), [roadmap.md](./roadmap.md) | G | Stale fork descriptions | +| [../../test/node/claudeAgent.test.ts](../../test/node/claudeAgent.test.ts), [../../test/node/claudeMapSessionEvents.test.ts](../../test/node/claudeMapSessionEvents.test.ts) | All | Test coverage per cycle | + +## 5. Consensus risks + +1. **Test breakage at auth boundary** — grep `'Claude proxy is not running'`; coordinate with workbench if any consumer matches the message. +2. **Transcript discontinuity at uuid fix** — sessions written before Cycle C have SDK-random uuids; accept one-time discontinuity. +3. **First-turn behavior change at startup-config seeding** — user-visible but correct; flag in PR description. +4. **Sidecar schema change** — must tolerate legacy sidecars without `model` field (default to undefined). +5. **CCAModel ↔ Options.model format compatibility** — verify before Cycle E5 landing; fallback is a model-id-mapping helper. +6. 
~~**listSessions behavior change at D5** — sessions from other hosts disappear from the list. Document in PR.~~ **Withdrawn** — D5 reversed during Cycle D; no behavior change ships. +7. **`IClaudeAgentSdkService.getSessionInfo` interface** — Cycle D4 may need a one-line interface addition. +8. **Mapper reconciliation correctness** — Cycle F could double-emit text if not done carefully; test with synthetic messages. + +## 6. Verification + +**Per cycle** + +- Unit tests for the cycle's affected files. +- `npm run compile-check-ts-native`. +- `npm run valid-layers-check`. + +**After all cycles — live smoke** + +1. Authenticate. +2. `createSession({ config: { model: <model-id>, permissionMode: 'plan' } })`. +3. First `sendMessage`. +4. Verify response renders. +5. Verify SDK Turn `uuid` matches protocol `turnId` in transcript. +6. Verify `listSessions` and `getSessionMetadata` return the chosen model. + +## 7. Provenance + +This plan was synthesized from a 3-councillor council-plan run (Opus, GPT, Gemini) with a debate pass. The fan-out produced three independent drift audits; the synthesis preserved findings with at least 2/3 agreement; the debate pass added 4 items the synthesis missed (C4, I7, I8, I9) and promoted Q1 from open question to definitive doc bug. The order `A → B → C → E → D → F → G` reflects the post-debate consensus. 
+ +## Implementation Notes + +### Cycle A — doc corrections (completed) + +**Decisions taken** + +| Question | Choice | Rationale | +|---|---|---| +| Q1 / Q2 — how to handle 4× `setSessionConfigValues` references | **Option 2 (Forward-looking)** — keep `sessionMutable: true` on `permissionMode`; keep the same-schema-serves-both framing (`resolveSessionConfig` reads at both creation and post-creation, `IAgentCreateSessionConfig.config` writes at creation); mark the generic post-creation setter (working name: `setSessionConfigValues`) as a TBD protocol surface; require any `sessionMutable: true` property without a covering `changeModel`/`setCustomizationEnabled` path to round-trip via `createSession` until the generic setter lands. | The schema's mutability flag is the read-side contract — `permissionMode` has a bijective SDK setter (`Query.setPermissionMode`), so it's correctly marked. The doc bug was citing a method that doesn't exist; the doc fix is to mark it TBD without erasing the design intent. | +| I4 — `isClaudeModel` predicate count | **Option A** — document all 5 predicates in M12 with rationale. | Impl extras (`tool_calls`, `tryParseClaudeModelId`) have JSDoc-documented reasons (reference-extension parity, exclude synthetic ids like `'auto'`). Narrowing the impl would be a regression. No code change needed in Cycle D2. | +| I2 — does `'auto'` belong in M11 enum? | **Option Z** — include `'auto'`; enum is 6 values matching the SDK's `PermissionMode` type. | Surfacing the SDK's full enum on the IAgent surface lets the client UI reach the model-classifier-driven approval mode without a future schema bump. Cycle E1 expands the enum from 4 → 6. | + +**Files changed** + +- [CONTEXT.md](./CONTEXT.md) — 5 edits applied via `multi_replace_string_in_file`; 3 of them were then revised after reviewer pushback on the Q1/Q2 framing, and a 6th was added in the revision pass: + 1. 
M12 `isClaudeModel` filter prose (around line 1786) — `three predicates AND'd together` → 5 predicates with per-predicate rationale, citing impl JSDoc and reference-extension parity. *(Final.)* + 2. M12 `permissionMode` bullet in `resolveSessionConfig` properties list (around line 1975) — documents the full 6-value SDK enum (including `'auto'`) and re-affirms `sessionMutable: true` (M11 hot-swap; `Query.setPermissionMode()` is bijective). *(Revised after pushback.)* + 3. M12 `Same schema serves creation *and* post-creation display` subsection (around line 1996) — `resolveSessionConfig` is the read surface for both phases; `IAgentCreateSessionConfig.config` is the creation-time write bag; the generic post-creation setter is TBD; live edits to `sessionMutable: true` properties not covered by `changeModel`/`setCustomizationEnabled` round-trip via `createSession` until the generic setter lands. *(Revised after pushback.)* + 4. M12 `resolveSessionConfig is the schema source for runtime mutations too` invariant (around line 2192) — schema and implementation must agree (matching M11 hot-swap path required for any `sessionMutable: true` property); the future generic live-edit setter is the protocol conduit; restart-via-`createSession` is the interim fallback. *(Revised after pushback.)* + 5. M13 `AHP_AUTH_REQUIRED throw` row (around line 2247) — `— see open mapping question below` → `— to be corrected in Phase 6.1 Cycle B (see [phase6.1-plan.md](phase6.1-plan.md))`. Eliminates the dangling reference. *(Final.)* + 6. M12 `sessionConfigCompletions` prose (around line 2076) — `static five-value enum` → `static six-value enum` to match the I2 ratification. *(Added in revision pass.)* + +**Verification** + +- `grep setSessionConfigValues CONTEXT.md` — 4 matches remaining, all correctly framed as the **TBD generic post-creation setter** per Option 2. No prescriptive citations of a non-existent shipped method. +- `grep "five-value\|five values" CONTEXT.md` — 0 matches. 
All `permissionMode` enum mentions reflect the 6-value ratification. +- `grep "see open mapping question below" CONTEXT.md` — 0 matches. Dangling reference resolved. + +**Deviations from plan** + +- The first pass of edits picked Option 1 (Strict). Reviewer pushed back on Q1's framing: `IAgentCreateSessionConfig.config` is the creation-time write, `resolveSessionConfig` is the read surface for both creation and post-creation — not "creation-only." Three of the five edits were re-done to land on Option 2 (Forward-looking) instead. Cycle A's net behaviour is therefore: doc fixes the broken `setSessionConfigValues` citations by labelling them TBD, preserves the design intent of `sessionMutable: true` on `permissionMode`, and surfaces `'auto'` per I2. + +**Notes for downstream cycles** + +- **Cycle E1** must add **two** values to the `permissionMode` enum (`'dontAsk'` and `'auto'`), not one. Final enum is 6 values matching SDK `PermissionMode` type. +- **Cycle D2** is now a no-op for `isClaudeModel` — the impl's 5 predicates are ratified as canonical. Still need D1 (displayName), D3 (toAgentModelInfo), D4 (getSessionMetadata), D5 (listSessions filter). +- **Cycle E** must mark `permissionMode` as `sessionMutable: true` in the `resolveSessionConfig` schema. Until the generic live-edit setter lands as a protocol surface, the implementation may either (a) implement `permissionMode` edits via a Claude-specific path, or (b) leave the live edit unsupported with the round-trip-via-`createSession` fallback documented for clients. Either is acceptable for Phase 6.1; designing the generic setter is out of scope. + +### Cycle B — auth conformance C3 + C4 (completed) + +**Code changes** + +- [claudeAgent.ts](./claudeAgent.ts): + - Added `import { AHP_AUTH_REQUIRED, ProtocolError } from '../../common/state/sessionProtocol.js';`. 
+ - Introduced shared private helper `_ensureAuthenticated(): IClaudeProxyHandle` — returns the live `_proxyHandle` or throws `ProtocolError(AHP_AUTH_REQUIRED, 'Authentication is required to use Claude', this.getProtectedResources())`. + - **C4**: `createSession` calls `this._ensureAuthenticated()` at the head (before fork-branch), so a pre-auth `createSession` is rejected as `AHP_AUTH_REQUIRED` with the protected-resources hint instead of silently minting a provisional record. + - **C3**: `_materializeProvisional` replaces the plain `Error('Claude proxy is not running...')` with `const proxyHandle = this._ensureAuthenticated();`. The materialize path is now defense-in-depth (C4 makes the bare-public-API path unreachable), but stays in place for completeness and to keep both call sites symmetrical. + +**Tests** + +- New test (passes): `'createSession before authenticate throws ProtocolError(AHP_AUTH_REQUIRED) with protected resources'`. Verifies `code === AHP_AUTH_REQUIRED`, `message === 'Authentication is required to use Claude'`, and `data` is the `getProtectedResources()` array. +- Six pre-existing `createSession`-using tests updated to call `await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok')` first (lines 922, 958, 978, 1851, 1878, 2074 in `claudeAgent.test.ts`). The fork-rejection test still asserts `/Phase 6\.5/` because auth now succeeds before the fork branch is reached. +- Test count: **46 pass / 0 fail** (was 41 → +1 new + 6 updated +0 broken). Integration test (`claudeAgent.integrationTest.ts`) was already authenticating first; no change required. + +**Verification** + +- `grep "Claude proxy is not running"` across the workspace — 0 hits in non-plan code, confirming no workbench-side handler depends on the old error message. +- `get_errors` on both touched files — 0 errors. + +**Deviations from plan** + +- Plan called for guards inline at each call site. 
Implementation extracted them into a shared `_ensureAuthenticated()` helper to (a) keep both error throws byte-identical and (b) return the handle so `_materializeProvisional` can keep its narrowed `IClaudeProxyHandle` type without a non-null assertion. Net behaviour identical; readability improved. + +**Notes for downstream cycles** + +- **Cycle E** (and any future caller of `_proxyHandle` outside `authenticate`/`shutdown`) should funnel through `_ensureAuthenticated()` to inherit the auth-conformance error contract for free. +- **Cycle G2** (workbench surfacing) — verify the workbench's `AgentService` already maps `ProtocolError(AHP_AUTH_REQUIRED, …, [resources])` to its sign-in flow (CopilotAgent has the matching shape at `copilotAgent.ts:384`); no Claude-specific surfacing work expected. + +### Cycle C — Send-seam uuid C1 (completed) + +**Code changes** + +- [claudeAgent.ts](./claudeAgent.ts) at the `sendMessage` SDK-prompt-literal site (lines ~787–795 post-edit): added `uuid: effectiveTurnId as \`${string}-${string}-${string}-${string}-${string}\`` to the `SDKUserMessage` literal. The brand cast at the boundary mirrors the reference extension at [`claudeCodeAgent.ts:585`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts#L585) and the doc snippet at [CONTEXT.md:1272](./CONTEXT.md#L1272). One-line JSDoc cites the M1 / Glossary invariant (`Turn.id ↔ SDKUserMessage.uuid`) so future readers don't unwind the brand cast as cosmetic. + +**Tests** + +- New test (passes): `'sendMessage tags SDKUserMessage.uuid with the effective turn id (M1 / Turn.id ↔ uuid invariant)'`. Stages a single turn with explicit `turnId: 'turn-explicit'`, drains the prompt iterable, asserts `drained[0].uuid === 'turn-explicit'`. Test-suite count: **47 pass / 0 fail** (was 46 → +1 new). +- **Scope decision:** the originally drafted version of the test asserted both the explicit-turnId path AND the `turnId ?? 
generateUuid()` fallback path across two turns. The two-turn shape requires `queryAdvance` gating to keep the SDK transcript aligned per turn (mirroring the test at line 1100), which expanded the test outside Cycle C's scope. Reduced to a single-turn assertion — the fallback path is structurally trivial (`turnId ?? generateUuid()`, one line) and already exercised by every other `sendMessage` test in the suite that omits the `turnId` argument. + +**Verification** + +- TDD red-green-refactor: red test failed with `uuid: undefined` (matching the C1 finding); green after one-line fix. +- `get_errors` on both touched files — 0 errors. + +**Deviations from plan** + +- None. Plan called for `as branded` with the SDK's `\`${string}-${string}-${string}-${string}-${string}\`` shape; that's exactly what landed. + +**Notes for downstream cycles** + +- **Phase 6.5 fork** (deferred) can now treat `SDKUserMessage.uuid` as authoritative for turn-id lookup; the persisted-mapping-table contract from `roadmap.md` populates from this same value. +- **Phase 13 replay** (deferred) similarly relies on this invariant — `SDKUserMessageReplay.uuid` (whose type makes `uuid` required, not optional) will round-trip through the same id. + +### Cycle E — Materialize + metadata coherence C2 + I2 + I7 + I8 (completed) + +**Code changes** + +- [../../common/claudeSessionConfigKeys.ts](../../common/claudeSessionConfigKeys.ts) — `ClaudePermissionMode` union widened from 4 to 6 values (`'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | 'dontAsk' | 'auto'`); JSDoc updated to cite `sdk.d.ts:1560` and the I2 ratification. +- [claudeAgent.ts](./claudeAgent.ts): + - **E1** — `resolveSessionConfig` `permissionMode` schema gains `'dontAsk'` + `'auto'` enum values, with matching localized `enumLabels` + `enumDescriptions`. + - **E2** — `IClaudeProvisionalSession` extended with `readonly model: ModelSelection | undefined` and `readonly config: Record | undefined`. 
`createSession` now seeds both fields from the inbound `IAgentCreateSessionConfig` instead of dropping them. + - **E5/E6** — materialize site builds `Options.model = provisional.model?.id` and `Options.permissionMode = this._resolvePermissionMode(provisional.config)` (was hardcoded `'default'` with no `model`). The provisional record is the source of truth for these values at materialize — see deviations below for why E4's sidecar-priority read was reverted. + - **E3** — refactored `_writeCustomizationDirectory(session, workingDirectory)` → `_writeSessionMetadata(session, { customizationDirectory?, model?, permissionMode? })`. Single `openDatabase` ref, `Promise.all` batched writes, only-write-on-defined; mirrors CopilotAgent's `_storeSessionMetadata` shape (`copilotAgent.ts:1532`). + - **E3 read-side** — added `_readSessionMetadata(session): Promise<{ customizationDirectory?, model?, permissionMode? }>` mirroring `copilotAgent.ts:1559`. Uses `tryOpenDatabase` so missing DB is not an error. + - **E3 helpers** — added `_serializeModelSelection` (JSON.stringify) and `_parseModelSelection` (object-shape narrowing with legacy plain-string fallback) mirroring `copilotAgent.ts:492-522`. Added `_narrowPermissionMode` for the read-side type guard. Added `_resolvePermissionMode(config)` for the create-time bag → SDK enum coercion (used at materialize to resolve the `provisional.config` bag). + - **E4** — *not implemented*. See deviations below. + - **E7** — `_toAgentSessionMetadata` overlay arg widened to `{ customizationDirectory?, model? }` and emits `model` on `IAgentSessionMetadata`. `listSessions` overlay reader replaced with a single `_readSessionMetadata` call; the per-key `tryOpenDatabase` + `getMetadata` block is gone (deduplicated through the new helper). + - Added private `_META_MODEL = 'claude.model'` and `_META_PERMISSION_MODE = 'claude.permissionMode'` constants alongside the existing `_META_CUSTOMIZATION_DIRECTORY`. 
+ +**Tests** + +Two new tests added to [`claudeAgent.test.ts`](../../test/node/claudeAgent.test.ts), one pre-existing test updated: + +- Updated: `'resolveSessionConfig returns Claude-native permissionMode + reused Permissions schema'` (line ~2208) — `permissionModeEnum` snapshot expanded from 4 → 6 values; JSDoc updated to cite `sdk.d.ts:1560` and the Cycle A ratification. +- New (E2/E5/E6): `'createSession config.model + config.config.permissionMode flow into Options on first send (M11 / Phase 6.1 C2)'` — round-trip test asserting `Options.model === created-time model.id` and `Options.permissionMode === created-time permissionMode` after the first `sendMessage`. +- New (E3/E7): `'createSession.model round-trips through the per-session DB to listSessions[].model (Phase 6.1 I8 + I7 + C2)'` — full round-trip: `createSession({ model })` → `sendMessage` materializes (writes sidecar via `_writeSessionMetadata`) → `sdk.sessionList = [...]` → `listSessions` surfaces `model` on the entry's `IAgentSessionMetadata`. + +**Effort addendum** — `Options.effort` wired separately after the model + permissionMode work landed: + +- New file [../../common/claudeModelConfig.ts](../../common/claudeModelConfig.ts): + - `CLAUDE_THINKING_LEVEL_KEY = 'thinkingLevel'` — the `ModelSelection.config` sub-key carrying the picker's effort pick. Mirrors CopilotAgent's `ThinkingLevelConfigKey` (`copilotAgent.ts:83`) so a single picker contract spans both providers. + - `ClaudeEffortLevel` type — hand-rolled 5-value union (`'low' | 'medium' | 'high' | 'xhigh' | 'max'`) structurally identical to the SDK's `EffortLevel` (sdk.d.ts:443) but defined in `common/` so the layer stays SDK-free. + - `resolveClaudeEffort(model)` — pure narrowing function mirroring CopilotAgent's `_getReasoningEffort` (`copilotAgent.ts:487`). Pulled out of `ClaudeAgent` so the narrow can be exercised directly without standing up the full agent fixture. 
+- [claudeAgent.ts](./claudeAgent.ts) — materialize site sets `Options.effort = resolveClaudeEffort(provisional.model)`. No private `_resolveEffort` method, no `_CLAUDE_THINKING_LEVEL_KEY` constant on the class, no `EffortLevel` SDK import — the structural assignment from `ClaudeEffortLevel` to `Options.effort` is checked by TS without a cast. +- **Source of truth**: effort lives inside `ModelSelection.config.thinkingLevel` (a model-config sub-key), **not** as a top-level Claude session-config key. This mirrors CopilotAgent exactly and means the effort value piggybacks on the existing model serialize/parse + sidecar round-trip — no new `_META_EFFORT` constant, no new `_writeSessionMetadata` field, no new `resolveSessionConfig` schema property required. +- **Enum width at Options seam vs hot-swap seam**: `Options.effort` (sdk.d.ts:1214) accepts the full 5-value `EffortLevel` union. The live hot-swap path `applyFlagSettings({ effortLevel })` (sdk.d.ts:4292) only accepts a 4-value subset that omits `'max'`; that clamp lives in Phase 9, not here. +- New tests: + - [claudeModelConfig.test.ts](../../test/common/claudeModelConfig.test.ts) — focused unit tests on the extracted `resolveClaudeEffort` helper. One test covers all 5 accepted strings → SDK enum values; one covers the 5 degrade-to-`undefined` failure modes (no model, no config bag, empty config bag, unrelated config key, unrecognized value). + - [claudeAgent.test.ts](../../test/node/claudeAgent.test.ts): `'createSession model.config.thinkingLevel flows into Options.effort on first send (M11 / Phase 6.1 C2)'` — full round-trip wiring assertion that `Options.effort === 'high'` after `createSession({ model: { id, config: { thinkingLevel: 'high' } } })` + `sendMessage`. + +Test-suite count: **52 pass / 0 fail** (was 47 → +5 new + 1 updated). 
+ +**Verification** + +- TDD red-green per micro-step (E1 → E2/E5/E6 → E3/E7 → E4); each step's RED-flip was proven by a failing test, then GREEN-flipped by the minimal impl change. Full-suite re-run after each step confirmed no other tests broke. +- `get_errors` on both modified files at each step — 0 errors. + +**Deviations from plan** + +- Plan listed E1 → E7 as one cycle; impl decomposed into 4 micro-steps (E1; E2/E5/E6; E3/E7; E4) for cleaner red-green cycles. Net behaviour identical except for E4 (see next bullet). +- **E4 (sidecar-priority read at materialize) reverted after a subagent investigation of CopilotAgent.** The plan called for `_materializeProvisional` to read the sidecar via `_readSessionMetadata` and prefer those values over `provisional.*`, citing `copilotAgent.ts:771` as the matching pattern. Investigation showed that line is the *provisional initialization site*, not a sidecar read — CopilotAgent's actual materialize-time pattern (`copilotAgent.ts:777-783`) reads `provisional.model` directly and reads the *live* session config via `IAgentConfigurationService.getSessionConfigValues(sessionUri)`, not from the per-session DB. The DB sidecar in CopilotAgent is for durability across restarts (`listSessions`, `_resumeSession`), not as the materialize-time source of truth. In Phase 6.1, ClaudeAgent has no `changeModel` (deferred to Phase 9 per E8) and no `IAgentConfigurationService` integration (deferred to Phase 7), so there is **no mutation channel** between `createSession` and the first `sendMessage` that could write to the sidecar; an "E4 sidecar wins" branch would be dead code at this phase and would diverge from CopilotAgent's pattern. **Resolution:** materialize uses `provisional.model?.id` and `_resolvePermissionMode(provisional.config)` directly; sidecar persistence (E3) and `_readSessionMetadata` are kept because `listSessions` (E7) consumes the latter. 
The mid-stream live re-read lands in Phase 7 (config) and Phase 9 (`changeModel`), each through its own production-correct channel. +- Plan called the write helper `_writeSessionMetadata` (singular) and the read helper `_readSessionMetadata`. CopilotAgent's reference uses `_storeSessionMetadata` for the write. We chose `_writeSessionMetadata` for symmetry with the read helper and the plan's wording. +- Plan E5 risk-flagged `CCAModel.id ↔ Options.model` format compatibility ("verify before landing; fallback is a model-id-mapping helper"). Verified: SDK `Options.model` is `string` (sdk.d.ts:1289), and CCAModel.id is also a flat string identifier — no translation needed. The brand cast at the persist boundary (`_serializeModelSelection`) is JSON-only, not format-translation. +- Plan E8 (store `_currentModel`/`_currentPermissionMode`/`_currentEffort` on `ClaudeAgentSession` for "Restart preserves bijective state") deferred to Phase 9 (`changeModel`) per the plan's own "(optional, M11 invariant). … Defer if `changeModel` (Phase 9) isn't shipping yet." +- The plan's Cycle A note "**Cycle E** must mark `permissionMode` as `sessionMutable: true`" — `sessionMutable: true` was already present on `permissionMode` in the schema before this cycle (verified during E1). No additional change needed. + +**Notes for downstream cycles** + +- **Cycle D's `getSessionMetadata?(session)`** consumes `_readSessionMetadata` directly — the plan's D4 ("joined with the sidecar reader from Cycle E") references this helper. Cycle D should compose `_readSessionMetadata(session)` with `IClaudeAgentSdkService.getSessionInfo(id)` and pipe the result through `_toAgentSessionMetadata`. +- **Phase 7 live-config re-read** is the correct channel for mid-stream `permissionMode` (and any other mutable session-config) edits. 
Mirror CopilotAgent's `_materializeProvisional` pattern (`copilotAgent.ts:777-783`): inject `IAgentConfigurationService` and call `getSessionConfigValues(sessionUri.toString())` at materialize. The sidecar persist on materialize stays (so post-restart cold reads via `listSessions`/`getSessionMetadata` still work); the live read **replaces** the materialize-time read of `provisional.config`, it does **not** stack with a sidecar read. +- **Phase 9 `changeModel`** is the correct channel for live model edits. Mirror CopilotAgent's `changeModel` (`copilotAgent.ts:1212-1224`): mutate `provisional.model` while still provisional, RPC the live SDK after materialize, and **then** persist via `_writeSessionMetadata` so the sidecar reflects the new state for the next cold read. +- **Migration story (E3 risk #2)**: legacy sidecars written before Cycle E only have `claude.customizationDirectory`. `_readSessionMetadata` returns `model: undefined` and `permissionMode: undefined` for those rows — `listSessions` surfaces `model: undefined` on those entries (acceptable; the SDK row still appears) and the `_parseModelSelection` legacy-plain-string branch covers any older schema attempts. +- **Cycle G (stale phase-plan refresh)**: must update `phase5-plan.md`/`phase6-plan.md` references to the 4-value `permissionMode` enum to the 6-value canonical. + +### Cycle D — Catalog completeness M12 / I3 + I4 + I5 + I6 + I9 (completed) + +**Code changes** + +- [../../common/claudeModelConfig.ts](../../common/claudeModelConfig.ts): + - **D3 helper** — added `createClaudeThinkingLevelSchema(supportedEfforts: readonly ClaudeEffortLevel[])` returning a `ConfigSchema` whose `thinkingLevel.enum` is sourced from each model's own `reasoning_effort` list (different Claude models support different subsets, e.g. `['low','medium','high']`, `['high']`, `[]`). 
Mirror of CopilotAgent's `_createThinkingLevelConfigSchema(supportedReasoningEfforts, defaultReasoningEffort)` at copilotAgent.ts:457 — same per-model variation, same matching-`enumLabels` shape. Returns `undefined` for an empty list so the picker renders no thinkingLevel control for that model. Also exports `isClaudeEffortLevel(value): value is ClaudeEffortLevel` for callers that need to narrow runtime strings into the SDK's `EffortLevel` shape; `claudeAgent.ts` filters the per-model `reasoning_effort` array through it. +- [claudeAgent.ts](./claudeAgent.ts): + - **D3** — `toAgentModelInfo(m, provider)` expanded with three optional fields: + - `configSchema = createClaudeThinkingLevelSchema(supportedEfforts)` where `supportedEfforts = (supports.reasoning_effort ?? []).filter(isClaudeEffortLevel)`. Reads the per-model list off the runtime CAPI `/models` payload (which already carries `reasoning_effort: string[]` and `adaptive_thinking: boolean` on `capabilities.supports`) by narrowing through a local `IClaudeModelSupports` type — the published `@vscode/copilot-api` types don't yet declare these fields, tracked at [microsoft/vscode-capi#85](https://github.com/microsoft/vscode-capi/issues/85). Same pattern the extension already uses at [`extensions/copilot/src/platform/endpoint/common/endpointProvider.ts`](../../../../../../extensions/copilot/src/platform/endpoint/common/endpointProvider.ts) (its locally-declared `IChatModelCapabilities`). Drop the `IClaudeModelSupports` augmentation when the SDK catches up. + - `policyState = m.policy?.state as PolicyState | undefined` — propagates `CCAModelPolicy.state` straight through to clients so the picker can grey-out disabled rows. + - `_meta = { multiplierNumeric: m.billing?.multiplier }` (only when `multiplier` is a number) — surfaces `CCAModelBilling.multiplier` under the `_meta` side-channel slot per `IAgentModelInfo._meta` (`agentService.ts:271`). Spread-only-when-defined so non-multiplied models stay clean. 
+ - **D4** — added `getSessionMetadata(session: URI): Promise` mirroring `IAgent.getSessionMetadata?` (`agentService.ts:477`). Composes `_sdkService.getSessionInfo(id)` (SDK is the source of truth for existence) with `_readSessionMetadata(session)` (overlay), pipes through `_toAgentSessionMetadata`. Crucially **does NOT** gate on the sidecar — external Claude CLI sessions have no DB but must hydrate (Phase-5 exit criterion + the D5 reversal). Overlay read failures are caught and logged so a single corrupt DB cannot lose the SDK-supplied summary/cwd; SDK lookup failures propagate so the caller learns the SDK module is broken. + - **D1, D2, D5** — no-op (rationale captured in the audit-row strikethroughs at lines 47/53 and the cycle-step bullets D1/D5). +- [claudeAgentSdkService.ts](./claudeAgentSdkService.ts): + - Added `getSessionInfo(sessionId): Promise` to the `IClaudeAgentSdkService` interface and the `IClaudeSdkBindings` structural slice, plus the production passthrough on `ClaudeAgentSdkService` (one-line `await this._getSdk(); return sdk.getSessionInfo(sessionId)`). Keeps the SDK module behind the service boundary so the agent's `getSessionMetadata` doesn't reach into `@anthropic-ai/claude-agent-sdk` directly. + +**Tests** + +Two new tests added to [`claudeAgent.test.ts`](../../test/node/claudeAgent.test.ts), one pre-existing test updated, two test doubles extended, plus a `makeSupports` test helper: + +- Updated: `'authenticate populates models filtered to Claude family'` — model snapshot expanded to include `policyState: 'enabled'` and `_meta: { multiplierNumeric: 1 }` from the default `makeModel` fixture (`policy.state` + `billing.multiplier` were already on the fixture; D3 makes them visible on `IAgentModelInfo`). 
+- New (D3): `'authenticate sources configSchema enum from each model\'s reasoning_effort list (Phase 6.1 / Cycle D3 / I5)'` — five-model fixture exercising every per-model variation in one assertion: full `['low','medium','high']`, single-value `['high']`, empty `[]` (no schema), unknown values filtered out (`['low','bogus','high']` → `['low','high']`), and missing field (no schema). Snapshot is `{ modelId: configSchema | undefined }` so the per-model variation is the contract. +- New (D4): `'getSessionMetadata joins SDK info with sidecar overlay, returns SDK-only fields for external sessions, and undefined for unknown ids (Phase 6.1 / Cycle D4 / I7)'` — three-call assertion in one test (sidecar + external + unknown) snapshotting the joined output, plus the SDK-lookup-call list to lock the call shape. +- `makeSupports({ adaptive_thinking, reasoning_effort })` test helper — narrows the augmented runtime shape through one widening cast at the test boundary (mirror of the prod-side `IClaudeModelSupports`); tests then build per-model fixtures via spread without escape-hatch casts at every fixture site. +- `FakeClaudeAgentSdkService.getSessionInfo` — searches `sessionList` by id by default; `getSessionInfoOverride` hook for "session moved off disk" tests; `getSessionInfoCalls` records ids for call-shape assertions. +- `ProxyRoundTripSdkService` (integration test) — added `getSessionInfo(): Promise` stub to satisfy the interface; the integration suite never exercises the lookup so the stub is unconditional. +- Test-suite count: **54 pass / 0 fail** (was 52 → +2 new + 1 updated; `claudeModelConfig.test.ts` still 2/2). + +**Deviation note (D3 — corrected mid-cycle)** + +The first D3 implementation used a static 5-value `ClaudeEffortLevel` union for *any* adaptive-thinking model, gated on the presence of `min_thinking_budget` + `max_thinking_budget` on `CCAModelSupports`. That was wrong: per-model variation is the contract. 
Different Claude models expose different effort subsets at runtime (the extension's [`pickReasoningEffort`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeModels.ts) at line 208 reads `endpoint.supportsReasoningEffort` per-endpoint, e.g. `['low','medium','high']`, `['high']`, or `[]`). The corrected approach reads the runtime `reasoning_effort` field via a local `IClaudeModelSupports` type augmentation at the read boundary; SDK-type drift tracked at [microsoft/vscode-capi#85](https://github.com/microsoft/vscode-capi/issues/85). + +**Verification** + +- TDD red-green per micro-step (D3 → D4); each step's RED-flip was proven by a failing test, then GREEN-flipped by the minimal impl change. Full-suite re-run after each step confirmed no other tests broke. +- `get_errors` on all modified files at each step — 0 errors. + +**Deviations from plan** + +- **D3 source for the thinking-level enum.** Plan said "synth from `CCAModel.capabilities.supports.{adaptive_thinking,min_thinking_budget,max_thinking_budget}`". The shipped CCAModel typedef (types.d.ts:207–208) has no `adaptive_thinking` field — only the two budget bounds. First-pass resolution: detect adaptive thinking via `min_thinking_budget !== undefined && max_thinking_budget !== undefined`, source the enum from the static `ClaudeEffortLevel` union (the picker-emit side already uses it; the materialize-narrow side already accepts it). Diverges from CopilotAgent's `supportedReasoningEfforts`-driven shape but is the only correct path for Anthropic models. **Superseded mid-cycle:** this first-pass resolution was replaced by the per-model read of the runtime `reasoning_effort` list through the local `IClaudeModelSupports` augmentation (see "Deviation note (D3 — corrected mid-cycle)" above), which matches CopilotAgent's `supportedReasoningEfforts`-driven shape. +- **D4 sidecar-gated vs SDK-gated existence check.** CopilotAgent's `getSessionMetadata` (`copilotAgent.ts:560-590`) returns `undefined` when there's no stored sidecar — every Copilot session is born inside VS Code, so the sidecar is a sound presence test. Claude cannot inherit that pattern: the same external-CLI carve-out that drives the listSessions D5 reversal also applies here. 
Resolution: use `_sdkService.getSessionInfo(sessionId)` as the existence test (SDK miss ⇒ undefined) and treat the overlay as decoration only. +- **No `IAgentModelInfo` interface change.** The three new fields (`configSchema`, `policyState`, `_meta`) were already optional on the interface (`agentService.ts:269-271`); D3 only fills them in for the Claude provider. CopilotAgent's `_listModels` already surfaces all three (`copilotAgent.ts:600-610`). + +**Notes for downstream cycles** + +- **Phase 9 `changeModel`** can use the same `createClaudeThinkingLevelSchema` helper to advertise the picker contract on a per-model basis after live model swaps; no new helper needed. +- **`_meta` schema convention**: D3 introduces the `multiplierNumeric` key under Claude's `_meta`. Matches CopilotAgent's existing `_meta.multiplierNumeric` (verified at `copilotAgent.ts:608`); clients reading either provider's models can index this slot uniformly. + +### Cycle D follow-ups (post-D, pre-F) + +Three small drift-free extensions to D3 landed between Cycle D close and Cycle F start, all within Cycle D's already-listed file scope. Captured here so the plan doesn't lie about what shipped. + +- **Direct unit tests for the D3 helpers.** Added [`../../test/common/claudeModelConfig.test.ts`](../../test/common/claudeModelConfig.test.ts) covering `resolveClaudeEffort` (2 tests), `isClaudeEffortLevel` (1), and `createClaudeThinkingLevelSchema` (3 — variation snapshot, default-rule, aliasing safety). 5 tests total. The shipped `claudeAgent.test.ts` already exercises the helpers end-to-end through `toAgentModelInfo`, but direct tests pin the picker-emit / materialize-narrow contract independent of the agent boundary. +- **Default thinking level rule.** `createClaudeThinkingLevelSchema` now emits `default: 'high'` when (and only when) `'high'` is in the model's `supportedEfforts` list; no default otherwise. 
Mirrors the extension's canonical rule at [`claudeCodeModels.ts:230`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeModels.ts#L230). Surfaces through `IConfigSchema.properties[key].default` so the JSON-schema-driven picker pre-selects 'high' on a fresh session for any model that supports it. +- **Option A — sort `is_chat_default` models first.** [`claudeAgent.ts`](./claudeAgent.ts) `_refreshModels` now stable-sorts `all.filter(isClaudeModel)` by `is_chat_default` descending before mapping through `toAgentModelInfo`. The `IAgentModelInfo` protocol carries no `isDefault` bit; the workbench picker uses `models[0]` as the de-facto default at [`modelPicker.ts:144`](../../../../../sessions/contrib/copilotChatSessions/browser/modelPicker.ts#L144) (`_selectedModel ?? models[0]`). Stable comparator (returns 0 on ties) preserves CAPI's relative ordering among non-default models. Test added: `'authenticate surfaces the CAPI chat-default model first; ties preserve insertion order'`. **No protocol change** to `IAgentModelInfo`. +- **Test count after follow-ups:** **59 pass / 0 fail** (`claudeAgent.test.ts`: 54 → 55; `claudeModelConfig.test.ts`: 0 → 5; `claudeAgent.integrationTest.ts`: unchanged). + +### Cycle F — Mapper widening: 'assistant' canonical M8 / I1 (completed) + +**Code changes** + +- [claudeMapSessionEvents.ts](./claudeMapSessionEvents.ts): + - Added `case 'assistant':` to the top-level `mapSDKMessageToAgentSignals` switch, dispatching to a new private `mapAssistantCanonical(message, logService)` helper. + - `mapAssistantCanonical` deliberately returns `[]` for text/thinking content. The reducer at [`reducers.ts:338-356`](../../common/state/protocol/reducers.ts#L338-L356) is **append-only** — there is no `SessionResponsePart` replacement action. 
Because `Options.includePartialMessages: true` (Phase 6 §3.4) drives the partials to produce the same content the canonical message carries, re-emitting on the canonical envelope would duplicate, not reconcile, the activeTurn `responseParts` list. + - Defense-in-depth: scans `message.message.content` for `tool_use` blocks and warns each one with the `id`/`name`. Mirrors the existing `content_block_start` `tool_use` warn-and-drop at [claudeMapSessionEvents.ts:163-167](claudeMapSessionEvents.ts#L163-L167). Both warns lift in Phase 7 once tool calls are wired through. + - JSDoc on the new helper documents M8:875 ("partials advisory; canonical authoritative") and explains why the only correct action under append-only-reducer + `includePartialMessages: true` is "drop". + +**Tests** + +Two new tests in [`claudeAgent.test.ts`](../../test/node/claudeAgent.test.ts), one new builder, one extension to `createTestContext`, and a small log-capturing fake: + +- New helper: `makeAssistantMessage(sessionId, content)` builds an `SDKAssistantMessage` envelope (`type: 'assistant'`) with the SDK's full `Anthropic.Messages.Message` shape. Sits next to the existing `make*` builders. Uses `'fake-assistant-id'` etc. as static fixtures since these IDs aren't asserted by any test. +- New helper: `CapturingLogService` — minimal `ILogService` test double that records `warn(message)` calls into a `warns: string[]` array for assertions. Other methods are no-ops. Plumbed via a new optional `logService` field on `createTestContext`'s options bag (defaults to `NullLogService`); the option flows through to the fake `IClaudeProxyService` (whose [stale token logging path](./claudeAgent.ts#L1099) wants a real-ish log service in the test). +- New test (passes): `'canonical SDKAssistantMessage with tool_use content fires defense-in-depth warning (Phase 6.1 / Cycle F)'`. Stages a `system:init`, a canonical `SDKAssistantMessage` carrying a `tool_use` content block, and a `result:success`. 
Asserts `responsePartCount === 0` and `logService.warns` contains a `tool_use`-mentioning entry. +- New test (passes): `'canonical SDKAssistantMessage with text content does not double-emit signals already produced by stream_event partials (Phase 6.1 / Cycle F)'`. Stages the canonical no-double-emit scenario: `system:init` → `stream_event(message_start, content_block_start, text_delta='hello', content_block_stop, message_stop)` → canonical `SDKAssistantMessage([{type:'text', text:'hello'}])` → `result:success`. Asserts exactly **1** `SessionResponsePart` and exactly **1** `SessionDelta` (with `content === 'hello'`) — no duplication from the canonical envelope. +- Test count: **61 pass / 0 fail** (was 59 → +2 new). `claudeModelConfig.test.ts` still 5/5 — total agentHost claude suite **66 pass**. + +**Verification** + +- TDD red-green: red phase confirmed by running just the two new tests with the mapper still on `default` arm — tool_use warning test failed (no `case 'assistant'` arm meant `message.message.content` was never inspected). Green after adding the case to the switch and the helper. +- `get_errors` on both touched files — 0 errors. + +**Deviations from plan** + +- **No `IClaudeMapperState` extension.** Plan flagged "Extend `IClaudeMapperState` to track per-block content if reconciliation requires it." Reconciliation does NOT require it: under append-only-reducer + `includePartialMessages: true`, the canonical message's correct action is "drop, with a defense-in-depth scan." No state extension shipped; [claudeAgentSession.ts](./claudeAgentSession.ts) untouched. +- **No standalone `claudeMapSessionEvents.test.ts`.** Plan listed [`../../test/node/claudeMapSessionEvents.test.ts`](../../test/node/claudeMapSessionEvents.test.ts) as a target; the existing convention in this codebase tests the mapper end-to-end through the agent (consistent with the existing `'text content_block emits SessionResponsePart(Markdown) before SessionDelta'` family of tests). 
Adding a standalone file would duplicate the harness for no gain. The new tests were added to `claudeAgent.test.ts` next to its peers. +- **Reducer ordering invariant — verified by construction, not by new test.** The plan called out "Verify reducer ordering: `SessionResponsePart` MUST precede first delta for that part id." Cycle F adds zero new emissions in the canonical text path, so the invariant cannot regress. The existing tests at [`claudeAgent.test.ts:1369`](../../test/node/claudeAgent.test.ts#L1369) and [`:1444`](../../test/node/claudeAgent.test.ts#L1444) continue to lock the invariant for the `stream_event`-driven path. + +**Notes for downstream cycles** + +- **Phase 7 (tool calls).** The defense-in-depth `tool_use` warn lifts in Phase 7. The mapper will then emit `SessionToolCallStart` from the canonical envelope's `tool_use` blocks (alongside the matching emissions already planned for `stream_event` partials). The `mapAssistantCanonical` helper is the dispatch site for that work. +- **Replay parity (Phase 13).** Replay produces canonical `SDKAssistantMessage`s only — there are no `stream_event` partials in JSONL transcripts. `mapAssistantCanonical` is therefore the *only* path that emits text response parts in replay, so its body must grow from "drop" to "emit canonical parts" before replay ships. Until then, the stub at [`claudeAgent.ts:598-605`](./claudeAgent.ts#L598-L605) keeps replay off. +- **Boundary asymmetry preserved.** Live still closes a turn on `'result'` (the existing code path); canonical `'assistant'` deliberately does NOT close the turn. M8 boundary contract intact. + +### Architectural cleanup (post-Cycle-F, pre-Cycle-G) + +Two follow-up refactors that came out of the post-Cycle-F architectural review. Both are pure debt reduction — no behavior change, no protocol change, no new tests for cycles A–F's behavior. Phase 7 readiness is the underlying motivator: the mapper grows, so the seam needs to grow with it instead of the call sites. 
+ +**1. Encapsulate mapper state as an opaque `ClaudeMapperState` class** + +- [`claudeMapSessionEvents.ts`](./claudeMapSessionEvents.ts): Replaced `export interface IClaudeMapperState { readonly currentBlockParts: Map; }` with `export class ClaudeMapperState`. The class exposes 4 named operations — `resetMessage()`, `allocPart(index, partId)`, `getPart(index)`, `dropPart(index)` — backed by a private `_blockParts: Map`. The lifecycle invariant ("clear on `message_start`, allocate on `content_block_start`, look up on `content_block_delta`, drop on `content_block_stop`") now lives behind those names. +- The mapper signatures (`mapSDKMessageToAgentSignals`, internal `mapStreamEvent`) take `state: ClaudeMapperState` instead of `state: IClaudeMapperState`. The four internal call sites (`message_start` → `state.resetMessage()`, `content_block_start text/thinking` → `state.allocPart(index, partId)` ×2, `content_block_delta` → `state.getPart(event.index)`, `content_block_stop` → `state.dropPart(event.index)`) route through the methods. +- [`claudeAgentSession.ts`](./claudeAgentSession.ts): `private readonly _mapperState: IClaudeMapperState = { currentBlockParts: new Map() };` becomes `private readonly _mapperState: ClaudeMapperState = new ClaudeMapperState();`. Import updated. No other call sites — `_mapperState` is only ever passed to `mapSDKMessageToAgentSignals`. +- **Why now.** Phase 7 grows the state with at least one cross-message map (in-flight tool calls keyed by SDK `tool_use_id`) and the lifecycle for those is *not* "drop on `message_start`" — it's "drop on `tool_result` envelope or terminal `result`." Encoding the per-message vs. cross-message lifecycle distinction in a `Map` literal at the call site would have meant a breaking change in Phase 7. Encoding it in `resetMessage()` (per-message fields drop here; cross-message fields explicitly excluded) localizes that change to one method body. 
+- **Why a class, not a tighter interface.** The previous `interface IClaudeMapperState { readonly currentBlockParts: Map; }` had `readonly` on the *field reference* but not on the *Map's mutation surface*; callers could (and did) call `.set` / `.get` / `.delete` / `.clear` directly. A class with private state and a method-only surface fixes that without `Readonly<...>` wrappers that wouldn't have made the Map's mutators inaccessible anyway. +- **No protocol change. No new tests for state ownership specifically — the state semantics are tested through the mapper behavior, see "Direct mapper unit tests" below.** + +**2. Direct mapper unit tests at the function seam** + +- New file: [`../../test/node/claudeMapSessionEvents.test.ts`](../../test/node/claudeMapSessionEvents.test.ts). 11 tests covering the mapper as a pure function with a fresh `ClaudeMapperState` per scenario. Cases: + 1. `message_start` clears state and emits no signals (verified by post-reset delta resolving to `[]`). + 2. text content block: start emits `SessionResponsePart(Markdown)`, deltas emit `SessionDelta` with the same `partId`, stop drops the part (post-stop delta at the same index emits `[]`). + 3. thinking content block: start emits `SessionResponsePart(Reasoning)`, delta emits `SessionReasoning`. + 4. streamed `tool_use` content block at `content_block_start` is dropped with a warn log (defense-in-depth). + 5. canonical `'assistant'` envelope drops `tool_use` blocks with a warn log and emits nothing. + 6. canonical `'assistant'` envelope without `tool_use` emits nothing and does not warn. + 7. `result` success emits `SessionUsage` (with `model` from `modelUsage`) followed by `SessionTurnComplete`. + 8. `result` success with empty `modelUsage` omits the `model` field on `SessionUsage`. + 9. `message_stop` and unknown stream events emit `[]`. + 10. `content_block_delta` with no allocated part (orphan delta) emits `[]`. + 11. 
multi-block ordering: text @0 + thinking @1 keep distinct part ids, and deltas route to the correct part. +- New file: [`../../test/node/claudeMapSessionEventsTestUtils.ts`](../../test/node/claudeMapSessionEventsTestUtils.ts). Exports the SDK envelope builders previously inlined in `claudeAgent.test.ts` (`makeSystemInitMessage`, `makeResultSuccess`, `makeStreamEvent`, `makeMessageStart`, `makeContentBlockStart{Text,Thinking,ToolUse}`, `make{Text,Thinking}Delta`, `makeContentBlockStop`, `makeMessageStop`, `makeAssistantMessage`, `makeNonNullableUsage`, `TEST_UUID`) plus their type aliases (`BetaRaw*Event`, `BetaContentBlock`). One new builder added: `makeContentBlockStartToolUse(index, id, name)` for the new direct test of defense-in-depth. +- [`../../test/node/claudeAgent.test.ts`](../../test/node/claudeAgent.test.ts) now imports those builders from the util file instead of declaring them inline (~200 lines removed; the file is unchanged in semantics). +- **Why now.** Cycles A–F added new builders + a `CapturingLogService` test double *inside* the agent harness file. Each new mapper test before this refactor cost ~50 lines of agent harness setup. With the seam at the function level, the new tests cost a fresh `ClaudeMapperState()` each. The 11 cases above all fit inside 280 lines including imports. +- **Coverage delta.** The agent-harness tests already exercised the same mapper behavior end-to-end — these direct tests don't add coverage *of correct behavior*, they add coverage *at the seam where regressions should fail first*. Cases 8 (modelUsage empty), 9 (message_stop), 10 (orphan delta), and 11 (multi-block ordering) were not directly asserted at the harness level; they're now locked at the mapper level. +- **No protocol change. No behavior change. 
No agent-harness regression — `claudeAgent.test.ts` still 55/55 pass after the import refactor.** + +**Test count after architectural cleanup:** agentHost claude suite is now **72 pass / 0 fail** across `claudeAgent.test.ts` (55) + `claudeMapSessionEvents.test.ts` (11 new) + `claudeModelConfig.test.ts` (6). No regressions. + +**3. Stateless mapper (follow-up to #1)** + +After landing #1, an empirical re-read of `BetaRawContentBlockStartEvent.index` ([`node_modules/@anthropic-ai/sdk/resources/beta/messages/messages.d.ts:1123`](../../../../../node_modules/@anthropic-ai/sdk/resources/beta/messages/messages.d.ts)) and the SDK's own accumulator ([`node_modules/@anthropic-ai/sdk/lib/BetaMessageStream.js:476`](../../../../../node_modules/@anthropic-ai/sdk/lib/BetaMessageStream.js)) showed `index` is the position within the *current message's* `content[]` — the SDK accumulator does `snapshot.content.push(event.content_block)` on `content_block_start` and `snapshot.content.at(event.index)` on `content_block_delta` with no bounds checking. Phase 6 sets `canUseTool: deny`, so a turn is exactly one assistant message and `index` is therefore monotonic within a turn. That removes the only reason `ClaudeMapperState` existed. + +- [`claudeMapSessionEvents.ts`](./claudeMapSessionEvents.ts): Deleted the `ClaudeMapperState` class. `mapSDKMessageToAgentSignals` and the internal `mapStreamEvent` no longer take a `state` parameter. `content_block_start text/thinking` emits `SessionResponsePart` with `id: \`${turnId}#${event.index}\`` directly. `content_block_delta` emits with the same string formula — no map lookup, no orphan-delta guard. `message_start`, `content_block_stop`, `message_delta`, `message_stop` all become `return []` no-ops. `generateUuid` import removed. +- [`claudeAgentSession.ts`](./claudeAgentSession.ts): Dropped the `_mapperState` field and its `ClaudeMapperState` import; the call to `mapSDKMessageToAgentSignals` no longer passes a state argument. 
+- [`../../test/node/claudeMapSessionEvents.test.ts`](../../test/node/claudeMapSessionEvents.test.ts): Removed the `freshState()` helper and `ClaudeMapperState` import. Re-purposed the `message_start` test to "emits no signals" (no state to clear). Dropped the post-stop "leak" assertion from the text-block test (stop is now a no-op; the SDK protocol guarantees no out-of-order deltas). Deleted the orphan-delta test — a `SessionDelta` with an unknown `partId` is already a silent no-op in the reducer ([`reducers.ts:240`](../../common/state/protocol/reducers.ts#L240)), so the mapper-level guard was redundant cosmetic protection. +- **Why now / why this is safe.** The state class only ever skipped a no-op iteration in the reducer. The SDK protocol orders `content_block_start` strictly before any delta at the same index; an out-of-protocol delta with no preceding start is silently dropped by `updateResponsePart` because `if (!found) return state;`. Removing the state class therefore changes zero observable behavior under the SDK contract and one cosmetic behavior (a malformed orphan delta would now hit the reducer no-op instead of a mapper no-op). +- **Phase 7 implication.** Phase 7 turns are multi-message (text → tool_use → tool_result → text). The SDK resets `index` per message — the `content_block_start` push-then-`at(index)` pattern in `BetaMessageStream.js` proves it, since each `message_start` builds a fresh snapshot. That collision lands in Phase 7, not Phase 6. The fix at that point is to mix `message.id` (or an equivalent per-message counter) into the partId formula: `${turnId}#${messageId}#${index}`. Phase 7's plan should reintroduce the minimum state needed for that — likely a single `_currentMessageId: string | undefined` field, set on `message_start` from `event.message.id` — alongside the tool-call tracking maps it already requires. The phase-7-plan.md `IClaudeMapperState` design predates this refactor and should be re-derived from the new shape. 
+- **Test count after this follow-up:** **71 pass / 0 fail** (one direct mapper test deleted as redundant with the reducer's no-op contract). Agent harness tests unchanged (55 pass). + + + diff --git a/src/vs/platform/agentHost/node/claude/phase7-plan.md b/src/vs/platform/agentHost/node/claude/phase7-plan.md new file mode 100644 index 0000000000000..6956e755b47e7 --- /dev/null +++ b/src/vs/platform/agentHost/node/claude/phase7-plan.md @@ -0,0 +1,826 @@ +# Phase 7 Implementation Plan — `ClaudeAgent` tool calls + permission + user input + +> **Handoff plan** — written to be executed by an agent with no prior conversation context. All file paths and line citations are verified against the workspace at synthesis time. Cross-reference [roadmap.md](./roadmap.md) before committing exact phase numbers. + +## 1. Goal + +Replace Phase 6's `canUseTool: deny` stub with the real tool-use loop. Map the SDK's `tool_use` / `tool_result` flow to the protocol's tool-call state machine (`Streaming → PendingConfirmation → Running → Completed/Cancelled`), implement `respondToPermissionRequest` and `respondToUserInputRequest`, honour the session's `permissionMode`, and special-case the `AskUserQuestion` built-in tool through a `SessionInputRequested` round-trip. + +**Phase 7 deliverable.** A user typing "read package.json" sees: + +1. `SessionToolCallStart` (toolName `Read`, `Streaming`), +2. `SessionToolCallDelta` events streaming the partial input JSON, +3. `pending_confirmation` signal → host translates to `SessionToolCallReady` (PendingConfirmation), +4. Workbench dispatches `SessionToolCallConfirmed { approved: true }` → `respondToPermissionRequest`, +5. SDK runs the tool, `SessionToolCallComplete` lands with the file content as `ToolCallResult`. + +A user typing "what should I do next?" 
— and the model invoking `AskUserQuestion` — sees a `SessionInputRequested`, the workbench answers via `SessionInputCompleted`, `respondToUserInputRequest` resolves the deferred, and the SDK receives the answers as `updatedInput`. + +**Out of scope (deferred):** + +- File edit tracking, diff previews, per-file undo (Phase 8). +- `abortSession`, steering, `changeModel` (Phase 9). +- Client-provided tools / MCP gateway (Phase 10). +- Customizations / plugins (Phase 11). +- Subagents (Phase 12). +- Full transcript reconstruction including `tool_use` / `tool_result` replay (Phase 13). + +**Exit criteria:** + +1. The Phase 6 `canUseTool` deny stub is gone. Every tool the SDK proposes either auto-approves through the session's `permissionMode`, surfaces a confirmation via `pending_confirmation`, or — for `AskUserQuestion` only — round-trips through `SessionInputRequested`. +2. `IClaudeMapperState` exposes per-block tool tracking. The defense-in-depth `tool_use` warn-and-drop at [claudeMapSessionEvents.ts:163-167](claudeMapSessionEvents.ts#L163) is replaced with a real `SessionToolCallStart` emission, paired with `SessionToolCallDelta` for `input_json_delta`, and `SessionToolCallComplete` for synthetic `user` messages carrying `tool_result` content blocks. +3. `ClaudeAgent.respondToPermissionRequest(requestId, approved)` and `ClaudeAgent.respondToUserInputRequest(requestId, response, answers)` no longer throw `TODO: Phase 7`. Both iterate `_sessions` and delegate to the matching `ClaudeAgentSession.respondToPermissionRequest` / `respondToUserInputRequest`, which return `boolean` so the iteration can stop on first match — mirroring [`copilotAgent.ts:1239-1254`](../copilot/copilotAgent.ts#L1239-L1254). +4. The hardcoded `permissionMode: 'default'` at [claudeAgent.ts:444](claudeAgent.ts#L444) is replaced with a live read from `IAgentConfigurationService.getSessionConfigValues(sessionUri)[ClaudeSessionConfigKey.PermissionMode]`. 
Mid-session changes propagate via `Query.setPermissionMode(mode)` from the next `sendMessage` (no per-event listener — the session re-reads at every entry point). +5. Disposing a session whose `canUseTool` is parked on a deferred unblocks cleanly: `_denyAllPending()` resolves every pending permission with `false` and every pending user input with `Cancel`. +6. Existing Phase 6 tests still pass. `claudeAgent.test.ts:797-832`'s "TODO: Phase 7" placeholder is removed; the suite gains tool-lifecycle tests, permission-mode tests, and an `AskUserQuestion` test driving the captured `canUseTool` callback. +7. The proxy-backed integration test exercises one `Read` permission round-trip. + +## 2. Files to create / modify + +| Action | File | Purpose | +|---|---|---| +| **Modify** | [claudeAgent.ts](claudeAgent.ts) | Replace `canUseTool` deny stub with the real gate (closure over `this`). Replace hardcoded `permissionMode: 'default'` with a live config read at materialize. Implement `respondToPermissionRequest` and `respondToUserInputRequest` as `_sessions.values()` iteration, mirroring [`copilotAgent.ts:1239-1254`](../copilot/copilotAgent.ts#L1239-L1254). Wire `Query.setPermissionMode(mode)` into `sendMessage` so live config wins. Add an `onElicitation: async () => ({ action: 'cancel' })` stub in `Options` to silence the SDK auto-decline path for any incidental MCP elicitation (full MCP wiring is Phase 10). | +| **Major edit** | [claudeAgentSession.ts](claudeAgentSession.ts) | Add `_pendingPermissions: Map<string, DeferredPromise<boolean>>` and `_pendingUserInputs: Map<string, { deferred: DeferredPromise<…>; questionId: string }>` (full value shape in §3.2). Add `requestPermission(...)`, `requestUserInput(...)`, `respondToPermissionRequest(requestId, approved): boolean`, `respondToUserInputRequest(requestId, response, answers?): boolean`. Add a private `_denyAllPending()` invoked from the dispose chain so the SDK's `canUseTool` callback unblocks. Add `setPermissionMode(mode)` that forwards to `Query.setPermissionMode` once the query is bound. 
| +| **Major edit** | [claudeMapSessionEvents.ts](claudeMapSessionEvents.ts) | Extend `IClaudeMapperState` with `activeToolBlocks: Map<number, { toolUseId: string; toolName: string }>` (per-message, cleared on `message_start`) and `toolCallTurnIds: Map<string, string>` (cross-message, drained on `tool_result`). Replace the warn-and-drop branch at [claudeMapSessionEvents.ts:163-167](claudeMapSessionEvents.ts#L163) with `SessionToolCallStart` emission. Handle `input_json_delta` → `SessionToolCallDelta`. Handle synthetic `user` messages whose `message.content` contains `tool_result` blocks → one `SessionToolCallComplete` per block. **Do NOT emit `SessionToolCallReady`** — that comes from the host translating `pending_confirmation` (see §3.3). | +| **Create** | [claudeToolDisplay.ts](claudeToolDisplay.ts) | Pure helper. `getClaudePermissionKind(toolName: string): 'shell' \| 'write' \| 'mcp' \| 'read' \| 'url' \| 'custom-tool'` and `getClaudeToolDisplayName(toolName: string): string`. Mirrors the [`copilotToolDisplay.ts`](../copilot/copilotToolDisplay.ts) shape. The mapping table is in §4. | +| **Modify** | [../../test/node/claudeAgent.test.ts](../../test/node/claudeAgent.test.ts) | Make `FakeQuery.setPermissionMode` record its calls instead of throwing. Expose `capturedStartupOptions[].canUseTool` (and `onElicitation`) as callable handles. Add helpers for building `tool_use` content-block stream events and synthetic `tool_result` user messages. Replace [claudeAgent.test.ts:797 / 832](../../test/node/claudeAgent.test.ts#L797) Phase-7 throw assertions with real round-trip tests. Add the cases listed in §5. | +| **Modify** | [../../test/node/claudeAgent.integrationTest.ts](../../test/node/claudeAgent.integrationTest.ts) | Extend the proxy-backed test to script a one-tool turn (`tool_use { name: 'Read' }` → host approves → `tool_result`) and assert the resulting `AgentSignal` sequence. | + +No new dependencies. No SDK version change. + +## 3. 
Implementation spec + +### 3.1 The shared owner of the tool-use round-trip is `ClaudeAgent` — not `ClaudeAgentSession` + +The SDK's `canUseTool` closure is set on `Options` before the session wrapper is instantiated ([claudeAgent.ts:436-444](claudeAgent.ts#L436)), so the closure cannot capture `this._sessions.get(sessionId)` at construction time. Two viable shapes: + +- **(A)** Closure captures `sessionId` and reads `this._sessions.get(sessionId)` at call time — the agent always has access to its own session map. The session owns the pending state and the `pending_confirmation` emission. +- **(B)** Closure captures the session reference passed in by `_materializeProvisional` after the session is constructed. + +We pick **(A)**. The session is in `_sessions` by the time any `canUseTool` callback fires (the SDK doesn't dispatch tools before init completes, and init completes before `_materializeProvisional` returns the wrapper at [claudeAgent.ts:469-470](claudeAgent.ts#L469)). (A) keeps the agent the single owner of cross-session policy (config reads, `_sessions` lookup, future MCP routing) and the session purely a per-Query state holder. This mirrors [`copilotAgent.ts:1239-1254`](../copilot/copilotAgent.ts#L1239-L1254): the agent dispatches, the session resolves. + +```ts +// claudeAgent.ts (sketch — inside _materializeProvisional, replaces lines 436-444) + +const options: Options = { + // ... unchanged ... + canUseTool: async (toolName, input, options) => { + return this._handleCanUseTool(sessionId, toolName, input, options); + }, + onElicitation: async () => ({ action: 'cancel' }), // §3.7 + permissionMode: this._readSessionPermissionMode(provisional.sessionUri), // §3.6 + // ... unchanged ... +}; +``` + +### 3.2 Pending state on `ClaudeAgentSession` + +Mirror [`copilotAgentSession.ts:182-184`](../copilot/copilotAgentSession.ts#L182-L184) — same maps, same value shapes, same `respondTo*` boolean return. 
+ +```ts +// claudeAgentSession.ts (additions) + +import { DeferredPromise } from '../../../../base/common/async.js'; +import { generateUuid } from '../../../../base/common/uuid.js'; +import { CancellationError } from '../../../../base/common/errors.js'; +import type { PermissionMode } from '@anthropic-ai/claude-agent-sdk'; +import { + SessionInputAnswer, + SessionInputAnswerState, + SessionInputAnswerValueKind, + SessionInputResponseKind, +} from '../../common/state/protocol/state.js'; + +private readonly _pendingPermissions = new Map<string, DeferredPromise<boolean>>(); +private readonly _pendingUserInputs = new Map<string, { + deferred: DeferredPromise<{ response: SessionInputResponseKind; answers?: Record<string, SessionInputAnswer> }>; + questionId: string; +}>(); + +/** + * Park on a deferred until {@link respondToPermissionRequest} resolves it. + * The agent has already fired `pending_confirmation` before calling this + * (so the workbench is already showing the confirm UI). The SDK is + * blocked on this promise inside its `canUseTool` callback. + */ +async requestPermission(toolUseId: string): Promise<boolean> { + if (this._abortController.signal.aborted) { + return false; + } + const deferred = new DeferredPromise<boolean>(); + this._pendingPermissions.set(toolUseId, deferred); + return deferred.p; +} + +respondToPermissionRequest(requestId: string, approved: boolean): boolean { + const deferred = this._pendingPermissions.get(requestId); + if (!deferred) { + return false; + } + this._pendingPermissions.delete(requestId); + deferred.complete(approved); + return true; +} + +/** + * Build a `SessionInputRequested` action, fire it via + * `_onDidSessionProgress`, and park on a deferred until the workbench + * answers via {@link respondToUserInputRequest}. + * + * Returns the answer keyed by the original `AskUserQuestionInput.questions[].header` + * (the SDK's expected shape). Returns `undefined` on Cancel/Decline so + * the caller can deny the SDK tool call. + */ +async requestUserInput( + request: AskUserQuestionInput, +): Promise<Record<string, string> | undefined> { + if (this._abortController.signal.aborted) { + return undefined; + } + // ... 
build SessionInputRequest from `request.questions` (mirrors + // `copilotAgentSession.ts:828-849` but with multiple questions, since + // AskUserQuestionInput supports a question carousel) ... + // ... fire SessionInputRequested action signal ... + // ... await deferred, transform answers back to `Record<string, string>` ... +} + +respondToUserInputRequest( + requestId: string, + response: SessionInputResponseKind, + answers?: Record<string, SessionInputAnswer>, +): boolean { + const pending = this._pendingUserInputs.get(requestId); + if (!pending) { + return false; + } + this._pendingUserInputs.delete(requestId); + pending.deferred.complete({ response, answers }); + return true; +} + +/** + * Forwards to `Query.setPermissionMode(mode)` once the query has been + * bound. Pre-bind, this is a no-op — the next materialize seeds the + * mode via `Options.permissionMode`. + */ +setPermissionMode(mode: PermissionMode): void { + this._query?.setPermissionMode(mode); +} + +/** + * Invoked from the dispose chain. Resolves every parked permission + * deferred with `false` and every parked input deferred with `Cancel`, + * unblocking the SDK's `canUseTool` callback so it can return and the + * SDK can shut down cleanly. + */ +private _denyAllPending(): void { + for (const [, deferred] of this._pendingPermissions) { + if (!deferred.isSettled) { + deferred.complete(false); + } + } + this._pendingPermissions.clear(); + + for (const [, pending] of this._pendingUserInputs) { + if (!pending.deferred.isSettled) { + pending.deferred.complete({ response: SessionInputResponseKind.Cancel }); + } + } + this._pendingUserInputs.clear(); +} +``` + +Wire `_denyAllPending()` into the existing dispose chain at [claudeAgentSession.ts:122-125](claudeAgentSession.ts#L122). Order matters: deny BEFORE `_abortController.abort()` so the SDK's `canUseTool` callback (currently parked) resolves with `false` and the SDK's loop unwinds before the abort tears the subprocess down. After `abort()`, `_warm[Symbol.asyncDispose]()` runs as today. 
+ +```ts +// In the constructor, immediately after `super();` and BEFORE the +// existing `_abortController` dispose registration: +this._register(toDisposable(() => this._denyAllPending())); +this._register(toDisposable(() => this._abortController.abort())); +// ... existing WarmQuery dispose ... +``` + +`Disposable` delegates to a `DisposableStore`, which — as currently implemented in [base/common/lifecycle.ts](../../../../base/common/lifecycle.ts) — keeps registrations in a `Set` and disposes them in insertion order, so registering `_denyAllPending` FIRST makes it run FIRST, as the snippet above intends. That ordering is an implementation detail rather than a documented contract; verify the actual semantics before relying on it. **Safer:** make `_denyAllPending()` synchronous and idempotent, and call it explicitly at the top of an `override dispose()` — that guarantees deterministic ordering regardless of how the store iterates. + +```ts +override dispose(): void { + this._denyAllPending(); + super.dispose(); +} +``` + +### 3.3 Mapper extensions + +`IClaudeMapperState` gains two maps in this step (a third, `toolCallNames`, is added in §3.3.4). The existing `currentBlockParts` is a per-message map cleared on `message_start`; `activeToolBlocks` follows the same lifecycle. `toolCallTurnIds` is cross-message (a `tool_use` lands in one assistant message, the matching `tool_result` arrives in a later synthetic user message). + +```ts +// claudeMapSessionEvents.ts (extended interface) + +export interface IClaudeMapperState { + /** existing — text/thinking part allocation */ + readonly currentBlockParts: Map; + + /** + * Per-message: maps content_block index → in-flight tool-use block. + * Populated on `content_block_start { tool_use }`, drained on + * `content_block_stop`, cleared on `message_start`. + */ + readonly activeToolBlocks: Map; + + /** + * Cross-message: maps SDK `tool_use_id` → the `turnId` the tool was + * announced under. Populated on `content_block_start { tool_use }`, + * drained when the matching `tool_result` arrives in a synthetic + * `user` message. 
Persists across `message_start` clears because + * `tool_result` arrives in a different SDKMessage than the + * announcing assistant message. + */ + readonly toolCallTurnIds: Map; +} +``` + +Initialise in [claudeAgentSession.ts:84-85](claudeAgentSession.ts#L84): + +```ts +private readonly _mapperState: IClaudeMapperState = { + currentBlockParts: new Map(), + activeToolBlocks: new Map(), + toolCallTurnIds: new Map(), + toolCallNames: new Map(), +}; +``` + +#### 3.3.1 `content_block_start { tool_use }` — emit `SessionToolCallStart` + +Replaces the warn-and-drop branch at [claudeMapSessionEvents.ts:163-167](claudeMapSessionEvents.ts#L163). + +```ts +if (block.type === 'tool_use') { + state.activeToolBlocks.set(event.index, { toolUseId: block.id, toolName: block.name }); + state.toolCallTurnIds.set(block.id, turnId); + state.toolCallNames.set(block.id, block.name); + return [{ + kind: 'action', + session, + action: { + type: ActionType.SessionToolCallStart, + session: sessionStr, + turnId, + toolCallId: block.id, + toolName: block.name, + displayName: getClaudeToolDisplayName(block.name), + } satisfies SessionToolCallStartAction, + }]; +} +``` + +The `SessionToolCallStart` action transitions the tool call into `Streaming` ([state.ts:1123-1135](../../common/state/protocol/state.ts#L1123)) — `partialInput` is empty, deltas append to it. + +#### 3.3.2 `content_block_delta { input_json_delta }` — emit `SessionToolCallDelta` + +```ts +if (event.delta.type === 'input_json_delta') { + const active = state.activeToolBlocks.get(event.index); + if (!active) { + return []; + } + return [{ + kind: 'action', + session, + action: { + type: ActionType.SessionToolCallDelta, + session: sessionStr, + turnId, + toolCallId: active.toolUseId, + content: event.delta.partial_json, + } satisfies SessionToolCallDeltaAction, + }]; +} +``` + +The mapper does NOT need to assemble the JSON. 
The SDK delivers fully-parsed `input` to `canUseTool` ([sdk.d.ts:1825-1833](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1825)); the `Delta` events exist purely so the workbench can render the streaming params live. + +#### 3.3.3 `content_block_stop` — drain per-block state + +Drain `activeToolBlocks.delete(event.index)`. Do NOT emit `SessionToolCallReady` here — that arrives from `pending_confirmation` (§3.5). Mapper-side, the tool call sits in `Streaming` until the host advances it. + +Also drain `currentBlockParts.delete(event.index)` for parity with the text/thinking branches; today's mapper already implicitly relies on the part-id staying allocated for late deltas (the SDK's per-block ordering guarantees deltas don't arrive after stop), but explicit cleanup avoids accumulating dead entries across long turns. + +#### 3.3.4 Synthetic `user` message with `tool_result` blocks — emit `SessionToolCallComplete` + +The SDK delivers tool results back as `SDKUserMessage` records with `isSynthetic: true` (or sometimes `isSynthetic` absent) and a `message.content` array containing `tool_result` content blocks per the Anthropic API. From [sdk.d.ts:3489-3510](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L3489): + +```ts +export declare type SDKUserMessage = { + type: 'user'; + message: MessageParam; // content is BetaContentBlockParam[] + parent_tool_use_id: string | null; + isSynthetic?: boolean; + tool_use_result?: unknown; + // ... 
+}; +``` + +The mapper detects: + +```ts +case 'user': { + const content = message.message.content; + if (!Array.isArray(content)) { + return []; + } + const signals: AgentSignal[] = []; + for (const block of content) { + if (block.type !== 'tool_result') { + continue; + } + const toolUseId = block.tool_use_id; + const associatedTurnId = state.toolCallTurnIds.get(toolUseId); + const toolName = state.toolCallNames.get(toolUseId); + if (associatedTurnId === undefined || toolName === undefined) { + // Defense in depth: tool result without a known announcement. + // Phase 13 transcript replay will populate the maps from disk; + // in Phase 7 a missing entry means the SDK emitted a tool_result + // we never saw the tool_use for. + logService.warn(`[claudeMapSessionEvents] tool_result for unknown tool_use_id ${toolUseId}`); + continue; + } + state.toolCallTurnIds.delete(toolUseId); + state.toolCallNames.delete(toolUseId); + signals.push({ + kind: 'action', + session, + action: { + type: ActionType.SessionToolCallComplete, + session: sessionStr, + turnId: associatedTurnId, + toolCallId: toolUseId, + result: buildToolCallResult(block, toolName), + } satisfies SessionToolCallCompleteAction, + }); + } + return signals; +} +``` + +`buildToolCallResult` translates the Anthropic `tool_result` content (string or content-block array) into `ToolCallResult` ([state.ts:1095-1116](../../common/state/protocol/state.ts#L1095)). Phase-7 mapping (per §9.3 decision): + +- `success = !block.is_error` +- `pastTenseMessage = \`${getClaudeToolDisplayName(toolName)} finished\`` — Phase 8 refines per-tool. +- `content` = pass-through of the Anthropic `tool_result.content` array if it's already an array of typed blocks; if it's a plain string, wrap as `[{ type: 'text', text: }]`. + +The `toolName` is needed for the past-tense string. Add a third map to `IClaudeMapperState` to support this: `toolCallNames: Map`. 
Populated alongside `toolCallTurnIds` on `tool_use` start; drained alongside it on `tool_result`. + +```ts +// claudeMapSessionEvents.ts (final IClaudeMapperState shape) +export interface IClaudeMapperState { + readonly currentBlockParts: Map; + readonly activeToolBlocks: Map; + readonly toolCallTurnIds: Map; + readonly toolCallNames: Map; +} +``` + +#### 3.3.5 Why the mapper doesn't emit `SessionToolCallReady` + +The protocol's tool-call state machine ([sessionState.ts:60-65](../../common/state/sessionState.ts)) lives in two phases: + +1. **`Streaming`** — `SessionToolCallStart` + 0..N `SessionToolCallDelta`. The mapper drives this purely from stream events. +2. **`PendingConfirmation`** — `SessionToolCallReady` lands the assembled tool-call state and triggers the confirmation UI. + +The hop from Streaming → PendingConfirmation is the host's call. The host's `_translateToolCallSignal` (existing infrastructure on `AgentService`, used by Copilot today) handles the `pending_confirmation` signal by either (a) auto-approving and dispatching `SessionToolCallReady` with `confirmed: NotNeeded`, or (b) dispatching `SessionToolCallReady` with confirmation options. Either way the action is the host's, not the mapper's. See [agentService.ts:299-330](../../common/agentService.ts#L299) for the contract — the comment is explicit: "the host applies auto-approval logic over `permissionKind` / `permissionPath` and then dispatches the appropriate `SessionToolCallReady` action". + +Mapper emits `Start` and `Delta`. Session emits `pending_confirmation`. Host emits `Ready`. Mapper emits `Complete`. + +### 3.4 The `_handleCanUseTool` flow + +The closure in `Options.canUseTool` is the hot path. It must: + +1. Re-read live `permissionMode` (so a mid-turn config change wins). +2. Special-case `AskUserQuestion` (§3.5). +3. Auto-approve under `bypassPermissions` (any tool) and `acceptEdits` (write-class tools). +4. Return `{ behavior: 'deny', message: '...' 
}` if the session is gone or aborted. +5. Otherwise, fire `pending_confirmation` and park on `session.requestPermission(toolUseId)`. + +```ts +// claudeAgent.ts (new private method) + +private async _handleCanUseTool( + sessionId: string, + toolName: string, + input: Record, + options: { suggestions?: PermissionUpdate[]; signal: AbortSignal; blockedPath?: string; toolUseID: string }, +): Promise { + const session = this._sessions.get(sessionId); + if (!session) { + // Race: session disposed between SDK call and our lookup. SDK + // expects a deny so its loop can unwind. + return { behavior: 'deny', message: 'Session is no longer active' }; + } + + const sessionUri = session.sessionUri; + const liveMode = this._readSessionPermissionMode(sessionUri); + + // 1. AskUserQuestion: surface as user input request (§3.5). + if (toolName === 'AskUserQuestion') { + const askInput = input as AskUserQuestionInput; + const answers = await session.requestUserInput(askInput); + if (!answers) { + return { behavior: 'deny', message: 'The user cancelled the question' }; + } + return { + behavior: 'allow', + updatedInput: { ...askInput, answers }, + }; + } + + // 2. Plan mode disables non-read tools natively in the SDK; if it + // still calls canUseTool, deny non-read tools defensively. + const permissionKind = getClaudePermissionKind(toolName); + if (liveMode === 'plan' && permissionKind !== 'read') { + return { behavior: 'deny', message: 'Plan mode is read-only' }; + } + + // 3. bypassPermissions: allow everything. + if (liveMode === 'bypassPermissions') { + return { behavior: 'allow' }; + } + + // 4. acceptEdits: auto-approve write-class tools. + if (liveMode === 'acceptEdits' && permissionKind === 'write') { + return { behavior: 'allow' }; + } + + // 5. Default path: surface a pending confirmation. + const permissionPath = options.blockedPath ?? 
extractPermissionPath(toolName, input); + const toolInputJson = JSON.stringify(input); + + this._onDidSessionProgress.fire({ + kind: 'pending_confirmation', + session: sessionUri, + state: { + status: ToolCallStatus.PendingConfirmation, + toolCallId: options.toolUseID, + toolName, + displayName: getClaudeToolDisplayName(toolName), + invocationMessage: getClaudeToolDisplayName(toolName), // §9.3: generic in Phase 7 + toolInput: toolInputJson, + } satisfies ToolCallPendingConfirmationState, + permissionKind, + permissionPath, + }); + + const approved = await session.requestPermission(options.toolUseID); + return approved + ? { behavior: 'allow' } + : { behavior: 'deny', message: 'User declined' }; +} + +private _readSessionPermissionMode(sessionUri: URI): PermissionMode { + const values = this._configurationService.getSessionConfigValues(sessionUri.toString()); + const raw = values?.[ClaudeSessionConfigKey.PermissionMode]; + if (raw === 'acceptEdits' || raw === 'bypassPermissions' || raw === 'plan' || raw === 'default') { + return raw; + } + return 'default'; +} +``` + +`extractPermissionPath` is a tiny pure helper alongside `getClaudePermissionKind` in [claudeToolDisplay.ts](claudeToolDisplay.ts) — see §4. Per §9.3, Phase 7 ships `invocationMessage = getClaudeToolDisplayName(toolName)` (e.g. `"Read file"`); Phase 8 refines per-tool. There is no separate `getClaudeInvocationMessage` helper in Phase 7 — call `getClaudeToolDisplayName` directly. + +### 3.5 `AskUserQuestion` special-case + +The `AskUserQuestion` built-in tool ([extensions/copilot/src/extension/chatSessions/claude/common/claudeTools.ts:60](../../../../../../extensions/copilot/src/extension/chatSessions/claude/common/claudeTools.ts#L60)) is the SDK's question-carousel mechanism. The production extension handles it in [`askUserQuestionHandler.ts:33-92`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/common/toolPermissionHandlers/askUserQuestionHandler.ts#L33) by: + +1. 
Calling the workbench `vscode_askQuestions` core tool to render the question carousel. +2. Translating the answers back into the SDK's expected shape: `Record` keyed by **question text**, not header. +3. Returning `{ behavior: 'allow', updatedInput: { ...input, answers } }` so the SDK "executes" the tool with the assembled answers as its result. + +The agent host has no direct workbench tool service, but it has the `SessionInputRequested` action — designed for exactly this round-trip. The mapping is identical except: + +- Host fires `SessionInputRequested` with one `SessionInputQuestion` per `AskUserQuestionInput.questions[i]`. +- Workbench renders the carousel, dispatches `SessionInputCompleted`. +- Agent host calls `respondToUserInputRequest` → `session.respondToUserInputRequest` → resolves the `requestUserInput` deferred → closure builds `answers` and returns `{ behavior: 'allow', updatedInput: { ...input, answers } }`. + +**Why not `onElicitation`?** GPT's council vote pointed there but the SDK declares `ElicitationRequest` as MCP-server-only — see [sdk.d.ts:498-520](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L498): + +> ``` +> /** Elicitation request from an MCP server, asking the SDK consumer for user input. */ +> export declare type ElicitationRequest = { ... }; +> ``` + +`AskUserQuestion` is a built-in tool, not an MCP server, so it never reaches `onElicitation`. (We still wire `onElicitation` as a `cancel` stub — §3.7 — because some hooks/customizations could surface elicitations once Phase 11 lands, and the SDK auto-declines if the field is absent.) + +**Mapping the answers.** `AskUserQuestionInput.questions[i]` has `header` (id) and `question` (display). The SDK expects `answers` keyed by `question.question` ([extensions/copilot/.../askUserQuestionHandler.ts:67-73](../../../../../../extensions/copilot/src/extension/chatSessions/claude/common/toolPermissionHandlers/askUserQuestionHandler.ts#L67)). 
The protocol's `SessionInputAnswer` is keyed by our internally-generated `questionId`. So: + +- When firing `SessionInputRequested`, generate a unique `questionId` per question and stash a `Map` in the pending entry. +- When the answer arrives, look up by `questionId`, read the answer's `value` (text or selected), and build `Record`. +- Concatenate selected options + freeform text with `, ` to match the production extension's behaviour. + +### 3.6 `permissionMode` propagation + +Two surfaces consume the mode: + +- **The SDK.** Set via `Options.permissionMode` at materialize, and via `Query.setPermissionMode(mode)` mid-session. +- **Our `canUseTool` gate.** Re-read live from `IAgentConfigurationService` on every callback (§3.4). + +**Materialize.** Replace `permissionMode: 'default'` at [claudeAgent.ts:444](claudeAgent.ts#L444) with `permissionMode: this._readSessionPermissionMode(provisional.sessionUri)`. + +**Mid-session.** In `sendMessage` ([claudeAgent.ts:761-783](claudeAgent.ts#L761)), before invoking `entry.send(...)`, call `entry.setPermissionMode(this._readSessionPermissionMode(session))`. This guarantees the SDK's view matches the user's latest config value before each turn. Mid-turn changes to `permissionMode` between two `canUseTool` callbacks are not separately propagated — the next turn syncs it. The `canUseTool` gate (§3.4) reads live, so the host's auto-approval policy responds immediately even if the SDK's internal classification lags by one turn. + +**Why no `SessionConfigChanged` listener.** [agentSideEffects.ts:835](../agentSideEffects.ts#L835) handles `SessionConfigChanged` at the side-effects layer — by the time `canUseTool` fires, `getSessionConfigValues` returns the new value. There is no need to subscribe per session. 
This matches CopilotAgent's "read at every entry point" pattern ([copilotAgent.ts:773](../copilot/copilotAgent.ts#L773): "any `SessionConfigChanged` actions that arrived after `createSession` are honoured without bespoke forwarding"). + +### 3.7 `onElicitation` stub + +The SDK's `Options.onElicitation` ([sdk.d.ts:1320](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L1320)) is the MCP-only equivalent of `canUseTool` for elicitation requests. If absent, the SDK auto-declines (sdk.d.ts comment around `OnElicitation`). Phase 7 has no MCP servers wired (those land in Phase 10), so this is technically unreachable, BUT: + +- Phase 11 hooks/customizations may surface elicitations earlier than Phase 10 expects. +- A user-supplied CLAUDE.md in the cwd can configure plugins or settings that include an MCP server. + +Add a `cancel` stub so any incidental elicitation declines cleanly and leaves a log entry (the stub below logs at `info` level): + +```ts +// claudeAgent.ts (in _materializeProvisional Options) +onElicitation: async req => { + this._logService.info(`[Claude] declining elicitation from MCP server (Phase 7 stub): ${req.message ?? ''}`); + return { action: 'cancel' }; +}, +``` + +Promote to a real implementation in Phase 10 alongside the MCP gateway. + +### 3.8 `respondToPermissionRequest` / `respondToUserInputRequest` on `ClaudeAgent` + +Replace [claudeAgent.ts:785-790](claudeAgent.ts#L785) with the same iteration pattern used by [`copilotAgent.ts:1239-1254`](../copilot/copilotAgent.ts#L1239-L1254): + +```ts +respondToPermissionRequest(requestId: string, approved: boolean): void { + for (const session of this._sessions.values()) { + if (session.respondToPermissionRequest(requestId, approved)) { + return; + } + } + // Optional: log a warn for unknown requestIds. Returning silently + // matches CopilotAgent — the workbench treats both as "no-op" and + // the action is already idempotent at the reducer level. 
+} + +respondToUserInputRequest( + requestId: string, + response: SessionInputResponseKind, + answers?: Record, +): void { + for (const session of this._sessions.values()) { + if (session.respondToUserInputRequest(requestId, response, answers)) { + return; + } + } +} +``` + +Synchronous (return `void`) — matches the `IAgent` declaration at [agentService.ts:382-385](../../common/agentService.ts#L382). The actual SDK resumption happens on the deferred promise the session is parked on, which the workbench-driven dispatch flow already runs on the right async tick. + +## 4. Tool-name → `permissionKind` / `displayName` mapping + +`getClaudePermissionKind(toolName: string)` and `getClaudeToolDisplayName(toolName: string)` live in [claudeToolDisplay.ts](claudeToolDisplay.ts). The mapping is sourced from the SDK's built-in tool list ([sdk.d.ts: see `BUILTIN_TOOL_NAMES` constant if exported, otherwise enumerated here]) cross-referenced with the production extension's [`claudeTools.ts:35-67`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/common/claudeTools.ts#L35) and the host's permissionKind enum at [agentService.ts:324](../../common/agentService.ts#L324). 
+ +| Tool name | `permissionKind` | `displayName` | Notes | +|---|---|---|---| +| `Bash` | `shell` | `Run shell command` | `input.command` is the command line | +| `BashOutput` | `shell` | `Read shell output` | Reads buffered output of a backgrounded Bash | +| `KillBash` | `shell` | `Kill shell command` | Terminates a backgrounded Bash | +| `Read` | `read` | `Read file` | `input.file_path` | +| `Glob` | `read` | `Find files` | `input.pattern`, optional `input.path` | +| `Grep` | `read` | `Search files` | `input.pattern`, optional `input.path` | +| `LS` | `read` | `List directory` | `input.path` | +| `NotebookRead` | `read` | `Read notebook` | `input.notebook_path` | +| `Write` | `write` | `Write file` | `input.file_path` | +| `Edit` | `write` | `Edit file` | `input.file_path` | +| `MultiEdit` | `write` | `Edit file` | `input.file_path` | +| `NotebookEdit` | `write` | `Edit notebook` | `input.notebook_path` | +| `TodoWrite` | `write` | `Update todo list` | Internal SDK state | +| `WebFetch` | `url` | `Fetch URL` | `input.url` | +| `Task` | `custom-tool` | `Run subagent task` | Triggers Phase 12 subagent UX in the future | +| `ExitPlanMode` | `custom-tool` | `Exit plan mode` | Surfaces plan-review confirmation in production extension | +| `AskUserQuestion` | (special-cased — does not produce `pending_confirmation`) | `Ask user a question` | §3.5 | +| `` | `mcp` | `Run MCP tool ${stripped}` | Reserved for Phase 10 | +| `` | `custom-tool` | `${toolName}` | Defensive default | + +`extractPermissionPath(toolName, input)` mirrors the column above: + +```ts +export function extractPermissionPath(toolName: string, input: Record): string | undefined { + switch (toolName) { + case 'Read': + case 'Write': + case 'Edit': + case 'MultiEdit': { + const fp = input.file_path; + return typeof fp === 'string' ? fp : undefined; + } + case 'NotebookRead': + case 'NotebookEdit': { + const fp = input.notebook_path; + return typeof fp === 'string' ? 
fp : undefined; + } + case 'Glob': + case 'Grep': + case 'LS': { + const p = input.path; + return typeof p === 'string' ? p : undefined; + } + case 'WebFetch': { + const url = input.url; + return typeof url === 'string' ? url : undefined; + } + default: + return undefined; + } +} +``` + +`options.blockedPath` from the SDK takes precedence when present (the SDK populates it for tools that map to a single denied path). + +## 5. Test cases + +All new tests live in [claudeAgent.test.ts](../../test/node/claudeAgent.test.ts) unless noted. + +### 5.1 Test infrastructure changes + +- **`FakeQuery.setPermissionMode`** at [claudeAgent.test.ts:266](../../test/node/claudeAgent.test.ts#L266): stop throwing. Push to `recordedPermissionModes: PermissionMode[]`. +- **`FakeClaudeAgentSdkService`**: each entry of `capturedStartupOptions` already records the `Options` object verbatim. Tests can therefore call `capturedStartupOptions[0].canUseTool!(name, input, { toolUseID, signal: ..., suggestions: [], blockedPath })` directly. No new field needed. +- **Helpers** at the top of the file: + - `streamToolUseStart(index, toolUseId, name, turnId)` → `SDKMessage` of type `stream_event` with `event.type === 'content_block_start'`. + - `streamInputJsonDelta(index, partialJson, turnId)` → `content_block_delta` with `input_json_delta`. + - `streamContentBlockStop(index, turnId)` → `content_block_stop`. + - `userToolResultMessage(toolUseId, content, isError?)` → `SDKMessage` of type `user` with `message.content` containing a single `tool_result` block. +- **Replace** [claudeAgent.test.ts:797-832](../../test/node/claudeAgent.test.ts#L797): drop the `respondToPermissionRequest: TODO Phase 7` assertion. (`respondToUserInputRequest` was already not in the throw-list.) + +### 5.2 New unit tests + +Phrased as `assert.deepStrictEqual` snapshots over the captured `_onDidSessionProgress` event log unless the test specifically targets a single field. 
Per the workspace's testing guidelines: prefer one snapshot over many small assertions. + +1. **`canUseTool: deny stub is gone`.** Materialize a session, drive a `tool_use { name: 'Read' }` block through the stream, call `capturedStartupOptions[0].canUseTool` directly. Assert the call does NOT immediately deny — it parks on a deferred. Resolve via `agent.respondToPermissionRequest(toolUseId, true)` and assert the `canUseTool` promise resolves with `{ behavior: 'allow' }`. + +2. **`canUseTool: respondToPermissionRequest false → deny`.** As (1) but `false`. Result: `{ behavior: 'deny', message: 'User declined' }`. + +3. **`canUseTool: bypassPermissions auto-allows`.** Seed session config with `permissionMode: 'bypassPermissions'`. Drive `canUseTool` for any tool. Assert immediate `{ behavior: 'allow' }`, no `pending_confirmation` fired. + +4. **`canUseTool: acceptEdits auto-allows write tools, prompts shell`.** Seed `acceptEdits`. `Write` → immediate allow. `Bash` → `pending_confirmation` fired, parks on deferred. + +5. **`canUseTool: plan mode denies non-read`.** Seed `plan`. `Bash` → immediate deny. `Read` → `pending_confirmation`. + +6. **`canUseTool: live config win`.** Seed `default`. Run a `canUseTool` call (parks). Update config to `bypassPermissions` via `SessionConfigChanged`. Run a SECOND `canUseTool` call: assert immediate allow without firing `pending_confirmation`. (Validates the live re-read at §3.4.) + +7. **`pending_confirmation signal carries the correct shape`.** Drive a `Read { file_path: '/tmp/foo.txt' }`. Assert the captured signal is exactly: + ```js + { kind: 'pending_confirmation', session: , state: { status: 'pending-confirmation', toolCallId: , toolName: 'Read', displayName: 'Read file', invocationMessage: '...', toolInput: '{"file_path":"/tmp/foo.txt"}' }, permissionKind: 'read', permissionPath: '/tmp/foo.txt' } + ``` + +8. 
**`mapper emits SessionToolCallStart on tool_use block start`.** Stream `streamToolUseStart(0, 'tu_1', 'Read', turnId)`. Assert the captured action is `{ type: 'session/toolCallStart', toolCallId: 'tu_1', toolName: 'Read', displayName: 'Read file' }`. + +9. **`mapper emits SessionToolCallDelta on input_json_delta`.** Stream `streamToolUseStart(...)` then `streamInputJsonDelta(0, '{"file_pa', turnId)`. Assert the second action is `{ type: 'session/toolCallDelta', toolCallId: 'tu_1', content: '{"file_pa' }`. + +10. **`mapper emits SessionToolCallComplete on tool_result`.** After (8) and `content_block_stop`, push `userToolResultMessage('tu_1', 'file contents')`. Assert action is `{ type: 'session/toolCallComplete', toolCallId: 'tu_1', turnId: , result: { success: true, content: [{ type: 'text', text: 'file contents' }], pastTenseMessage: ... } }`. Verifies `toolCallTurnIds` cross-message linkage. + +11. **`mapper drops tool_result for unknown tool_use_id with warn`.** Push `userToolResultMessage('unknown_id', '...')` without a preceding `tool_use`. Assert no actions emitted, `logService.warn` called once. + +12. **`AskUserQuestion: surfaces SessionInputRequested, returns updatedInput`.** Drive `canUseTool('AskUserQuestion', { questions: [{ header: 'q1', question: 'Pick one?', options: [...] }] }, ...)`. Assert a `SessionInputRequested` action fires with one question. Resolve via `agent.respondToUserInputRequest(requestId, Accept, { [questionId]: { state: Done, value: { kind: Selected, value: 'option-a' } } })`. Assert the `canUseTool` promise resolves with `{ behavior: 'allow', updatedInput: { questions: [...], answers: { 'Pick one?': 'option-a' } } }`. + +13. **`AskUserQuestion: cancel returns deny`.** As (12) but respond with `Cancel`. Result: `{ behavior: 'deny', message: 'The user cancelled the question' }`. + +14. **`respondToPermissionRequest unknown id is silent`.** No session has the id. `agent.respondToPermissionRequest('nope', true)` returns void. 
No throw, no assertion. + +15. **`respondToUserInputRequest unknown id is silent`.** Same as (14) for user input. + +16. **`Query.setPermissionMode forwards on sendMessage`.** Send a first message (binds the Query). Update config to `acceptEdits`. Send a second message. Assert `FakeQuery.recordedPermissionModes === ['acceptEdits']` (only the second send forwards, since the first send seeded mode via `Options.permissionMode`). + +17. **`dispose with parked permission unblocks SDK`.** Drive `canUseTool` (parks). Call `agent.disposeSession(sessionUri)`. Assert the `canUseTool` promise resolves with `{ behavior: 'deny', message: '...' }` and the SDK's `for await` loop terminates without orphaning the deferred. Verifies §3.2 `_denyAllPending` ordering. + +18. **`Options.onElicitation stub returns cancel`.** Inspect `capturedStartupOptions[0].onElicitation`. Call it with a fake elicitation request. Assert `{ action: 'cancel' }`. + +### 5.3 Integration test (proxy-backed) + +Extend [claudeAgent.integration.test.ts](../../test/node/claudeAgent.integration.test.ts): + +- Stub `ICopilotApiService` to deliver a canned Anthropic stream that emits a `tool_use { name: 'Read', input: { file_path: '/tmp/x' } }` block, then waits for the `tool_result` to arrive on the upstream request, then emits a final assistant `text` block + `result`. +- Drive `agent.sendMessage(...)`, capture progress signals. +- Assert sequence: `Start(tool_call) → ResponsePart(text) → Start(tool_call=Read) → Delta(...) → pending_confirmation → respondToPermissionRequest(true) → Complete(tool_result) → ResponsePart(text=continuation) → SessionUsage → SessionTurnComplete`. + +(The host's `_translateToolCallSignal` injection of `SessionToolCallReady` lives outside the agent's emission stream, so the integration test asserts the agent-side emissions only.) + +## 6. Risks / gotchas + +1. 
**Mapper currently warns and drops `tool_use` ([claudeMapSessionEvents.ts:163-167](claudeMapSessionEvents.ts#L163-L167)).** That branch is the Phase 6 defense-in-depth for `canUseTool: deny`. Phase 7 must REPLACE it, not add alongside — leaving both paths means a `tool_use` would emit a `Start` AND log a warn. + +2. **`canUseTool` blocks the SDK's tool execution loop.** The SDK parks on the awaited `PermissionResult`. If the session is disposed mid-park, the Promise must still resolve or the SDK's `for await` won't terminate, leaking the subprocess. Mitigated by `_denyAllPending()` in dispose (§3.2). Test 17 covers this. + +3. **`Query.setPermissionMode` is only available after the first send.** [`sdk.d.ts: Query`](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) exposes `setPermissionMode` on the bound `Query` only — pre-bind, the field on `ClaudeAgentSession._query` is `undefined`. The call site in `sendMessage` runs INSIDE the sequencer queue (so AFTER `_materializeProvisional` returns and AFTER the first `entry.send` would bind), so the first turn seeds the mode via `Options.permissionMode`, and subsequent turns use `setPermissionMode`. The session's `setPermissionMode` short-circuits if `_query === undefined`. + +4. **Existing test asserts `respondToPermissionRequest` throws TODO Phase 7.** [claudeAgent.test.ts:797-832](../../test/node/claudeAgent.test.ts#L797) — must be removed in this phase or the suite fails. The new tests (5.2.1, 5.2.2, 5.2.14) take its place. + +5. **SDK auto-declines elicitations when `onElicitation` is absent.** Phase 7 has no MCP servers, but customizations and skills sourced via `settingSources` could still emit elicitations through the SDK's hook plumbing. Wire the `cancel` stub at materialize (§3.7) so the auto-decline is explicit and logged. Test 18 covers this. + +6. **`tool_use` block index reuse across messages.** The SDK's content-block index is per-message — a fresh `message_start` resets the counter. 
`activeToolBlocks` is per-message and cleared on `message_start` for parity with `currentBlockParts`. `toolCallTurnIds` is cross-message and keyed on the SDK's UUID `block.id` (globally unique), not the index. Test 6 (live mode) and test 10 (cross-message tool_result) cover both axes. + +7. **Synthetic user-message detection.** [sdk.d.ts:3489-3510](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts#L3489) marks tool-result deliveries as `isSynthetic?: boolean` — present-and-true on emitters that bother. Older emitters omit it. Filter by content shape (any `user` message whose `content` array contains a `tool_result` block), NOT `isSynthetic`. The `tool_result` blocks themselves are what matter; the wrapping message can be anything. + +8. **`_handleCanUseTool` runs on the SDK's async tick, not the session sequencer.** The SDK invokes `canUseTool` from inside its own loop — it does NOT serialize with our `_sessionSequencer`. Two parallel tool calls in a single turn (the SDK does emit them) will race into `_handleCanUseTool` simultaneously. Each one looks up the session map, parks on a distinct `toolUseID`-keyed deferred, and resolves independently. No shared mutable state inside `_handleCanUseTool` itself, so this is fine. + +9. **`pending_confirmation` ordering vs `Start`/`Delta`.** The mapper emits `Start` and `Delta` from inside the `for await (message of query)` loop. `_handleCanUseTool` fires `pending_confirmation` from a separate async callback path. Both ultimately push into `_onDidSessionProgress` (a single `Emitter`), and `Emitter.fire` is synchronous — the order in which they reach the host is the order they're called. The SDK fires `canUseTool` AFTER the corresponding `content_block_stop`, so the order is: `Start` → `Delta`s → `pending_confirmation`. Verified by walking through the SDK's source path for tool-block delivery. + +## 7. Acceptance criteria + +1. The 18 new unit tests in §5.2 pass. Existing Phase-6 tests still pass. +2. 
The integration test in §5.3 exercises a one-tool round-trip end-to-end against the proxy. +3. `npm run compile-check-ts-native` reports zero errors. `npm run gulp compile-extensions` reports zero errors (no extension changes, but the agent platform shares declarations with extensions). +4. `npm run valid-layers-check` reports zero new layer violations. +5. **Live-system smoke run.** Phase 7 extends the existing live-smoke procedure documented at [smoke.md](smoke.md) — the canonical operator-driven E2E for `ClaudeAgent`, harnessed by [`launch-smoke.sh`](scripts/launch-smoke.sh) and [`verify-claude-logs.sh`](scripts/verify-claude-logs.sh). Smoke.md is keyed by phase; Phase 7 adds a row to the "When to run" table and a set of new log assertions to `verify-claude-logs.sh --phase=7`. The run produces a tool-call screenshot + log artifacts attached to the PR. + + **New Phase-7 row to add to smoke.md §1:** + + | 7 (tool calls + permission + user input) | Same as Phase 6 PLUS: a tool-using prompt fires `pending_confirmation`; approving it lands `SessionToolCallComplete` with the result; flipping `permissionMode → bypassPermissions` skips confirmation; an `AskUserQuestion` invocation surfaces the question carousel and answers reach the model. | + + **New Phase-7 assertions to add to `verify-claude-logs.sh --phase=7`:** + + 9. ≥ 1 `"type":"session/toolCall/start"` action in the IPC log (proves the mapper's §3.3.1 emission). + 10. ≥ 1 `"signal":"pending_confirmation"` envelope in the agent-host log (proves §3.4 fired). + 11. ≥ 1 `"type":"session/toolCall/complete"` action (proves the synthetic `user` `tool_result` round-trip in §3.3.4). + 12. **No fatal patterns** — extends §6 of smoke.md: + - `[ClaudeAgentSession] canUseTool callback parked on disposed session` (proves dispose ordering bug if it appears). + - `[claudeMapSessionEvents] tool_result for unknown tool_use_id` warn (proves cross-message lookup is broken if it appears outside Phase 13 replay). + 13. 
(Operator-driven) After a `bypassPermissions` round-trip, the agent-host log contains a `Query.setPermissionMode("bypassPermissions")` line and the next tool call has NO matching `pending_confirmation` envelope (proves §3.6 live-mode propagation). + + **New Phase-7 artifacts to capture in `/tmp/claude-smoke//`:** + + - `tool-actions.log` — sample `session/toolCall/start` / `session/toolCall/complete` envelopes + - `tool-confirm.png` — screenshot of the tool confirmation card pre-approval + - `tool-complete.png` — screenshot of the assistant response post-approval + - `bypass-mode.png` — screenshot proving no confirmation card on bypass + - `ask-user-question.png` — screenshot of the question carousel + + **Phase-7-specific operator script (uses the [`launch`](../../../../../../.github/skills/launch/SKILL.md) and [`code-oss-logs`](../../../../../../.github/skills/code-oss-logs/SKILL.md) skills):** + + 1. **Boot.** Run `./src/vs/platform/agentHost/node/claude/scripts/launch-smoke.sh 9224`. Wait for CDP port. Run `verify-claude-logs.sh --phase=7` to confirm the Phase-6 baseline still passes (registration / auth / proxy / models / no fatals). + 2. **Permission round-trip — approve.** Use the [`launch`](../../../../../../.github/skills/launch/SKILL.md) skill to attach Playwright, open the agent picker, select Claude (use `ArrowDown` + `Enter` per smoke.md §3 gotcha), and type `read package.json and tell me the name`. Wait ≥ 5s for the tool card to render. Snapshot. Verify a `Pick file Read` (or similar) confirmation card appears. Screenshot to `tool-confirm.png`. Approve. Snapshot again. Verify the assistant response includes the package name (e.g. `"code-oss-dev"`). Screenshot to `tool-complete.png`. + 3. **Verify the action stream.** Use the [`code-oss-logs`](../../../../../../.github/skills/code-oss-logs/SKILL.md) skill to read the agent-host log for the active window. 
Confirm the sequence `canUseTool` → `pending_confirmation` → `respondToPermissionRequest(approved=true)` → `tool_result` → `session/toolCall/complete` appears in order. Re-run `verify-claude-logs.sh --phase=7` and confirm checks 9–11 pass. + 4. **Permission round-trip — bypass.** Open the workbench Approvals dropdown, switch to `bypassPermissions`. Type `read README.md`. Snapshot. Verify NO confirmation card appears; the read result lands directly. Screenshot to `bypass-mode.png`. Re-run `verify-claude-logs.sh --phase=7` and confirm check 13 passes (the `setPermissionMode("bypassPermissions")` line is present and the post-bypass tool call has no `pending_confirmation`). + 5. **`AskUserQuestion` round-trip.** Switch back to `default` mode. Type `What should I do next? Use AskUserQuestion to give me three options.` Snapshot. Verify the question carousel renders. Pick an option. Verify the model receives the answer (the assistant's next response references the chosen option). Screenshot to `ask-user-question.png`. + 6. **Tear down.** `lsof -t -i :9224 | xargs -r kill`. Attach all four screenshots + `tool-actions.log` + the Phase-7 row in `verify-claude-logs.sh` output to the PR per smoke.md §7. + +6. The Phase-6 `canUseTool: deny` stub at [claudeAgent.ts:436-440](claudeAgent.ts#L436) is gone — `git grep "Tools are not yet enabled"` returns no matches. + +## 8. Phase 8+ contract notes + +- **Phase 8 (file edit tracking)** layers on top of `SessionToolCallComplete` for `Write`/`Edit`/`MultiEdit`. Phase 7's `getClaudePermissionKind('Write') === 'write'` and `extractPermissionPath('Write', input) === input.file_path` are the seam — Phase 8 reads them off `pending_confirmation` to allocate `resourceWrite` URIs and attach `edits: { items: FileEdit[] }` to the `pending_confirmation.state.edits` field (currently omitted in Phase 7). +- **Phase 9 (abort/steering)** uses the same `_pendingPermissions` map.
`abortSession` will call `_denyAllPending()` then `_abortController.abort()` — Phase 7's `_denyAllPending()` is the underlying primitive. `Query.setPermissionMode` is also touched by Phase 9's plan-mode entry/exit hooks; the `setPermissionMode` method on `ClaudeAgentSession` from §3.2 is the Phase-9 hook point. +- **Phase 10 (client tools / MCP)** replaces the `onElicitation: cancel` stub from §3.7 with a real translation to the protocol's input request / pending tool call. The `getClaudePermissionKind('mcp__*')` rule from §4 is the Phase-10 entry point for routing. +- **Phase 11 (customizations)** adds tools sourced from CLAUDE.md / hooks / agent customizations. `getClaudePermissionKind` falls through to `'custom-tool'` for unknowns, so Phase 7 already handles them (deny/prompt) — Phase 11 just extends the display-name table. +- **Phase 12 (subagents)** uses `parentToolCallId` on `pending_confirmation` ([agentService.ts:333-340](../../common/agentService.ts#L333)). Phase 7 omits it (no Task-tool handling yet). When Phase 12 lands, `_handleCanUseTool` will inspect `input.subagent_type` and set `parentToolCallId` accordingly. The `Task` tool is in the §4 table as `custom-tool` for now. +- **Phase 13 (transcript reconstruction)** must populate `toolCallTurnIds` from disk replay so `tool_result` events delivered on session restoration can map back to the announcing `tool_use`'s turnId. The `IClaudeMapperState` design from §3.3 is the seam — replay drives the same mapper, hydrating the same maps. + +## 9. Decisions (grilling outcomes) + +The five candidates that survived the council fan-out were resolved during the grilling pass; the user opted into autonomous resolution. Recording the resolutions here so the implementing agent has the full reasoning trail. 
+ +### 9.1 `AskUserQuestion` is visible in the transcript as a tool call + +**Decision.** Emit `SessionToolCallStart`, `SessionToolCallDelta`, and `SessionToolCallComplete` for `AskUserQuestion` from the mapper, exactly the same as any other tool. Skip ONLY the `pending_confirmation` signal — `_handleCanUseTool` short-circuits to the user-input round-trip (§3.5). + +**Why.** The protocol's tool-call card is the natural transcript artifact for "this happened in this turn". `SessionInputRequested` is an orthogonal answer-collection state, not a tool-progress state — they convey different information. Suppressing the tool-call entry would force Phase 13 transcript reconstruction to special-case the read side too, and would lose the record of which questions were asked and how the model received the answers. + +**Mapper-side implication.** No special branching for `AskUserQuestion` in the mapper. It treats every `tool_use` block uniformly. The branching happens entirely inside `_handleCanUseTool` (§3.4 step 1). + +**UX nuance to flag for the workbench.** When both a tool-call card and the question carousel are visible during the round-trip, the workbench may choose to visually collapse the tool-call card while the carousel is open. That's a workbench rendering concern, not an agent-host emission concern. + +### 9.2 `requiresResultConfirmation` is deferred to Phase 8 + +**Decision.** Phase 7 emits `SessionToolCallComplete` without `requiresResultConfirmation`. Phase 8 (file edit tracking, diff previews, per-file accept/reject) is the correct phase to add it. + +**Why.** The flag exists to gate the SDK from receiving the tool's output until the user reviews it ([actions.ts:418](../../common/state/protocol/actions.ts#L418)). The review surface is a diff renderer, which Phase 8 owns. Wiring the flag in Phase 7 without the diff plumbing creates a half-state where the workbench shows "approve result" UI without anything to approve. 
+ +**Operational note.** Phase 7's `Write`/`Edit` tools still go through the standard `pending_confirmation` flow before execution (auto-approved under `acceptEdits`, prompted otherwise). They just don't gate the *result*. The model's view of the tool result is unchanged from Phase 6. + +### 9.3 `pastTenseMessage` ships generic in Phase 7 + +**Decision.** Phase 7 emits `pastTenseMessage: \`${displayName} finished\`` (e.g. `"Read file finished"`). Phase 8 refines per-tool ("Read package.json (240 lines)", "Wrote 12 lines to foo.ts", etc.). + +**Why.** Per-tool past-tense strings need access to the tool's *result* shape (line counts, diff summaries) — that data only enters the mapper alongside Phase 8's edit-tracking work. Forcing meaningful strings in Phase 7 means duplicating Phase-8-shape parsers in the mapper. The workbench has rendered generic past-tense strings since the Copilot agent shipped; nothing UX-critical depends on richer text in this phase. + +**`invocationMessage` parity.** Same posture: ship `\`${displayName}\`` for Phase 7. Phase 8's per-tool helpers will replace both at the same site. + +### 9.4 Wire the `onElicitation: cancel` stub + +**Decision.** Set `Options.onElicitation: async req => ({ action: 'cancel' })` at materialize time, with a `_logService.info` of the elicitation message and originating MCP server name. Phase 10 replaces the stub with a real translation. + +**Why.** The SDK's behaviour when `onElicitation` is absent is "auto-decline" — but it's not specified what telemetry is fired or what the user-visible result is. An explicit `cancel` with a log line gives us a known surface to debug from when Phase 11 customizations or Phase 10 MCP servers eventually fire elicitations through it. The cost is a single closure on `Options`. The benefit is observability when something unexpected fires. + +**Test 18 in §5.2** locks the stub's behaviour so a future SDK upgrade can't silently change it. 
+ +### 9.5 `Query.setPermissionMode` rebinding is Phase 9's concern + +**Decision.** Phase 7 forwards live `permissionMode` via `Query.setPermissionMode(mode)` from `sendMessage` (§3.6). It does NOT track the previously-set mode or attempt to rebind on yield-restart — that flow doesn't exist yet. + +**Why.** Phase 9 owns yield-restart. When that lands, the rebind path will re-build `Options.permissionMode` from the live config (same path as initial materialize at §3.6) — no additional Phase-7 machinery needed. `ClaudeAgentSession.setPermissionMode` from §3.2 stays as-is; it short-circuits when `_query === undefined`, which is the post-restart state right before the next `sendMessage` rebinds it. + +**Risk acknowledged.** If Phase 9 lands a yield-restart that doesn't go through `_materializeProvisional`'s path (e.g. it re-uses `WarmQuery` and only rebinds `Query`), it'll need to seed permissionMode itself. Phase 9's plan should call this out in its own §3.6 equivalent. diff --git a/src/vs/platform/agentHost/node/claude/roadmap.md b/src/vs/platform/agentHost/node/claude/roadmap.md index a9ac6ce279b69..da51e4dfbb10e 100644 --- a/src/vs/platform/agentHost/node/claude/roadmap.md +++ b/src/vs/platform/agentHost/node/claude/roadmap.md @@ -407,29 +407,56 @@ a Claude model, but can't yet send a message. ### Phase 5 — Session lifecycle: create / dispose / list / shutdown Implement the lifecycle methods that don't require live LLM traffic. +**Provisional / materialize is the load-bearing model in this phase** +(CONTEXT M9): `createSession` returns a session URI synchronously **without** +spawning an SDK subprocess and **without** writing the on-disk session +file (the JSONL sidecar). `IAgentCreateSessionResult.provisional: true` +tells `AgentService` to defer the `sessionAdded` notification and the +`SessionReady` lifecycle dispatch. 
The session materializes on first +`sendMessage` (Phase 6); `IAgent.onDidMaterializeSession` then fires and +`AgentService` flushes the deferred notifications. Provisional records +are therefore **invisible to other workbench clients** until materialised. - `createSession(config)` — allocate a fresh UUID `sessionId`, construct the URI via `AgentSession.uri(this.id, sessionId)`, construct a `ClaudeAgentSession` (new file `node/claude/claudeAgentSession.ts`). - Persist minimal session metadata via `ISessionDataService`. Do **not** - start the SDK yet — that happens lazily on first `sendMessage`. -- **Honor `IAgentCreateSessionConfig.fork`** - (`agentService.ts:161–173`): when `config.fork` is set, route through the - SDK's `forkSession(sourceSessionId, options)` (top-level SDK function from - `claudeCodeSdkService.ts:57, 121–124`) to mint a new session ID, build the - URI from the new ID, and persist the `turnIdMapping`. + Return `{ session, workingDirectory, provisional: true }`. **Do not** + write the JSONL sidecar yet — the session-data DB row is the only + pre-materialise persistence (Phase 5's `_provisionalSessions` map carries + the in-memory state). The SDK starts lazily on first `sendMessage` + (Phase 6). +- **`createSession({ fork })` — deferred.** The fork branch throws + `TODO: Phase 6.5` with no side effects. See "Phase 6.5 — Fork (deferred)" + below for the structural reason, the reverted attempt, and the deferred + plan that lands alongside Phase 13's result-message mapper. - `disposeSession(session)` — tear down the session's `Query` (if alive), - MCP gateway, in-flight aborts. + MCP gateway, in-flight aborts. Provisional sessions dispose by removing + the in-memory record (no SDK / sidecar to clean up). - `listSessions()` — `IAgent.listSessions()` returns `Promise<IAgentSessionMetadata[]>` (`agentService.ts:394`). Call SDK `listSessions()` with `dir` undefined (across all projects), map each - `SDKSessionInfo` → `IAgentSessionMetadata`.
+ `SDKSessionInfo` → `IAgentSessionMetadata`. **Sidecar policy:** the + per-session `.session.json` sidecar (when present) is a *best-effort + enrichment* layer — it is read to fill in `customizationDirectory` and + similar host-only fields, but **not used as a filter** (CONTEXT M12). + Sessions without a sidecar are still listed; sessions whose sidecar is + malformed are still listed with the host-only fields cleared. Provisional + sessions are intentionally absent until materialised. - `getSessionMessages(session)` — empty stub for now; full implementation in Phase 13. +- `getSessionMetadata?(session)` (optional `IAgent` method) — returns the + enriched metadata for a single session, including sidecar fields. Phase + 5 implements the read path; Phase 11 wires writes ( + `setMetadata('claude.customizationDirectory', …)`). - `resolveSessionConfig` / `sessionConfigCompletions` — schema for - Claude-specific session knobs (model, working directory). + Claude-specific session knobs (model, working directory). Per-field + metadata flags (`sessionMutable`, etc.) come from CONTEXT M12; today the + IAgent protocol exposes no generic live-edit setter, so `sessionMutable` + fields whose values change mid-session round-trip as a fresh + `createSession` (a restart) until the protocol grows that surface. - **`shutdown()`** — gracefully close every active `Query`, dispose the - proxy, drain in-flight requests. + proxy, drain in-flight requests. Provisional sessions are dropped from + the in-memory map (no I/O). **Read-through cache for the transcript** lands here as a seam: @@ -441,13 +468,17 @@ Implement the lifecycle methods that don't require live LLM traffic. swapped in without touching `ClaudeAgentSession`. Tests: create a session, list it (including externally-created), get its -(empty) messages, dispose it, verify it's gone from `listSessions`. Fork -via `createSession({ fork })` produces a new URI with the right -`turnIdMapping`. 
`shutdown()` is idempotent and cancels in-flight work. - -Exit criteria: sessions can be created (including via fork) and persisted; -restarts find them; externally-created Claude Code sessions appear; agent -host can shut down cleanly. +(empty) messages, dispose it, verify it's gone from `listSessions`. +`shutdown()` is idempotent and cancels in-flight work. +`createSession({ fork })` throws `TODO: Phase 6.5` with no side effects. +A provisional session is **not** visible to a second `listSessions` caller +until materialised. + +Exit criteria: sessions can be created (non-fork) provisionally and +materialised on first `sendMessage` (Phase 6 owns the materialise edge); +restarts find materialised sessions; externally-created Claude Code +sessions appear; agent host can shut down cleanly. Fork is deferred to +Phase 6.5. ### Phase 6 — `sendMessage` + streaming progress events (single-turn, no tools) @@ -534,6 +565,109 @@ canned Anthropic stream → verify the resulting `AgentSignal` sequence. Exit criteria: a workbench client sends "hi" and sees a streamed assistant response in the UI. +### Phase 6.5 — Fork (deferred — depends on Phase 13's result-message mapper) + +> **Status:** attempted, fully reverted, deferred. `createSession({ fork })` +> currently throws `TODO: Phase 6.5` at +> [`claudeAgent.ts:303`](./claudeAgent.ts) with no side effects. +> +> **Sequencing note:** numbered 6.5 to stay consistent with the throw +> message and `phase6-plan.md` §8.1, but **executes after Phase 13** because +> the clean fix shares Phase 13's result-message mapper. + +**Why deferred — structural mismatch.** Copilot's fork path +([`copilotAgent.ts:660-714`](../copilot/copilotAgent.ts)) calls +`sourceEntry.getNextTurnEventId(turnId) → toEventId`, an O(1) primitive the +Copilot SDK provides natively. 
The Claude SDK has no equivalent: `forkSession` +takes `upToMessageId` — an SDK message UUID, INCLUSIVE per +[`sdk.d.ts:558`](../../../../../../node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts) — +not a protocol turn id, and offers no helper to translate one to the other. + +The agent-host needs that translation. Workbench's +[`agentHostSessionHandler.ts:2167`](../../../../workbench/contrib/chat/browser/agentSessions/agentHost/agentHostSessionHandler.ts) +passes `turnId` for the **last KEPT turn N** ("keep `[0..N]` INCLUSIVE"); the +SDK wants the *uuid of the last SDK message of turn N*. The Claude +extension's +[`claudeChatSessionContentProvider.ts:341`](../../../../../../extensions/copilot/src/extension/chatSessions/vscode-node/claudeChatSessionContentProvider.ts) +sidesteps this because its UI semantic is "fork BEFORE request X" (EXCLUSIVE) +and it uses request-id directly as the SDK uuid via `messageIndex - 1`. The +agent-host can't do that — its inputs are *protocol turn ids*, not message +uuids — and no on-disk primitive exists for the mapping in either the SDK's +JSONL transcript or our session-data DB. + +**What was attempted and reverted (do-not-redo).** An in-fork heuristic that +forward-scanned the SDK's JSONL transcript past `type:'user'` tool-result +frames (an `_isGenuineUserRequest` predicate skipped mid-turn tool replies) +until the next genuine user request, then took the last `type:'assistant'` +before that as the fork anchor. The attempt also threaded protocol +`turnId → SDKUserMessage.uuid` (mirroring +[`claudeCodeAgent.ts:569`](../../../../../../extensions/copilot/src/extension/chatSessions/claude/node/claudeCodeAgent.ts)) +and routed the new session id through `Options.resume`. The heuristic worked +today but is non-contractual: it relies on the SDK packing tool-results into +`type:'user'` with pure tool_result content arrays, on the absence of +turn-ending mid-tool states, and on subagents living in separate +`agent-*.jsonl` files. 
Any of those could change in an SDK version bump and +silently break fork. **Decision:** revert and ship the contract-based +solution backed by Phase 13's mapper. + +**Chosen approach (when this phase lands):** + +- **Persist `protocolTurnId → lastSdkMessageUuid`** on result-message ingest + inside the same handler Phase 13 builds for transcript reconstruction. + That mapper sees every SDK message in order and observes `type:'result'` + (the only contract-level turn-end signal); it is the right place to + capture the mapping without inference. +- Store the mapping in the session-data DB alongside the existing + per-session metadata. Fork becomes O(1) DB lookup, no JSONL walk. +- `createSession({ fork })` then calls + `sdk.forkSession(srcId, { upToMessageId: <lastSdkMessageUuid> })` and routes + the new session id through `Options.resume` so the SDK loads the forked + transcript. +- Persist the customization-directory metadata via `setMetadata` on the + forked session (mirrors + [`copilotAgent.ts`](../copilot/copilotAgent.ts)). +- Pre-existing on-disk sessions need a one-time backfill (best-effort, the + reverted heuristic is acceptable here since it's a one-shot operation + on archived data). + +**Dependencies:** + +- **Hard:** Phase 13 (transcript reconstruction + result-message mapper). + Cannot land until the mapper has a stable hook for + `turnId → lastSdkMessageUuid` capture on every result-message ingest, + including replay during session restoration so pre-existing on-disk + sessions get the mapping backfilled. +- **Soft:** Phase 13's `IClaudeSessionTranscriptStore` (the Phase-5 seam) is + the natural place to surface the mapping to `ClaudeAgent`. + +**Architectural model:** Copilot's `getNextTurnEventId(turnId)`. Phase 6.5 +is the Claude-side polyfill of that primitive — persisted by us because the +Claude SDK doesn't provide it.
+ +**Materialisation note.** Unlike non-fork `createSession` (Phase 5/6's +provisional path), `forkSession` writes the forked SDK transcript file +synchronously. Fork therefore *eagerly* fires `onDidMaterializeSession` +from inside `createSession`, before returning — there is no provisional +state to defer. The host's contract for fork is "materialise immediately, +no separate sendMessage edge" (CONTEXT M9). + +**Workbench client behavior in the interim.** The agent-host contract today +is "fork rejects, no side effects" — any client invocation surfaces the +throw as a session-creation error. Whether the workbench should hide or +disable the fork affordance for Claude sessions until this phase lands is +TBD with the workbench owners. + +Tests (when this phase lands): unit tests for the mapping ingest (turn end +→ persisted row), unit tests for `createSession({ fork })` looking up the +mapping and calling `forkSession` with the right uuid, integration test +parallel to Copilot's fork tests (create → N turns → fork at N-1 → +new turn on fork → verify prefix turns intact). + +Exit criteria: fork is contract-based (no JSONL shape inference), works +with restored sessions, and honors the workbench's "keep `[0..N]` +INCLUSIVE" semantic. The reverted heuristic is **not** retained behind a +flag. + ### Phase 7 — Tool calls + permission + user input Wire the SDK's tool-use loop through to the agent host's tool infrastructure. @@ -541,17 +675,37 @@ Wire the SDK's tool-use loop through to the agent host's tool infrastructure. - Map Claude's tool-use events → `AgentSignal` tool-call request parts. - Map tool-result events → tool-result response parts. -- **`respondToPermissionRequest`** — gate tool execution like the Copilot - agent does. Wire through SDK's permission callback / `canUseTool`. -- **`respondToUserInputRequest`** (`agentService.ts:384–385`) — handle the - SDK's user-input / `ask_user` flow. Forward client-provided answers back - to the SDK. 
+- **Tool-call → turn attribution.** Maintain a per-session + `Map<tool_use_id, turnId>` populated when the assistant message + carrying the `tool_use` block streams in. The map is consulted from + every Phase-7 tool-related callback (`canUseTool`, elicitation + handlers, tool-result emission, file-edit tracking in Phase 8) so each + signal carries the protocol turn id of the request that scheduled it. + Mirrors CONTEXT M2 / M3 / M7 — the SDK never re-states the turn id on + per-block events, so the host owns the binding. +- **`respondToPermissionRequest` — dual-routing in `canUseTool`.** The + SDK's `canUseTool(name, input)` callback fires for every tool, but only + *some* tools should round-trip through the workbench client; the rest + are auto-allowed (or denied) by the host without an elicitation. Maintain + an `INTERACTIVE_CLAUDE_TOOLS` set (read, write, edit, bash, etc.) and + branch: + - Interactive tool → emit `AgentSignal` permission-request part keyed + on the tool_use_id (lookup turn id via the attribution map), wait + for the client's `respondToPermissionRequest`, return the matching + SDK verdict. + - Non-interactive tool → immediate auto-allow (or deny per policy); + no client round-trip, no signal emitted. + Mirror the routing with `respondToUserInputRequest` for the SDK's + `ask_user` / elicitation flow (`agentService.ts:384–385`) — same + attribution-map lookup. - Mirror per-tool annotations (`Read`, `Write`, `Edit`, `Bash`, etc.) so the client can render them. Tests: a session that asks for `Read`, gets prompted, approves, sees the tool output streamed back. A session that triggers an `ask_user` request, -client responds, SDK continues. +client responds, SDK continues. A session that fires a non-interactive tool +emits no permission signal. Each emitted signal carries the correct turn +id via the attribution map. Exit criteria: a real "read this file" prompt completes end-to-end. @@ -583,36 +737,95 @@ works.
### Phase 9 — Abort + steering + model change + shutdown polish +Every runtime mutation in this phase classifies into one of M11's three +buckets — **hot-swap**, **defer-and-coalesce**, or **restart-required** +(see [CONTEXT.md M11](./CONTEXT.md) "Hot-swap / defer-and-coalesce / +restart-required taxonomy"). The prompt iterable's yield boundary is the +only mutation barrier; agents synchronise all three buckets at that point. + - **`abortSession`** — cancel the underlying SDK turn via `_abortController.abort()`, matching the production reference. Phase 9 may experiment with `Query.interrupt()` if the abort path turns out to orphan the subprocess, but the default plan is the AbortController route the extension already proves works. Propagates through SDK → proxy → `ICopilotApiService`. -- **Steering / `setPendingMessages`** — use `Query.streamInput()` to push - additional `SDKUserMessage`s mid-turn. -- `changeModel` — `Query.setModel()` on the live `Query`. Resolve the new - model ID through the proxy's resolver first. -- `setPermissionMode` (internal SDK concern, not a protocol method) — - `Query.setPermissionMode()` on the live `Query`. Permission mode changes - are driven by SDK events (`EnterPlanMode`/`ExitPlanMode` tools), not - by direct `IAgent` method calls. Wire accordingly - (see `claudeCodeAgent.ts:174–181` for the reference). +- **Steering / `setPendingMessages`** — yield an `SDKUserMessage` with + `priority: 'now'` into the *existing* prompt iterable that was passed to + `query()`. The SDK's `'now'`-priority watcher aborts the in-flight turn + and dequeues the steering message next. **Do NOT use + `Query.streamInput()`** — the production reference has zero callers; the + prompt iterable absorbs that role (CONTEXT M10). `sendMessage`-originated + messages stay on `priority: 'next'` (or unset — `'next'` is the SDK + default); steering is the one path that escalates to `'now'`. 
Emit + `IAgentSteeringConsumedSignal` only when the SDK echoes the + `'now'`-priority message on the event stream (model has *seen* it), not + when the iterable's `yield` resolves (queue acceptance). +- **`changeModel` — bundle-atomic hot-swap.** A single call carries + `ModelSelection.id` *and* the model's per-model config bag (today: + `config.effort`). Apply the diff at the next yield boundary by fanning + out to one or more SDK runtime setters: + - `id` changed → `Query.setModel(sdkId)`. + - `config.effort` changed → `Query.applyFlagSettings({ effortLevel })`. + **Clamp** at the seam: `Options.effort` allows `'max'` but the + runtime setter does not (CONTEXT M11 effort-clamp table). Mid-session + `'max'` selections silently degrade to `'xhigh'` on the runtime path. + Genuine `'max'` mid-session requires the **restart-required** path + (close the `Query`, spawn a new one with `Options.effort: 'max'`). + - Both changed → both setters at the same yield boundary, in + agent-defined order. + Restart preserves bijective state: when the agent restarts the `Query` + for any reason (yield-restart, customization-tools-diverge, etc.), it + re-applies the stored bijective values (`_currentModel`, + `_currentPermissionMode`, `_currentEffort`) so the user-visible config + stays continuous. +- **`Query.setPermissionMode()` is reachable but not protocol-exposed.** + Permission mode is `sessionMutable: true` in the M12 schema and + bijective in M11, but the IAgent protocol has **no generic live-edit + setter** today (CONTEXT M12: "protocol surface for routing an arbitrary + live config edit back into the running session is **TBD**"). Until the + generic setter lands, a client mid-session edit of `permissionMode` + round-trips as a fresh `createSession` with the new bag (a restart) — + not as a `setPermissionMode` RPC. 
The SDK-internal driver + (`EnterPlanMode` / `ExitPlanMode` tools — see `claudeCodeAgent.ts:174–181` + for the reference) is wired regardless because it does not require an + IAgent surface. - **Yield-restart** mechanism (port from `claudeCodeAgent.ts`): when settings files change or tool set changes mid-turn, drain the current - generator and restart via `resume: sessionId`. + generator and restart via `resume: sessionId`. This is the + **restart-required** bucket; bijective state is re-applied on the new + `Query` to keep the user-visible config continuous (see `changeModel` + above). - **Subprocess crash recovery** — if the SDK subprocess dies mid-turn, surface to the client as a turn error and mark the session ready for a fresh `_startSession` on the next `sendMessage`. -Tests: abort mid-stream releases the proxy's HTTP connection, steering lands -in the next turn, model swap takes effect, killed subprocess triggers -recovery. +Tests: abort mid-stream releases the proxy's HTTP connection, steering +(`priority: 'now'` yield) preempts the in-flight turn and emits +`steering_consumed` after model visibility, model+effort swap fires both +SDK setters at the same yield boundary, `'max'` mid-session demotes to +`'xhigh'` on the runtime path (and reaches genuine `'max'` only via +restart), killed subprocess triggers recovery. Exit criteria: parity with Copilot agent on stop / steer / switch model. ### Phase 10 — Client-provided tools (in-process MCP) +The Claude SDK exposes **two distinct MCP entry points** that classify into +different M11 buckets — do not conflate them: + +1. **In-process tools → `createSdkMcpServer` + `Options.mcpServers`** — + defined at `query()` start, **immutable for the life of the `Query`** + (CONTEXT M11). Any change to the in-process tool list is + **restart-required**: yield-restart via `resume: sessionId` so the next + `Query` is started with the new `mcpServers` bag. This is the path + client-provided tools take. +2. 
**External MCP servers → `Query.setMcpServers(...)`** — a runtime SDK + setter, in M11's **hot-swap** bucket (bijective; no restart). External + server additions / removals (when we surface them) flow through here, + not through restart. + +The restart-required path: + - `setClientTools(session, clientId, tools)` — convert the protocol's `ToolDefinition[]` into SDK MCP tool definitions via SDK's `tool(name, description, zodSchema, handler)`, wrap in `createSdkMcpServer`, pass via @@ -621,14 +834,18 @@ Exit criteria: parity with Copilot agent on stop / steer / switch model. (uses `tool()` / `createSdkMcpServer`). **Not** `claudeCodeAgent.ts`. - The handler is a deferred promise that the host resolves when `onClientToolCallComplete` delivers the result. -- **Per-query MCP server recreation** — recreate from current tool list on +- **Per-query MCP server recreation** — because in-process `mcpServers` + are immutable on a live `Query`, recreate from the current tool list on each `_startSession` / yield-restart. Tools changing between turns triggers a yield-restart (mirror `_toolsMatch` from `claudeCodeAgent.ts`). + Phase 11's `reloadPlugins` does **not** help here — plugins are + orthogonal to client-provided tool servers. - **MCP gateway lifecycle** — port the `_gateway` + `_gatewayIdleTimeout` pattern: gateway disposed after N seconds of idle to release resources. Tests: a client registers a custom tool, the agent invokes it via a Claude -prompt, result returns to the client and is fed back into the SDK. +prompt, result returns to the client and is fed back into the SDK; tool +list diff between turns triggers yield-restart, not in-place mutation. Exit criteria: client tools callable from a Claude session. @@ -649,10 +866,17 @@ Exit criteria: client tools callable from a Claude session. (`agentService.ts:439`) for progressive loading UI. - Pass the local paths as `options.plugins: [{ type: 'local', path }, ...]` on the next `query()` call. 
-- **Restart-on-toggle** flag (`_pendingRestart` from `claudeCodeAgent.ts`): - customization toggles mark the session for restart before the next - `sendMessage`. -- `setCustomizationEnabled(uri, enabled)` — flips the flag. +- **`setCustomizationEnabled(uri, enabled)` — defer-and-coalesce, NOT + restart.** Set `_pendingPluginReload`; at the next yield boundary, call + `Query.reloadPlugins()` (a cheap runtime SDK setter — bijective per + M11). `reloadPlugins` is in M11's **defer-and-coalesce** bucket, not + restart-required: the running subprocess stays up. Only when the *tool + set* implied by the new plugin list diverges from the live one do we + fall back to the **restart-required** path (yield-restart via + `resume: sessionId`); that's the narrow `_toolsMatch` case from + `claudeCodeAgent.ts`, not the default. The misnamed `_pendingRestart` + flag from the reference impl is a historical artifact — the canonical + taxonomy treats plugin reload as cheap. **Outbound (SDK → host) — required for Copilot parity (`agentService.ts:399–417`):** @@ -664,10 +888,13 @@ Exit criteria: client tools callable from a Claude session. Tests: client provides a customization → agent syncs it → next `query()` includes the local path → SDK init message confirms the plugin loaded; -customization toggle triggers restart; published events fire correctly. +customization toggle drains via `reloadPlugins` at the next yield (no +subprocess restart) and the new plugin appears in `available_plugins`; a +tool-set diff *does* trigger yield-restart; published events fire correctly. -Exit criteria: customization round-trip works; workbench renders Claude -customizations like Copilot's. +Exit criteria: customization round-trip works; toggle is defer-and-coalesce +by default and restart-required only when tool sets diverge; workbench +renders Claude customizations like Copilot's. ### Phase 12 — Subagents @@ -713,19 +940,27 @@ Exit criteria: subagent sessions are first-class for clients. 
(`agentService.ts:430`), so we omit it and document: - Clients wanting truncate-like behavior use `createSession({ fork: { session, turnIndex, turnId, turnIdMapping } })` - (Phase 5), which legitimately mints a new session URI. + (Phase 6.5 — currently deferred; until that lands, the fork branch + throws and the workbench surfaces a session-creation error). - The workbench should follow the new URI, just like for any other fork. - Adding in-place truncate later would require a URI→sessionId mapping layer; we'd revisit when there's user demand. -- Session forking via `createSession({ fork })` is already covered in - Phase 5; this phase verifies the round-trip with persisted state. +- **`turnId → lastSdkMessageUuid` ingest** lands here as the prerequisite + for Phase 6.5. The result-message mapper persists a + `(turnId, lastAssistantMessageUuid)` pair in session metadata on every + completed turn; replay during session restoration backfills pre-existing + on-disk sessions. This is the contract-level primitive that Phase 6.5 + consumes for fork — see "Phase 6.5 — Fork (deferred)" above. Tests: persist a session, restart the agent host, reload the session, -verify turns are intact and a new turn appends correctly. Fork via -`createSession({ fork })` produces a new URI with the prefix turns intact. +verify turns are intact and a new turn appends correctly. Verify +`turnId → lastSdkMessageUuid` rows are persisted for each completed turn +on live ingest, and that session restoration replays the mapper to +backfill them. -Exit criteria: agent-host restart is invisible; fork works; truncate is -documented as fork-by-another-name. +Exit criteria: agent-host restart is invisible; the turn-mapping ingest +is validated; truncate is documented as fork-by-another-name. Fork +end-to-end ships in Phase 6.5 (deferred to land alongside this phase). ### Phase 14 — Hardening + telemetry @@ -812,8 +1047,9 @@ native dependency is packaged in all production builds. 
Claude Agent SDK and proxy beneath it; we re-use `@anthropic-ai/sdk` types. - **In-place `truncateSession`.** SDK's `forkSession` always mints a new - session ID. Clients use `createSession({ fork })` for truncate-like - effect; we revisit if there's demand. + session ID. Clients will use `createSession({ fork })` for truncate-like + effect once Phase 6.5 lands; we revisit in-place truncate if there's + demand. - **File rewind as part of `truncateSession`.** Per-file undo is exposed via `resourceRead` / `resourceWrite` URIs (Phase 8). - **Custom subprocess sandboxing** via `spawnClaudeCodeProcess`. The Agent diff --git a/src/vs/platform/agentHost/node/copilot/copilotAgent.ts b/src/vs/platform/agentHost/node/copilot/copilotAgent.ts index b508666d6ac02..8ded0d56227fb 100644 --- a/src/vs/platform/agentHost/node/copilot/copilotAgent.ts +++ b/src/vs/platform/agentHost/node/copilot/copilotAgent.ts @@ -84,6 +84,16 @@ const ThinkingLevelConfigKey = 'thinkingLevel'; const ReasoningEfforts = ['low', 'medium', 'high', 'xhigh'] as const; type ReasoningEffort = NonNullable; +export const COPILOT_AGENT_HOST_SYSTEM_MESSAGE = { + mode: 'customize', + sections: { + identity: { + action: 'replace', + content: 'You are an AI assistant using Copilot CLI runtime in VS Code. You help users with software engineering tasks. When asked about your identity, you must state that you are an AI assistant using Copilot CLI runtime in VS Code.', + }, + }, +} satisfies NonNullable; + interface ISerializedModelSelection { id?: unknown; config?: unknown; @@ -139,6 +149,28 @@ export function getCopilotWorktreeBranchName(sessionId: string, branchNameHint: return `agents/${branchNameHint ? `${branchNameHint}-${sessionId.substring(0, 8)}` : sessionId}`; } +/** + * Derive a slug-style branch-name hint from the user's first message. Used + * by the worktree isolation flow so the generated branch name reflects the + * intent of the session instead of being just a session id. 
+ * + * Returns `undefined` if the message has no slug-able content (e.g. only + * punctuation), in which case the caller falls back to a session-id-only + * branch name. + */ +export function getCopilotBranchNameHintFromMessage(message: string): string | undefined { + const words = message + .toLowerCase() + .normalize('NFKD') + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .split('-') + .filter(word => word.length > 0) + .slice(0, 8); + const hint = words.join('-').slice(0, 48).replace(/-+$/g, ''); + return hint.length > 0 ? hint : undefined; +} + /** * Builds the localized "Created isolated worktree for branch X" markdown * shown at the top of the first response in worktree-isolated sessions. @@ -761,7 +793,7 @@ export class CopilotAgent extends Disposable implements IAgent { * `SessionConfigChanged` actions that arrived after `createSession` are * honoured without bespoke forwarding. */ - private async _materializeProvisional(sessionId: string): Promise { + private async _materializeProvisional(sessionId: string, prompt: string): Promise { const provisional = this._provisionalSessions.get(sessionId); if (!provisional) { throw new Error(`Cannot materialize unknown provisional session: ${sessionId}`); @@ -781,7 +813,7 @@ export class CopilotAgent extends Disposable implements IAgent { const customizationDirectory = provisional.workingDirectory; const activeClient = this._activeClients.get(sessionUri); const snapshot = activeClient ? 
await activeClient.snapshot(customizationDirectory) : undefined; - const workingDirectory = await this._resolveSessionWorkingDirectory(materializedConfig, sessionId); + const workingDirectory = await this._resolveSessionWorkingDirectory(materializedConfig, sessionId, prompt); const shellManager = this._instantiationService.createInstance(ShellManager, sessionUri, workingDirectory); const sessionConfigBuilder = this._buildSessionConfig(snapshot, shellManager); @@ -920,10 +952,12 @@ export class CopilotAgent extends Disposable implements IAgent { await this._sessionSequencer.queue(sessionId, async () => { // First message on a provisional session: materialize the SDK - // session, worktree, and on-disk metadata before continuing. + // session, worktree, and on-disk metadata before continuing. The + // prompt is forwarded so a worktree-isolated session can derive + // its branch-name hint from the user's first message. let entry: CopilotAgentSession | undefined; if (this._provisionalSessions.has(sessionId)) { - entry = await this._materializeProvisional(sessionId); + entry = await this._materializeProvisional(sessionId, prompt); } else { entry = this._sessions.get(sessionId); } @@ -1342,6 +1376,7 @@ export class CopilotAgent extends Disposable implements IAgent { mcpServers: toSdkMcpServers(plugins.flatMap(p => p.mcpServers)), customAgents, skillDirectories: toSdkSkillDirectories(plugins.flatMap(p => p.skills)), + systemMessage: COPILOT_AGENT_HOST_SYSTEM_MESSAGE, tools: [...shellTools, ...callbacks.clientTools], // Enable infinite sessions so the SDK provisions a workspace // directory (containing `plan.md`, `checkpoints/`, `files/`). 
@@ -1430,7 +1465,7 @@ export class CopilotAgent extends Disposable implements IAgent { return this._gitService.getBranches(workingDirectory, { query, limit: CopilotAgent._BRANCH_COMPLETION_LIMIT }); } - protected async _resolveSessionWorkingDirectory(config: IAgentCreateSessionConfig | undefined, sessionId: string): Promise { + protected async _resolveSessionWorkingDirectory(config: IAgentCreateSessionConfig | undefined, sessionId: string, prompt?: string): Promise { if (config?.config?.isolation !== 'worktree' || !config.workingDirectory || typeof config.config.branch !== 'string') { return config?.workingDirectory; } @@ -1441,8 +1476,7 @@ export class CopilotAgent extends Disposable implements IAgent { } const worktreesRoot = getCopilotWorktreesRoot(repositoryRoot); - const branchNameHintRaw = config.config[SessionConfigKey.BranchNameHint]; - const branchNameHint = typeof branchNameHintRaw === 'string' ? branchNameHintRaw : undefined; + const branchNameHint = prompt ? getCopilotBranchNameHintFromMessage(prompt) : undefined; const branchName = getCopilotWorktreeBranchName(sessionId, branchNameHint); const worktree = URI.joinPath(worktreesRoot, getCopilotWorktreeName(branchName)); await fs.mkdir(worktreesRoot.fsPath, { recursive: true }); diff --git a/src/vs/platform/agentHost/test/common/claudeModelConfig.test.ts b/src/vs/platform/agentHost/test/common/claudeModelConfig.test.ts new file mode 100644 index 0000000000000..6ee9f280f9e2e --- /dev/null +++ b/src/vs/platform/agentHost/test/common/claudeModelConfig.test.ts @@ -0,0 +1,182 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import assert from 'assert'; +import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js'; +import { CLAUDE_THINKING_LEVEL_KEY, createClaudeThinkingLevelSchema, isClaudeEffortLevel, resolveClaudeEffort, type ClaudeEffortLevel } from '../../common/claudeModelConfig.js'; +import type { ModelSelection } from '../../common/state/protocol/state.js'; + +suite('resolveClaudeEffort (Phase 6.1 / Cycle E)', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('returns the SDK enum value for each accepted thinkingLevel string', () => { + const accepted = ['low', 'medium', 'high', 'xhigh', 'max'] as const; + const actual = accepted.map(level => resolveClaudeEffort({ + id: 'claude-opus-4.6', + config: { [CLAUDE_THINKING_LEVEL_KEY]: level }, + })); + assert.deepStrictEqual(actual, ['low', 'medium', 'high', 'xhigh', 'max']); + }); + + test('returns undefined for absent / unrecognized inputs (SDK default takes over)', () => { + // Each input represents a real failure mode the materialize site can + // hit: no model picked, model with no config bag, model with empty + // config bag, model with config but no thinkingLevel key, and a model + // whose thinkingLevel string is outside the union. All five must + // degrade to `undefined` so the SDK falls through to its own default + // instead of being told to use a value it doesn't understand. 
+ const cases: readonly (ModelSelection | undefined)[] = [ + undefined, + { id: 'claude-opus-4.6' }, + { id: 'claude-opus-4.6', config: {} }, + { id: 'claude-opus-4.6', config: { unrelated: 'high' } }, + { id: 'claude-opus-4.6', config: { [CLAUDE_THINKING_LEVEL_KEY]: 'turbo' } }, + ]; + assert.deepStrictEqual(cases.map(resolveClaudeEffort), [undefined, undefined, undefined, undefined, undefined]); + }); +}); + +suite('isClaudeEffortLevel (Phase 6.1 / Cycle D3)', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('accepts the canonical 5-value union, rejects anything else', () => { + // Picker-side and read-side must agree on the same union: the picker + // only emits these five strings, and `toAgentModelInfo` filters + // CAPI's `reasoning_effort` array through this guard before passing + // it into `createClaudeThinkingLevelSchema`. A drift between the two + // would surface as a model whose enum advertises a value the + // materialize site can't honor. + const inputs = ['low', 'medium', 'high', 'xhigh', 'max', '', 'LOW', 'turbo', 'minimal', 'High']; + assert.deepStrictEqual(inputs.map(isClaudeEffortLevel), [true, true, true, true, true, false, false, false, false, false]); + }); +}); + +suite('createClaudeThinkingLevelSchema (Phase 6.1 / Cycle D3)', () => { + + ensureNoDisposablesAreLeakedInTestSuite(); + + test('per-model variation: enum + enumLabels + default track the supplied list; empty list returns undefined', () => { + // Single snapshot covering every shape the caller can hand in: the + // full 5-value union, a 3-value subset (most common Claude case), a + // single-value list, an out-of-canonical-order list that omits + // 'high' (no `default` emitted), and the empty list (no schema + // rendered, picker hides the control). 
Asserting them together + // locks (a) `enum` ordering and `enumLabels` ordering stay 1:1 with + // the input, and (b) `default: 'high'` is emitted iff 'high' is in + // the supported list (mirror of the extension's rule at + // extensions/copilot/.../claudeCodeModels.ts:230). + const fullUnion: readonly ClaudeEffortLevel[] = ['low', 'medium', 'high', 'xhigh', 'max']; + const lowMediumHigh: readonly ClaudeEffortLevel[] = ['low', 'medium', 'high']; + const highOnly: readonly ClaudeEffortLevel[] = ['high']; + const noHigh: readonly ClaudeEffortLevel[] = ['max', 'low']; + const empty: readonly ClaudeEffortLevel[] = []; + + assert.deepStrictEqual({ + fullUnion: createClaudeThinkingLevelSchema(fullUnion), + lowMediumHigh: createClaudeThinkingLevelSchema(lowMediumHigh), + highOnly: createClaudeThinkingLevelSchema(highOnly), + noHigh: createClaudeThinkingLevelSchema(noHigh), + empty: createClaudeThinkingLevelSchema(empty), + }, { + fullUnion: { + type: 'object', + properties: { + thinkingLevel: { + type: 'string', + title: 'Thinking Level', + description: 'Controls how much reasoning effort Claude uses.', + enum: ['low', 'medium', 'high', 'xhigh', 'max'], + enumLabels: ['Low', 'Medium', 'High', 'Extra High', 'Max'], + default: 'high', + }, + }, + }, + lowMediumHigh: { + type: 'object', + properties: { + thinkingLevel: { + type: 'string', + title: 'Thinking Level', + description: 'Controls how much reasoning effort Claude uses.', + enum: ['low', 'medium', 'high'], + enumLabels: ['Low', 'Medium', 'High'], + default: 'high', + }, + }, + }, + highOnly: { + type: 'object', + properties: { + thinkingLevel: { + type: 'string', + title: 'Thinking Level', + description: 'Controls how much reasoning effort Claude uses.', + enum: ['high'], + enumLabels: ['High'], + default: 'high', + }, + }, + }, + noHigh: { + type: 'object', + properties: { + thinkingLevel: { + type: 'string', + title: 'Thinking Level', + description: 'Controls how much reasoning effort Claude uses.', + enum: 
['max', 'low'], + enumLabels: ['Max', 'Low'], + }, + }, + }, + empty: undefined, + }); + }); + + test(`emits default: 'high' iff 'high' is in the supported list, never substitutes another value`, () => { + // 'high' is the canonical Claude default (server-side fallback when + // adaptive thinking is enabled). When a model omits 'high' the + // helper must NOT pick another value as a stand-in default — the + // picker should open with no pre-selection so the SDK falls through + // to its own default rather than being told to use a value the user + // didn't pick. + const cases: readonly { input: readonly ClaudeEffortLevel[]; expected: ClaudeEffortLevel | undefined }[] = [ + { input: ['high'], expected: 'high' }, + { input: ['low', 'high'], expected: 'high' }, + { input: ['low', 'medium', 'high', 'xhigh', 'max'], expected: 'high' }, + { input: ['low'], expected: undefined }, + { input: ['low', 'medium'], expected: undefined }, + { input: ['xhigh'], expected: undefined }, + { input: ['xhigh', 'max'], expected: undefined }, + ]; + assert.deepStrictEqual( + cases.map(c => createClaudeThinkingLevelSchema(c.input)?.properties.thinkingLevel.default), + cases.map(c => c.expected), + ); + }); + + test('input array is not mutated and the returned enum is independent of subsequent input mutation', () => { + // The helper is invoked once per model at authenticate-time; the + // caller's array is the post-`filter` view of `reasoning_effort`. + // If the schema's `enum` aliased the input array, a subsequent + // mutation (e.g. another caller reusing a buffer) would silently + // rewrite an already-published `IAgentModelInfo.configSchema`. 
+ const input: ClaudeEffortLevel[] = ['low', 'high']; + const schema = createClaudeThinkingLevelSchema(input); + input.push('max'); + assert.deepStrictEqual({ + input, + enum: schema?.properties.thinkingLevel.enum, + default: schema?.properties.thinkingLevel.default, + }, { + input: ['low', 'high', 'max'], + enum: ['low', 'high'], + default: 'high', + }); + }); +}); + diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts index dc8ea73831155..5b9be9a9ef678 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.integrationTest.ts @@ -277,6 +277,10 @@ class ProxyRoundTripSdkService implements IClaudeAgentSdkService { return []; } + async getSessionInfo(_sessionId: string): Promise { + return undefined; + } + async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.capturedStartupOptions.push(params.options); diff --git a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts index 688dd999e1fd7..402f6a1b3dd82 100644 --- a/src/vs/platform/agentHost/test/node/claudeAgent.test.ts +++ b/src/vs/platform/agentHost/test/node/claudeAgent.test.ts @@ -4,21 +4,23 @@ *--------------------------------------------------------------------------------------------*/ import type Anthropic from '@anthropic-ai/sdk'; -import type { Options, Query, SDKMessage, SDKPartialAssistantMessage, SDKResultSuccess, SDKSessionInfo, SDKSystemMessage, SDKUserMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; +import type { Options, Query, SDKMessage, SDKSessionInfo, SDKUserMessage, WarmQuery } from '@anthropic-ai/claude-agent-sdk'; import type { CCAModel } from '@vscode/copilot-api'; -// Beta event-stream type aliases. 
The Anthropic namespace re-exports these -// from `@anthropic-ai/sdk/resources/beta/messages.js`, but importing that -// subpath directly trips the `local/code-import-patterns` allowlist -// (the agentHost rule only permits the bare `@anthropic-ai/sdk` specifier). -// Local aliases via the existing `Anthropic` import keep the body of this -// file readable without extending the allowlist. -type BetaRawContentBlockDeltaEvent = Anthropic.Beta.BetaRawContentBlockDeltaEvent; -type BetaRawContentBlockStartEvent = Anthropic.Beta.BetaRawContentBlockStartEvent; -type BetaRawContentBlockStopEvent = Anthropic.Beta.BetaRawContentBlockStopEvent; -type BetaRawMessageStartEvent = Anthropic.Beta.BetaRawMessageStartEvent; -type BetaRawMessageStopEvent = Anthropic.Beta.BetaRawMessageStopEvent; import assert from 'assert'; +import { + makeAssistantMessage, + makeContentBlockStartText, + makeContentBlockStartThinking, + makeContentBlockStop, + makeMessageStart, + makeMessageStop, + makeResultSuccess, + makeStreamEvent, + makeSystemInitMessage, + makeTextDelta, + makeThinkingDelta, +} from './claudeMapSessionEventsTestUtils.js'; import { DeferredPromise } from '../../../../base/common/async.js'; import { Event } from '../../../../base/common/event.js'; import type { DisposableStore } from '../../../../base/common/lifecycle.js'; @@ -36,6 +38,8 @@ import { IAgentMaterializeSessionEvent, AgentSession, AgentSignal, GITHUB_COPILO import { ActionType } from '../../common/state/sessionActions.js'; import { MessageAttachmentKind, ResponsePartKind } from '../../common/state/sessionState.js'; import { ISessionDataService } from '../../common/sessionDataService.js'; +import { AHP_AUTH_REQUIRED, ProtocolError } from '../../common/state/sessionProtocol.js'; +import { ProtectedResourceMetadata } from '../../common/state/protocol/state.js'; import { IAgentHostGitService } from '../../node/agentHostGitService.js'; import { ClaudeAgent } from '../../node/claude/claudeAgent.js'; import { 
ClaudeAgentSdkService, IClaudeAgentSdkService, IClaudeSdkBindings } from '../../node/claude/claudeAgentSdkService.js'; @@ -149,6 +153,24 @@ class FakeClaudeAgentSdkService implements IClaudeAgentSdkService { return this.sessionList; } + /** + * Fake for {@link IClaudeAgentSdkService.getSessionInfo}. Tests stage + * `sessionList` and the fake searches it by id; setting + * {@link getSessionInfoOverride} replaces the default lookup + * wholesale (used to simulate the "session moved off disk" case). + */ + getSessionInfoOverride: ((sessionId: string) => Promise) | undefined; + + getSessionInfoCalls: string[] = []; + + async getSessionInfo(sessionId: string): Promise { + this.getSessionInfoCalls.push(sessionId); + if (this.getSessionInfoOverride) { + return this.getSessionInfoOverride(sessionId); + } + return this.sessionList.find(s => s.sessionId === sessionId); + } + async startup(params: { options: Options; initializeTimeoutMs?: number }): Promise { this.startupCallCount++; this.capturedStartupOptions.push(params.options); @@ -288,167 +310,6 @@ class FakeQuery implements AsyncGenerator { [Symbol.asyncDispose](): Promise { return Promise.resolve(); } } -// #region SDK message builders -// -// The SDK's `SDKMessage` union has many required fields that aren't -// relevant to most agent-host tests (deep `NonNullableUsage` shape, -// `SDKSystemMessage`'s `tools`/`mcp_servers`/etc.). These builders -// produce fully-typed values without `as unknown` casts so tests can -// stage transcripts ergonomically. - -/** Stable test UUID — reused so assertions can pin against a known value. 
*/ -const TEST_UUID = '11111111-2222-3333-4444-555555555555'; - -function makeNonNullableUsage(): SDKResultSuccess['usage'] { - return { - cache_creation: { ephemeral_1h_input_tokens: 0, ephemeral_5m_input_tokens: 0 }, - cache_creation_input_tokens: 0, - cache_read_input_tokens: 0, - inference_geo: 'unknown', - input_tokens: 0, - iterations: [], - output_tokens: 0, - server_tool_use: { web_fetch_requests: 0, web_search_requests: 0 }, - service_tier: 'standard', - speed: 'standard', - }; -} - -function makeSystemInitMessage(sessionId: string): SDKSystemMessage { - return { - type: 'system', - subtype: 'init', - apiKeySource: 'user', - claude_code_version: '0.0.0-test', - cwd: '/workspace', - tools: [], - mcp_servers: [], - model: 'claude-test', - permissionMode: 'default', - slash_commands: [], - output_style: 'default', - skills: [], - plugins: [], - uuid: TEST_UUID, - session_id: sessionId, - }; -} - -function makeResultSuccess(sessionId: string): SDKResultSuccess { - return { - type: 'result', - subtype: 'success', - duration_ms: 0, - duration_api_ms: 0, - is_error: false, - num_turns: 1, - result: '', - stop_reason: 'end_turn', - total_cost_usd: 0, - usage: makeNonNullableUsage(), - modelUsage: {}, - permission_denials: [], - uuid: TEST_UUID, - session_id: sessionId, - }; -} - -// `stream_event` (SDKPartialAssistantMessage) builders. The SDK's -// `Options.includePartialMessages: true` setting (Phase 6 §3.4) routes -// raw `BetaRawMessageStreamEvent`s through to the agent so we can map -// per-token. The deep `BetaMessage` shape on `message_start` carries -// many required fields irrelevant to mapping; these helpers populate -// only what the mapper reads, with everything else set to safe zero -// values so the SDK type-checks pass without `as unknown` casts. 
- -function makeStreamEvent( - sessionId: string, - event: SDKPartialAssistantMessage['event'], -): SDKPartialAssistantMessage { - return { - type: 'stream_event', - event, - parent_tool_use_id: null, - uuid: TEST_UUID, - session_id: sessionId, - }; -} - -function makeMessageStart(): BetaRawMessageStartEvent { - return { - type: 'message_start', - message: { - id: 'msg_test', - type: 'message', - role: 'assistant', - model: 'claude-test', - content: [], - stop_reason: null, - stop_sequence: null, - stop_details: null, - container: null, - context_management: null, - usage: { - cache_creation: { ephemeral_1h_input_tokens: 0, ephemeral_5m_input_tokens: 0 }, - cache_creation_input_tokens: 0, - cache_read_input_tokens: 0, - inference_geo: 'unknown', - input_tokens: 0, - iterations: [], - output_tokens: 0, - server_tool_use: { web_fetch_requests: 0, web_search_requests: 0 }, - service_tier: 'standard', - speed: 'standard', - }, - }, - }; -} - -function makeContentBlockStartText(index: number): BetaRawContentBlockStartEvent { - return { - type: 'content_block_start', - index, - content_block: { type: 'text', text: '', citations: null }, - }; -} - -function makeContentBlockStartThinking(index: number): BetaRawContentBlockStartEvent { - return { - type: 'content_block_start', - index, - content_block: { type: 'thinking', thinking: '', signature: '' }, - }; -} - -function makeTextDelta(index: number, text: string): BetaRawContentBlockDeltaEvent { - return { - type: 'content_block_delta', - index, - delta: { type: 'text_delta', text }, - }; -} - -function makeThinkingDelta(index: number, thinking: string): BetaRawContentBlockDeltaEvent { - return { - type: 'content_block_delta', - index, - delta: { type: 'thinking_delta', thinking }, - }; -} - -function makeContentBlockStop(index: number): BetaRawContentBlockStopEvent { - return { - type: 'content_block_stop', - index, - }; -} - -function makeMessageStop(): BetaRawMessageStopEvent { - return { type: 'message_stop' }; -} - 
-// #endregion - /** * Wraps a delegate {@link ISessionDataService} and records call counts so * tests can assert that lifecycle methods (e.g. non-fork `createSession`) @@ -506,6 +367,16 @@ function makeModel(overrides: Partial & { readonly id: string; readonl }; } +/** + * Build a `CCAModelSupports` with `reasoning_effort` / `adaptive_thinking` + * augmentations the SDK type doesn't yet declare (tracked at + * microsoft/vscode-capi#85). Mirrors the runtime shape `claudeAgent.ts` + * narrows at the read boundary. + */ +function makeSupports(extras: { adaptive_thinking?: boolean; reasoning_effort?: readonly string[] } = {}): CCAModel['capabilities']['supports'] { + return { parallel_tool_calls: true, streaming: true, tool_calls: true, vision: false, ...extras } as CCAModel['capabilities']['supports']; +} + const CLAUDE_OPUS = makeModel({ id: 'claude-opus-4.6', name: 'Claude Opus 4.6', vendor: 'Anthropic' }); const CLAUDE_SONNET = makeModel({ id: 'claude-sonnet-4.6', name: 'Claude Sonnet 4.6', vendor: 'Anthropic' }); const NON_ANTHROPIC = makeModel({ id: 'gpt-5', name: 'GPT-5', vendor: 'OpenAI' }); @@ -542,7 +413,26 @@ interface ITestContext { readonly sessionData: RecordingSessionDataService; } -function createTestContext(disposables: Pick): ITestContext { +/** + * {@link NullLogService} subclass that captures `warn` / `error` messages + * so tests can assert defense-in-depth diagnostics fired from the mapper + * or other internals. All other levels remain no-ops. 
+ */ +class CapturingLogService extends NullLogService { + readonly warns: string[] = []; + readonly errors: string[] = []; + override warn(message: string, ...args: unknown[]): void { + this.warns.push([message, ...args.map(a => String(a))].join(' ')); + } + override error(message: string | Error, ...args: unknown[]): void { + this.errors.push([String(message), ...args.map(a => String(a))].join(' ')); + } +} + +function createTestContext( + disposables: Pick, + overrides?: { logService?: ILogService }, +): ITestContext { const proxy = new FakeClaudeProxyService(); const api = new FakeCopilotApiService(); api.models = async () => [...ALL_MODELS]; @@ -550,7 +440,7 @@ function createTestContext(disposables: Pick): ITestCont const sessionData = new RecordingSessionDataService(createSessionDataService()); const services = new ServiceCollection( - [ILogService, new NullLogService()], + [ILogService, overrides?.logService ?? new NullLogService()], [ICopilotApiService, api], [IClaudeProxyService, proxy], [ISessionDataService, sessionData], @@ -598,6 +488,19 @@ suite('ClaudeAgent', () => { assert.deepStrictEqual(agent.models.get(), []); }); + test('createSession before authenticate throws ProtocolError(AHP_AUTH_REQUIRED) with protected resources', async () => { + const { agent } = createTestContext(disposables); + + await assert.rejects( + () => agent.createSession({ workingDirectory: URI.file('/workspace') }), + (err: Error) => + err instanceof ProtocolError && + err.code === AHP_AUTH_REQUIRED && + Array.isArray(err.data) && + (err.data as ProtectedResourceMetadata[])[0]?.resource === 'https://api.github.com', + ); + }); + test('authenticate populates models filtered to Claude family', async () => { const { agent, proxy } = createTestContext(disposables); @@ -612,12 +515,132 @@ suite('ClaudeAgent', () => { accepted: true, startCalls: ['tok'], models: [ - { provider: 'claude', id: 'claude-opus-4.6', name: 'Claude Opus 4.6', maxContextWindow: 200_000, supportsVision: false 
test('authenticate surfaces the CAPI chat-default model first; ties preserve insertion order', async () => {
	// The agent model info has no explicit "default" flag, so (per the
	// original note on this test) the picker treats `models[0]` as the
	// default. The agent is therefore expected to move the entry CAPI
	// flags with `is_chat_default` to the front, while entries of equal
	// priority keep the order CAPI returned them in.
	const { agent, api } = createTestContext(disposables);

	// CAPI order is opus, sonnet, haiku; only sonnet is the chat default.
	api.models = async () => [
		makeModel({ id: 'claude-opus-4.6', name: 'Claude Opus 4.6', vendor: 'Anthropic' }),
		makeModel({ id: 'claude-sonnet-4.6', name: 'Claude Sonnet 4.6', vendor: 'Anthropic', is_chat_default: true }),
		makeModel({ id: 'claude-haiku-4.6', name: 'Claude Haiku 4.6', vendor: 'Anthropic' }),
	];

	await agent.authenticate('https://api.github.com', 'tok');
	await tick();

	const surfacedIds = agent.models.get().map(({ id }) => id);
	const expectedIds = ['claude-sonnet-4.6', 'claude-opus-4.6', 'claude-haiku-4.6'];
	assert.deepStrictEqual(surfacedIds, expectedIds);
});
+ const capsBase = { + family: 'test', + limits: { max_context_window_tokens: 200_000, max_output_tokens: 8192, max_prompt_tokens: 200_000 }, + object: 'model_capabilities', + tokenizer: 'o200k_base', + type: 'chat', + } as const; + const fullEffortModel = makeModel({ + id: 'claude-opus-4.6', name: 'Claude Opus 4.6', vendor: 'Anthropic', + capabilities: { ...capsBase, supports: makeSupports({ adaptive_thinking: true, reasoning_effort: ['low', 'medium', 'high'] }) }, + }); + const highOnlyModel = makeModel({ + id: 'claude-sonnet-4.6', name: 'Claude Sonnet 4.6', vendor: 'Anthropic', + capabilities: { ...capsBase, supports: makeSupports({ adaptive_thinking: true, reasoning_effort: ['high'] }) }, + }); + const emptyEffortModel = makeModel({ + id: 'claude-haiku-4.6', name: 'Claude Haiku 4.6', vendor: 'Anthropic', + capabilities: { ...capsBase, supports: makeSupports({ adaptive_thinking: false, reasoning_effort: [] }) }, + }); + const unknownEffortModel = makeModel({ + id: 'claude-opus-4.5', name: 'Claude Opus 4.5', vendor: 'Anthropic', + capabilities: { ...capsBase, supports: makeSupports({ adaptive_thinking: true, reasoning_effort: ['low', 'bogus', 'high'] }) }, + }); + const noEffortFieldModel = makeModel({ + id: 'claude-sonnet-4.5', name: 'Claude Sonnet 4.5', vendor: 'Anthropic', + }); + + const { agent, api } = createTestContext(disposables); + api.models = async () => [fullEffortModel, highOnlyModel, emptyEffortModel, unknownEffortModel, noEffortFieldModel]; + await agent.authenticate('https://api.github.com', 'tok'); + await tick(); + + const schemasById = Object.fromEntries( + agent.models.get().map(m => [m.id, m.configSchema] as const), + ); + assert.deepStrictEqual(schemasById, { + 'claude-opus-4.6': { + type: 'object', + properties: { + thinkingLevel: { + type: 'string', + title: 'Thinking Level', + description: 'Controls how much reasoning effort Claude uses.', + enum: ['low', 'medium', 'high'], + enumLabels: ['Low', 'Medium', 'High'], + default: 'high', + }, 
+ }, + }, + 'claude-sonnet-4.6': { + type: 'object', + properties: { + thinkingLevel: { + type: 'string', + title: 'Thinking Level', + description: 'Controls how much reasoning effort Claude uses.', + enum: ['high'], + enumLabels: ['High'], + default: 'high', + }, + }, + }, + 'claude-haiku-4.6': undefined, + 'claude-opus-4.5': { + type: 'object', + properties: { + thinkingLevel: { + type: 'string', + title: 'Thinking Level', + description: 'Controls how much reasoning effort Claude uses.', + enum: ['low', 'high'], + enumLabels: ['Low', 'High'], + default: 'high', + }, + }, + }, + 'claude-sonnet-4.5': undefined, + }); + }); + test('authenticate rejects non-GitHub resources without disturbing state', async () => { const { agent, proxy } = createTestContext(disposables); @@ -904,6 +927,7 @@ suite('ClaudeAgent', () => { // session that's a cheap in-memory drop because nothing has // been persisted yet. const { agent, sdk, sessionData } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); const result = await agent.createSession({ workingDirectory: URI.parse('file:///workspace') }); @@ -939,6 +963,7 @@ suite('ClaudeAgent', () => { // the hint. Mirrors CopilotAgent's `config.session ? // AgentSession.id(config.session) : generateUuid()` contract. const { agent } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); const expected = AgentSession.uri('claude', 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'); const result = await agent.createSession({ session: expected }); @@ -960,6 +985,7 @@ suite('ClaudeAgent', () => { // Locking the throw message here so a half-implementation can't // land in Phase 6 without re-greening this case. 
test('createSession config.model + config.config.permissionMode flow into Options on first send (M11 / Phase 6.1 C2)', async () => {
	// The model and permission mode supplied at `createSession` time
	// must survive until the first `sendMessage` and appear on the
	// startup options the SDK fake captures. Nothing may be replaced
	// by an SDK default on the way.
	const { agent, sdk } = createTestContext(disposables);
	await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok');

	const { session } = await agent.createSession({
		workingDirectory: URI.file('/work'),
		model: { id: 'claude-sonnet-4.6' },
		config: { permissionMode: 'plan' },
	});
	const sessionId = AgentSession.id(session);
	sdk.nextQueryMessages = [
		makeSystemInitMessage(sessionId),
		makeResultSuccess(sessionId),
	];

	await agent.sendMessage(session, 'hi', undefined, 'turn-1');

	// Only the first captured startup is relevant: that is the one
	// produced when the session materialized.
	const firstStartup = sdk.capturedStartupOptions[0];
	const observed = {
		model: firstStartup?.model,
		permissionMode: firstStartup?.permissionMode,
	};
	assert.deepStrictEqual(observed, {
		model: 'claude-sonnet-4.6',
		permissionMode: 'plan',
	});
});
test('canonical SDKAssistantMessage with tool_use content fires defense-in-depth warning (Phase 6.1 / Cycle F)', async () => {
	// A final `'assistant'` envelope is not expected to carry `tool_use`
	// blocks at this phase. If one shows up anyway, the agent must log
	// a warning and emit no response part for it.
	const capturingLog = new CapturingLogService();
	const { agent, sdk } = createTestContext(disposables, { logService: capturingLog });
	await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok');

	const { session } = await agent.createSession({ workingDirectory: URI.file('/work') });
	const sessionId = AgentSession.id(session);
	sdk.nextQueryMessages = [
		makeSystemInitMessage(sessionId),
		makeAssistantMessage(sessionId, [
			{ type: 'tool_use', id: 'tu_1', name: 'Bash', input: {} },
		]),
		makeResultSuccess(sessionId),
	];

	const signals: AgentSignal[] = [];
	disposables.add(agent.onDidSessionProgress(signal => { signals.push(signal); }));

	await agent.sendMessage(session, 'hi', undefined, 'turn-1');

	// Count the action signals that announce a new response part.
	let responsePartCount = 0;
	for (const signal of signals) {
		if (signal.kind === 'action' && signal.action.type === ActionType.SessionResponsePart) {
			responsePartCount++;
		}
	}

	const toolUsePattern = /tool_use/;
	const warnedAboutToolUse = capturingLog.warns.some(line => toolUsePattern.test(line));

	assert.deepStrictEqual({
		responsePartCount,
		warnedAboutToolUse,
	}, {
		responsePartCount: 0,
		warnedAboutToolUse: true,
	});
});
test('sendMessage tags SDKUserMessage.uuid with the effective turn id (M1 / Turn.id ↔ uuid invariant)', async () => {
	// The outbound user message handed to the SDK must carry the turn
	// id that the caller passed to `sendMessage`, not a freshly
	// generated uuid. The original note on this test says later fork
	// and replay phases rely on this.
	const explicitTurnId = 'turn-explicit';

	const { agent, sdk } = createTestContext(disposables);
	await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok');

	const { session } = await agent.createSession({ workingDirectory: URI.file('/work') });
	const sessionId = AgentSession.id(session);
	sdk.nextQueryMessages = [makeSystemInitMessage(sessionId), makeResultSuccess(sessionId)];

	await agent.sendMessage(session, 'hi', undefined, explicitTurnId);

	// Prompts that the first warm query drained from the agent. Fall
	// back to an empty list if no query was produced, so the count
	// assertion fails cleanly instead of throwing.
	const firstQuery = sdk.warmQueries[0];
	const drainedPrompts = firstQuery?.produced?.drainedPrompts ?? [];

	assert.deepStrictEqual({
		drainedCount: drainedPrompts.length,
		uuid: drainedPrompts[0]?.uuid,
	}, {
		drainedCount: 1,
		uuid: 'turn-explicit',
	});
});
test('createSession.model round-trips through the per-session DB to listSessions[].model (Phase 6.1 I8 + I7 + C2)', async () => {
	// Round trip under test:
	//   createSession({ model }) -> sendMessage (materializes the session)
	//   -> the SDK fake lists the session -> listSessions() reports the
	//   same model selection that was supplied at creation time.
	// A fresh literal is built for the request and for the expectation,
	// so the comparison never relies on object identity.
	const buildModelSelection = () => ({ id: 'claude-opus-4.6', config: { thinking: 'extended' } });

	const { agent, sdk } = createTestContext(disposables);
	await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok');

	const { session } = await agent.createSession({
		workingDirectory: URI.file('/work'),
		model: buildModelSelection(),
	});
	const sessionId = AgentSession.id(session);

	sdk.nextQueryMessages = [
		makeSystemInitMessage(sessionId),
		makeResultSuccess(sessionId),
	];
	await agent.sendMessage(session, 'hi', undefined, 'turn-1');

	// Make the SDK fake report the session in its own listing.
	sdk.sessionList = [
		{ sessionId, summary: 'Round trip', lastModified: 1234 },
	];

	const listed = await agent.listSessions();
	const match = listed.find(candidate => AgentSession.id(candidate.session) === sessionId);

	assert.deepStrictEqual({
		model: match?.model,
		summary: match?.summary,
	}, {
		model: buildModelSelection(),
		summary: 'Round trip',
	});
});
Composes: + // sdkService.getSessionInfo(id) -> summary, cwd, timestamps + // _readSessionMetadata(uri) -> model, customizationDirectory + // SDK miss => undefined (caller treats as deleted/not-yet-created). + const dbSidecar = new TestSessionDatabase(); + await dbSidecar.setMetadata('claude.customizationDirectory', URI.file('/cust').toString()); + await dbSidecar.setMetadata('claude.model', JSON.stringify({ id: 'claude-opus-4.6', config: { thinkingLevel: 'high' } })); + + const sessionData: ISessionDataService = { + ...createNullSessionDataService(), + tryOpenDatabase: async session => { + if (AgentSession.id(session) === 'sidecar') { + return { object: dbSidecar, dispose: () => { /* no-op */ } }; + } + return undefined; + }, + }; + const sdk = new FakeClaudeAgentSdkService(); + sdk.sessionList = [ + { sessionId: 'sidecar', summary: 'With Sidecar', lastModified: 5000, createdAt: 4900, cwd: '/work' }, + { sessionId: 'external', summary: 'External', lastModified: 6000, createdAt: 5900, cwd: '/raw-cli' }, + ]; + + const services = new ServiceCollection( + [ILogService, new NullLogService()], + [ICopilotApiService, new FakeCopilotApiService()], + [IClaudeProxyService, new FakeClaudeProxyService()], + [ISessionDataService, sessionData], + [IClaudeAgentSdkService, sdk], + ); + const instantiationService = disposables.add(new InstantiationService(services)); + const agent = disposables.add(instantiationService.createInstance(ClaudeAgent)); + + const sidecarUri = AgentSession.uri('claude', 'sidecar'); + const externalUri = AgentSession.uri('claude', 'external'); + const unknownUri = AgentSession.uri('claude', 'unknown'); + + const sidecar = await agent.getSessionMetadata!(sidecarUri); + const external = await agent.getSessionMetadata!(externalUri); + const unknown = await agent.getSessionMetadata!(unknownUri); + + assert.deepStrictEqual({ + sidecar: { + session: sidecar?.session.toString(), + summary: sidecar?.summary, + startTime: sidecar?.startTime, + modifiedTime: 
sidecar?.modifiedTime, + workingDirectory: sidecar?.workingDirectory?.toString(), + customizationDirectory: sidecar?.customizationDirectory?.toString(), + model: sidecar?.model, + }, + external: { + session: external?.session.toString(), + summary: external?.summary, + startTime: external?.startTime, + modifiedTime: external?.modifiedTime, + workingDirectory: external?.workingDirectory?.toString(), + customizationDirectory: external?.customizationDirectory, + model: external?.model, + }, + unknown, + sdkLookups: sdk.getSessionInfoCalls.slice().sort(), + }, { + sidecar: { + session: sidecarUri.toString(), + summary: 'With Sidecar', + startTime: 4900, + modifiedTime: 5000, + workingDirectory: URI.file('/work').toString(), + customizationDirectory: URI.file('/cust').toString(), + model: { id: 'claude-opus-4.6', config: { thinkingLevel: 'high' } }, + }, + external: { + session: externalUri.toString(), + summary: 'External', + startTime: 5900, + modifiedTime: 6000, + workingDirectory: URI.file('/raw-cli').toString(), + customizationDirectory: undefined, + model: undefined, + }, + unknown: undefined, + sdkLookups: ['external', 'sidecar', 'unknown'], + }); + }); + test('shutdown is idempotent and returns the same memoized promise on concurrent calls', async () => { // Phase 6+ INVARIANT: the SDK Query subprocess for each live // session is aborted inside `shutdown()`. If two callers race @@ -2080,6 +2429,7 @@ suite('ClaudeAgent', () => { // is locked NOW so Phase 6 inherits the contract for free. // Mirror of `CopilotAgent.shutdown()` at copilotAgent.ts:1246. const { agent } = createTestContext(disposables); + await agent.authenticate(GITHUB_COPILOT_PROTECTED_RESOURCE.resource, 'tok'); await agent.createSession({}); await agent.createSession({}); @@ -2145,6 +2495,7 @@ suite('ClaudeAgent', () => { // Recover. 
importBehavior = { listSessions: async () => [{ sessionId: 's', summary: 's', lastModified: 1 }], + getSessionInfo: async () => undefined, startup: async () => { throw new Error('TestableClaudeAgentSdkService: startup not modeled'); }, }; const result1 = await svc.listSessions(); @@ -2181,9 +2532,11 @@ suite('ClaudeAgent', () => { // `PermissionMode` enum. `Permissions` (allow/deny tool lists) // is reused unchanged from `platformSessionSchema` because the // SDK accepts `allowedTools` / `disallowedTools` natively. - // Tested keys: presence + ordering of enum + the four-value - // canonical set + default. Skipped keys (AutoApprove, Mode, - // Isolation, Branch, BranchNameHint) MUST be absent — workbench + // Tested keys: presence + ordering of enum + the six-value + // canonical set (matching SDK `PermissionMode` typedef at + // `sdk.d.ts:1560`, ratified in Phase 6.1 Cycle A under I2) + + // default. Skipped keys (AutoApprove, Mode, Isolation, Branch, + // BranchNameHint) MUST be absent — workbench // `AgentHostModePicker` and friends key off these property names // to decide what to render, and accidentally re-introducing // `mode` would drop the wrong picker into the Claude UI. 
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import assert from 'assert';
import { URI } from '../../../../base/common/uri.js';
import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../base/test/common/utils.js';
import { NullLogService } from '../../../log/common/log.js';
import type { AgentSignal } from '../../common/agentService.js';
import { ActionType } from '../../common/state/sessionActions.js';
import { ResponsePartKind } from '../../common/state/sessionState.js';
import { mapSDKMessageToAgentSignals } from '../../node/claude/claudeMapSessionEvents.js';
import {
	makeAssistantMessage,
	makeContentBlockStartText,
	makeContentBlockStartThinking,
	makeContentBlockStartToolUse,
	makeContentBlockStop,
	makeMessageStart,
	makeMessageStop,
	makeResultSuccess,
	makeStreamEvent,
	makeTextDelta,
	makeThinkingDelta,
} from './claudeMapSessionEventsTestUtils.js';

/**
 * Direct unit tests for {@link mapSDKMessageToAgentSignals}.
 *
 * Calls the mapper as a pure function — no state, no harness, no
 * service collection. The integrated `claudeAgent.test.ts` suite still
 * exercises the mapper end-to-end alongside the SDK envelope plumbing.
 *
 * NOTE(review): several tests below expect the part id announced by a
 * `content_block_start` call to reappear on deltas produced by later,
 * separate calls for the same block index. The ids are therefore
 * presumably derived deterministically from the inputs (or tracked by
 * the mapper) — confirm against the mapper implementation.
 */
suite('claudeMapSessionEvents — direct mapper tests', () => {

	ensureNoDisposablesAreLeakedInTestSuite();

	// Shared fixture values: the session URI handed to the mapper, its
	// string form as it is expected on emitted actions, the SDK-side
	// session id used to build fixture messages, and the turn id.
	const SESSION = URI.parse('agent-session://test/abc');
	const SESSION_STR = SESSION.toString();
	const SESSION_ID = 'sid-1';
	const TURN_ID = 'turn-1';

	/**
	 * Captures `warn` calls so defense-in-depth tests can assert the
	 * mapper logged the dropped tool_use diagnostic.
	 */
	class CapturingLogService extends NullLogService {
		readonly warns: string[] = [];
		override warn(message: string, ...args: unknown[]): void {
			// One space-joined line per call; extra args are stringified.
			this.warns.push([message, ...args.map(a => String(a))].join(' '));
		}
	}

	// A bare `message_start` stream event must not produce any signal.
	test('message_start emits no signals', () => {
		const signals = mapSDKMessageToAgentSignals(
			makeStreamEvent(SESSION_ID, makeMessageStart()),
			SESSION,
			TURN_ID,
			new NullLogService(),
		);

		assert.deepStrictEqual(signals, []);
	});

	// Feeds a full text block (start, two deltas, stop) framed by
	// `message_start`. Only the block start and the two deltas are
	// expected to yield signals, three in total.
	test('text content block: start emits SessionResponsePart, deltas emit SessionDelta', () => {
		const out: AgentSignal[] = [];
		const log = new NullLogService();
		const push = (msgs: AgentSignal[]) => out.push(...msgs);

		push(mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeMessageStart()), SESSION, TURN_ID, log));
		push(mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeContentBlockStartText(0)), SESSION, TURN_ID, log));
		push(mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeTextDelta(0, 'Hello, ')), SESSION, TURN_ID, log));
		push(mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeTextDelta(0, 'world!')), SESSION, TURN_ID, log));
		push(mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeContentBlockStop(0)), SESSION, TURN_ID, log));

		assert.strictEqual(out.length, 3);
		// The first signal announces the markdown response part. Its id
		// is not known in advance, so it is checked for presence only
		// and then reused as the expected `partId` of the deltas.
		const start = out[0];
		assert.ok(start.kind === 'action' && start.action.type === ActionType.SessionResponsePart);
		assert.strictEqual(start.action.session, SESSION_STR);
		assert.strictEqual(start.action.turnId, TURN_ID);
		assert.strictEqual(start.action.part.kind, ResponsePartKind.Markdown);
		const partId = start.action.part.id;
		assert.ok(partId.length > 0);

		assert.deepStrictEqual(out.slice(1), [
			{
				kind: 'action',
				session: SESSION,
				action: {
					type: ActionType.SessionDelta,
					session: SESSION_STR,
					turnId: TURN_ID,
					partId,
					content: 'Hello, ',
				},
			},
			{
				kind: 'action',
				session: SESSION,
				action: {
					type: ActionType.SessionDelta,
					session: SESSION_STR,
					turnId: TURN_ID,
					partId,
					content: 'world!',
				},
			},
		]);
	});

	// A thinking block start yields a Reasoning response part. The
	// following thinking delta must reference that part's id.
	test('thinking content block: start emits Reasoning part, deltas emit SessionReasoning', () => {
		const log = new NullLogService();

		const startSignals = mapSDKMessageToAgentSignals(
			makeStreamEvent(SESSION_ID, makeContentBlockStartThinking(0)),
			SESSION,
			TURN_ID,
			log,
		);
		assert.strictEqual(startSignals.length, 1);
		const start = startSignals[0];
		assert.ok(start.kind === 'action' && start.action.type === ActionType.SessionResponsePart);
		assert.strictEqual(start.action.part.kind, ResponsePartKind.Reasoning);
		const partId = start.action.part.id;

		const deltaSignals = mapSDKMessageToAgentSignals(
			makeStreamEvent(SESSION_ID, makeThinkingDelta(0, 'pondering')),
			SESSION,
			TURN_ID,
			log,
		);
		assert.deepStrictEqual(deltaSignals, [{
			kind: 'action',
			session: SESSION,
			action: {
				type: ActionType.SessionReasoning,
				session: SESSION_STR,
				turnId: TURN_ID,
				partId,
				content: 'pondering',
			},
		}]);
	});

	// A streamed `tool_use` block start must be dropped (no signals)
	// and reported through exactly one warning naming the block index.
	test('streamed tool_use content block is dropped with a warn log (defense-in-depth)', () => {
		const log = new CapturingLogService();

		const signals = mapSDKMessageToAgentSignals(
			makeStreamEvent(SESSION_ID, makeContentBlockStartToolUse(2, 'tu_1', 'Bash')),
			SESSION,
			TURN_ID,
			log,
		);

		assert.deepStrictEqual(signals, []);
		assert.strictEqual(log.warns.length, 1);
		assert.ok(log.warns[0].includes('dropped streamed tool_use block'));
		assert.ok(log.warns[0].includes('index 2'));
	});

	// The final assistant envelope emits nothing, even for its text
	// block. The `tool_use` block it carries produces one warning that
	// names the tool-use id and tool name.
	test('canonical assistant envelope drops tool_use blocks with a warn log and emits nothing', () => {
		const log = new CapturingLogService();

		const signals = mapSDKMessageToAgentSignals(
			makeAssistantMessage(SESSION_ID, [
				{ type: 'text', text: 'final', citations: null },
				{ type: 'tool_use', id: 'tu_a', name: 'Bash', input: {} },
			]),
			SESSION,
			TURN_ID,
			log,
		);

		assert.deepStrictEqual(signals, []);
		assert.strictEqual(log.warns.length, 1);
		assert.ok(log.warns[0].includes('dropped tool_use block on canonical SDKAssistantMessage'));
		assert.ok(log.warns[0].includes('id=tu_a'));
		assert.ok(log.warns[0].includes('name=Bash'));
	});

	// Control case for the test above: a text-only envelope is silent.
	// It emits no signals and logs no warning.
	test('canonical assistant envelope without tool_use emits nothing and does not warn', () => {
		const log = new CapturingLogService();

		const signals = mapSDKMessageToAgentSignals(
			makeAssistantMessage(SESSION_ID, [{ type: 'text', text: 'final answer', citations: null }]),
			SESSION,
			TURN_ID,
			log,
		);

		assert.deepStrictEqual(signals, []);
		assert.deepStrictEqual(log.warns, []);
	});

	// The fixture's usage counters are overwritten with distinctive
	// values so the expected SessionUsage payload shows which SDK field
	// feeds which action field. The single `modelUsage` key is expected
	// to surface as `usage.model`.
	test('result success emits SessionUsage (with model) followed by SessionTurnComplete', () => {
		const result = makeResultSuccess(SESSION_ID);
		result.usage.input_tokens = 12;
		result.usage.output_tokens = 34;
		result.usage.cache_read_input_tokens = 5;
		result.modelUsage = {
			'claude-test': {
				inputTokens: 12,
				outputTokens: 34,
				cacheReadInputTokens: 5,
				cacheCreationInputTokens: 0,
				webSearchRequests: 0,
				costUSD: 0,
				contextWindow: 200_000,
				maxOutputTokens: 8192,
			},
		};

		const signals = mapSDKMessageToAgentSignals(result, SESSION, TURN_ID, new NullLogService());

		assert.deepStrictEqual(signals, [
			{
				kind: 'action',
				session: SESSION,
				action: {
					type: ActionType.SessionUsage,
					session: SESSION_STR,
					turnId: TURN_ID,
					usage: {
						inputTokens: 12,
						outputTokens: 34,
						cacheReadTokens: 5,
						model: 'claude-test',
					},
				},
			},
			{
				kind: 'action',
				session: SESSION,
				action: {
					type: ActionType.SessionTurnComplete,
					session: SESSION_STR,
					turnId: TURN_ID,
				},
			},
		]);
	});

	// With an empty `modelUsage` map, both signals are still emitted,
	// but the usage payload carries no model.
	test('result success without modelUsage omits the model field on SessionUsage', () => {
		const result = makeResultSuccess(SESSION_ID);
		result.modelUsage = {};

		const signals = mapSDKMessageToAgentSignals(result, SESSION, TURN_ID, new NullLogService());

		assert.strictEqual(signals.length, 2);
		const usage = signals[0];
		assert.ok(usage.kind === 'action' && usage.action.type === ActionType.SessionUsage);
		assert.strictEqual(usage.action.usage.model, undefined);
	});

	// NOTE(review): only `message_stop` is fed to the mapper here. The
	// "unknown stream events" half of the title is not exercised by
	// this body.
	test('message_stop and unknown stream events emit nothing', () => {
		const log = new NullLogService();

		const stop = mapSDKMessageToAgentSignals(
			makeStreamEvent(SESSION_ID, makeMessageStop()),
			SESSION,
			TURN_ID,
			log,
		);
		assert.deepStrictEqual(stop, []);
	});

	// Two blocks at different indices (text at 0, thinking at 1) must
	// get different part ids. Each later delta must carry the id of
	// the block at its own index.
	test('multi-block ordering: text @0 then thinking @1 keep distinct part ids and route deltas correctly', () => {
		const log = new NullLogService();

		const text0 = mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeContentBlockStartText(0)), SESSION, TURN_ID, log);
		const think1 = mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeContentBlockStartThinking(1)), SESSION, TURN_ID, log);

		const text0Start = text0[0];
		const think1Start = think1[0];
		assert.ok(text0Start.kind === 'action' && text0Start.action.type === ActionType.SessionResponsePart);
		assert.ok(think1Start.kind === 'action' && think1Start.action.type === ActionType.SessionResponsePart);
		assert.strictEqual(text0Start.action.part.kind, ResponsePartKind.Markdown);
		assert.strictEqual(think1Start.action.part.kind, ResponsePartKind.Reasoning);
		const textPartId = text0Start.action.part.id;
		const thinkPartId = think1Start.action.part.id;
		assert.notStrictEqual(textPartId, thinkPartId);

		const dText = mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeTextDelta(0, 'A')), SESSION, TURN_ID, log);
		const dThink = mapSDKMessageToAgentSignals(makeStreamEvent(SESSION_ID, makeThinkingDelta(1, 'B')), SESSION, TURN_ID, log);

		assert.ok(dText[0].kind === 'action' && dText[0].action.type === ActionType.SessionDelta);
		assert.strictEqual(dText[0].action.partId, textPartId);
		assert.ok(dThink[0].kind === 'action' && dThink[0].action.type === ActionType.SessionReasoning);
		assert.strictEqual(dThink[0].action.partId, thinkPartId);
	});
});
+export type BetaRawContentBlockDeltaEvent = Anthropic.Beta.BetaRawContentBlockDeltaEvent; +export type BetaRawContentBlockStartEvent = Anthropic.Beta.BetaRawContentBlockStartEvent; +export type BetaRawContentBlockStopEvent = Anthropic.Beta.BetaRawContentBlockStopEvent; +export type BetaRawMessageStartEvent = Anthropic.Beta.BetaRawMessageStartEvent; +export type BetaRawMessageStopEvent = Anthropic.Beta.BetaRawMessageStopEvent; +export type BetaContentBlock = Anthropic.Beta.BetaContentBlock; + +/** + * Static fixture uuid used by every helper that needs to emit a + * `${string}-${string}-${string}-${string}-${string}`-shaped value. + * Tests that exercise uuid-sensitive logic (e.g. the Cycle C send-seam + * test) construct their own uuids instead of relying on this constant. + */ +export const TEST_UUID = '11111111-2222-3333-4444-555555555555'; + +/** + * Builds the non-nullable shape of {@link SDKResultSuccess.usage}. Most + * fields are zeroed; the mapper only reads `input_tokens`, + * `output_tokens`, and `cache_read_input_tokens`. 
+ */ +export function makeNonNullableUsage(): SDKResultSuccess['usage'] { + return { + cache_creation: { ephemeral_1h_input_tokens: 0, ephemeral_5m_input_tokens: 0 }, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + inference_geo: 'unknown', + input_tokens: 0, + iterations: [], + output_tokens: 0, + server_tool_use: { web_fetch_requests: 0, web_search_requests: 0 }, + service_tier: 'standard', + speed: 'standard', + }; +} + +export function makeSystemInitMessage(sessionId: string): SDKSystemMessage { + return { + type: 'system', + subtype: 'init', + apiKeySource: 'user', + claude_code_version: '0.0.0-test', + cwd: '/workspace', + tools: [], + mcp_servers: [], + model: 'claude-test', + permissionMode: 'default', + slash_commands: [], + output_style: 'default', + skills: [], + plugins: [], + uuid: TEST_UUID, + session_id: sessionId, + }; +} + +export function makeResultSuccess(sessionId: string): SDKResultSuccess { + return { + type: 'result', + subtype: 'success', + duration_ms: 0, + duration_api_ms: 0, + is_error: false, + num_turns: 1, + result: '', + stop_reason: 'end_turn', + total_cost_usd: 0, + usage: makeNonNullableUsage(), + modelUsage: {}, + permission_denials: [], + uuid: TEST_UUID, + session_id: sessionId, + }; +} + +// `stream_event` (SDKPartialAssistantMessage) builders. The SDK's +// `Options.includePartialMessages: true` setting (Phase 6 §3.4) routes +// raw `BetaRawMessageStreamEvent`s through to the agent so we can map +// per-token. The deep `BetaMessage` shape on `message_start` carries +// many required fields irrelevant to mapping; these helpers populate +// only what the mapper reads, with everything else set to safe zero +// values so the SDK type-checks pass without `as unknown` casts. 
+ +export function makeStreamEvent( + sessionId: string, + event: SDKPartialAssistantMessage['event'], +): SDKPartialAssistantMessage { + return { + type: 'stream_event', + event, + parent_tool_use_id: null, + uuid: TEST_UUID, + session_id: sessionId, + }; +} + +export function makeMessageStart(): BetaRawMessageStartEvent { + return { + type: 'message_start', + message: { + id: 'msg_test', + type: 'message', + role: 'assistant', + model: 'claude-test', + content: [], + stop_reason: null, + stop_sequence: null, + stop_details: null, + container: null, + context_management: null, + usage: { + cache_creation: { ephemeral_1h_input_tokens: 0, ephemeral_5m_input_tokens: 0 }, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + inference_geo: 'unknown', + input_tokens: 0, + iterations: [], + output_tokens: 0, + server_tool_use: { web_fetch_requests: 0, web_search_requests: 0 }, + service_tier: 'standard', + speed: 'standard', + }, + }, + }; +} + +export function makeContentBlockStartText(index: number): BetaRawContentBlockStartEvent { + return { + type: 'content_block_start', + index, + content_block: { type: 'text', text: '', citations: null }, + }; +} + +export function makeContentBlockStartThinking(index: number): BetaRawContentBlockStartEvent { + return { + type: 'content_block_start', + index, + content_block: { type: 'thinking', thinking: '', signature: '' }, + }; +} + +export function makeContentBlockStartToolUse( + index: number, + id: string, + name: string, +): BetaRawContentBlockStartEvent { + return { + type: 'content_block_start', + index, + content_block: { type: 'tool_use', id, name, input: {} }, + }; +} + +export function makeTextDelta(index: number, text: string): BetaRawContentBlockDeltaEvent { + return { + type: 'content_block_delta', + index, + delta: { type: 'text_delta', text }, + }; +} + +export function makeThinkingDelta(index: number, thinking: string): BetaRawContentBlockDeltaEvent { + return { + type: 'content_block_delta', + index, 
+ delta: { type: 'thinking_delta', thinking }, + }; +} + +export function makeContentBlockStop(index: number): BetaRawContentBlockStopEvent { + return { + type: 'content_block_stop', + index, + }; +} + +export function makeMessageStop(): BetaRawMessageStopEvent { + return { type: 'message_stop' }; +} + +/** + * Builds the canonical {@link SDKAssistantMessage} envelope (`type: + * 'assistant'`) the SDK delivers as the final, authoritative message + * for a turn alongside its `'stream_event'` partials. The {@link + * content} blocks mirror what the partial accumulator should already + * have produced via `content_block_*` events. Only the fields the + * mapper inspects are filled with real values; the rest are zeroed so + * the SDK type-checks pass. + */ +export function makeAssistantMessage( + sessionId: string, + content: BetaContentBlock[], +): SDKAssistantMessage { + return { + type: 'assistant', + message: { + id: 'msg_test', + type: 'message', + role: 'assistant', + model: 'claude-test', + content, + stop_reason: 'end_turn', + stop_sequence: null, + container: null, + context_management: null, + usage: { + cache_creation: { ephemeral_1h_input_tokens: 0, ephemeral_5m_input_tokens: 0 }, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + inference_geo: 'unknown', + input_tokens: 0, + iterations: [], + output_tokens: 0, + server_tool_use: { web_fetch_requests: 0, web_search_requests: 0 }, + service_tier: 'standard', + speed: 'standard', + }, + }, + parent_tool_use_id: null, + uuid: TEST_UUID, + session_id: sessionId, + }; +} diff --git a/src/vs/platform/agentHost/test/node/copilotAgent.test.ts b/src/vs/platform/agentHost/test/node/copilotAgent.test.ts index 5a4bbc60c0f5f..aab20eef2a409 100644 --- a/src/vs/platform/agentHost/test/node/copilotAgent.test.ts +++ b/src/vs/platform/agentHost/test/node/copilotAgent.test.ts @@ -29,7 +29,7 @@ import { AgentConfigurationService, IAgentConfigurationService } from '../../nod import { AgentHostStateManager } from 
'../../node/agentHostStateManager.js'; import { IAgentHostGitService } from '../../node/agentHostGitService.js'; import { IAgentHostTerminalManager } from '../../node/agentHostTerminalManager.js'; -import { CopilotAgent, getCopilotWorktreeBranchName, getCopilotWorktreeName, getCopilotWorktreesRoot } from '../../node/copilot/copilotAgent.js'; +import { COPILOT_AGENT_HOST_SYSTEM_MESSAGE, CopilotAgent, getCopilotBranchNameHintFromMessage, getCopilotWorktreeBranchName, getCopilotWorktreeName, getCopilotWorktreesRoot } from '../../node/copilot/copilotAgent.js'; import { CopilotAgentSession, type SessionWrapperFactory } from '../../node/copilot/copilotAgentSession.js'; import { CopilotSessionWrapper } from '../../node/copilot/copilotSessionWrapper.js'; import { ShellManager } from '../../node/copilot/copilotShellTools.js'; @@ -273,8 +273,8 @@ class TestableCopilotAgent extends CopilotAgent { return stub; } - resolveWorktreeForTest(config: Parameters[0], sessionId: string): Promise { - return this._resolveSessionWorkingDirectory(config, sessionId); + resolveWorktreeForTest(config: Parameters[0], sessionId: string, prompt?: string): Promise { + return this._resolveSessionWorkingDirectory(config, sessionId, prompt); } } @@ -372,6 +372,16 @@ suite('CopilotAgent', () => { assert.strictEqual(getCopilotWorktreeBranchName('12345678-aaaa-bbbb-cccc-123456789abc', 'a'.repeat(48)).length, 'agents/'.length + 48 + '-12345678'.length); }); + test('derives slug branch hint from first message', () => { + assert.strictEqual(getCopilotBranchNameHintFromMessage('Add agent host config'), 'add-agent-host-config'); + assert.strictEqual(getCopilotBranchNameHintFromMessage(' Fix: the bug!! 
'), 'fix-the-bug'); + assert.strictEqual(getCopilotBranchNameHintFromMessage('Refactor café ☕ rendering'), 'refactor-cafe-rendering'); + assert.strictEqual(getCopilotBranchNameHintFromMessage('one two three four five six seven eight nine ten'), 'one-two-three-four-five-six-seven-eight'); + assert.strictEqual(getCopilotBranchNameHintFromMessage('a'.repeat(100))?.length, 48); + assert.strictEqual(getCopilotBranchNameHintFromMessage('!!! ??? ...'), undefined); + assert.strictEqual(getCopilotBranchNameHintFromMessage(''), undefined); + }); + test('returns empty models and throws AuthRequired for sessions before authentication', async () => { const agent = createTestAgent(disposables); try { @@ -676,6 +686,43 @@ suite('CopilotAgent', () => { // Forking a provisional session is no longer a special case: the agent // service drops `config.fork` for sources with no turns, so the call // reduces to a plain new-session create. + + test('materialization passes VS Code-specific system message to the SDK', async () => { + const sessionDataService = disposables.add(new TestSessionDataService()); + const client = new TestCopilotClient([]); + let capturedConfig: Parameters[0] | undefined; + client.createSession = async config => { + capturedConfig = config; + return new MockCopilotSession() as unknown as CopilotSession; + }; + + const agent = createTestAgent(disposables, { sessionDataService, copilotClient: client }); + try { + await agent.authenticate('https://api.github.com', 'token'); + + const result = await agent.createSession({ + session: AgentSession.uri('copilotcli', 'system-message-session'), + workingDirectory: URI.file('/workspace'), + }); + assert.strictEqual(result.provisional, true); + + await agent.sendMessage(result.session, 'hello'); + + assert.ok(capturedConfig, 'SDK createSession should be called during provisional materialization'); + const systemMessage = capturedConfig.systemMessage; + assert.deepStrictEqual(systemMessage, COPILOT_AGENT_HOST_SYSTEM_MESSAGE); 
+ if (!systemMessage || systemMessage.mode !== 'customize') { + assert.fail('Expected customize-mode system message'); + } + assert.strictEqual(systemMessage.sections?.identity?.action, 'replace'); + assert.strictEqual( + systemMessage.sections?.identity?.content, + 'You are an AI assistant using Copilot CLI runtime in VS Code. You help users with software engineering tasks. When asked about your identity, you must state that you are an AI assistant using Copilot CLI runtime in VS Code.' + ); + } finally { + await disposeAgent(agent); + } + }); }); suite('onClientToolCallComplete', () => { @@ -824,8 +871,8 @@ suite('CopilotAgent', () => { const expectedBranchName = getCopilotWorktreeBranchName(sessionId, branchHint); const workingDir = await agent.resolveWorktreeForTest({ workingDirectory: repositoryRoot, - config: { isolation: 'worktree', branch: 'main', branchNameHint: branchHint }, - }, sessionId); + config: { isolation: 'worktree', branch: 'main' }, + }, sessionId, 'Add feature'); assert.ok(workingDir, 'resolveWorktreeForTest must return a worktree URI'); assert.deepStrictEqual(gitService.addedWorktrees.length, 1, 'addWorktree must be called once'); assert.strictEqual(gitService.addedWorktrees[0].branchName, expectedBranchName); @@ -950,7 +997,7 @@ suite('CopilotAgent', () => { await agent.authenticate('https://api.github.com', 'token'); const workingDir = await agent.resolveWorktreeForTest({ workingDirectory: repositoryRoot, - config: { isolation: 'worktree', branch: 'main', branchNameHint: 'feat' }, + config: { isolation: 'worktree', branch: 'main' }, }, sessionId); assert.ok(workingDir, 'worktree must be created'); // Simulate the worktree directory existing on disk so the archive @@ -997,7 +1044,7 @@ suite('CopilotAgent', () => { await agent.authenticate('https://api.github.com', 'token'); const workingDir = await agent.resolveWorktreeForTest({ workingDirectory: repositoryRoot, - config: { isolation: 'worktree', branch: 'main', branchNameHint: 'feat' }, + 
config: { isolation: 'worktree', branch: 'main' }, }, sessionId); await fs.mkdir(workingDir!.fsPath, { recursive: true }); gitService.dirtyWorkingDirectories.add(workingDir!.fsPath); @@ -1028,7 +1075,7 @@ suite('CopilotAgent', () => { await agent.authenticate('https://api.github.com', 'token'); const workingDir = await agent.resolveWorktreeForTest({ workingDirectory: repositoryRoot, - config: { isolation: 'worktree', branch: 'main', branchNameHint: 'feat' }, + config: { isolation: 'worktree', branch: 'main' }, }, sessionId); await fs.mkdir(workingDir!.fsPath, { recursive: true }); // Drop the branch so cleanup must skip. @@ -1083,7 +1130,7 @@ suite('CopilotAgent', () => { await agent.authenticate('https://api.github.com', 'token'); const workingDir = await agent.resolveWorktreeForTest({ workingDirectory: repositoryRoot, - config: { isolation: 'worktree', branch: 'main', branchNameHint: 'feat' }, + config: { isolation: 'worktree', branch: 'main' }, }, sessionId); await fs.mkdir(workingDir!.fsPath, { recursive: true }); diff --git a/src/vs/sessions/LAYOUT.md b/src/vs/sessions/LAYOUT.md index 6d676fd6d749f..f8415ceca4a25 100644 --- a/src/vs/sessions/LAYOUT.md +++ b/src/vs/sessions/LAYOUT.md @@ -667,6 +667,7 @@ interface IPartVisibilityState { | Date | Change | |------|--------| +| 2026-05-07 | Updated the sessions new-chat empty state so the workspace title row now reads `New session in {workspace} with {coding agent harness}` by rendering the session type picker inline above the input, leaving the bottom control row for approvals and repository controls. | | 2026-05-06 | Polished the sessions command-center title widget hide/show behavior: the command-center toolbar now refreshes explicitly on new-chat context changes so adjacent actions disappear together, and the title widget uses a reduced-motion-aware subtle fade only when entering or leaving the new chat view. 
| | 2026-05-06 | Hid the sessions command-center title widget while the new chat view is visible (`isNewChatSession`), so titlebar session chrome only appears for existing or newly created chat threads. | | 2026-05-06 | Changed the default sessions shell gradient from a diagonal linear gradient to a bottom-right radial gradient so the accent tint stays behind the chat surface while the window-controls corner and sidebar footer return to the base shell color. | diff --git a/src/vs/sessions/contrib/chat/browser/agentHost/agentHostSessionConfigPicker.ts b/src/vs/sessions/contrib/chat/browser/agentHost/agentHostSessionConfigPicker.ts index cbadb311f345d..d4f7f78456bcb 100644 --- a/src/vs/sessions/contrib/chat/browser/agentHost/agentHostSessionConfigPicker.ts +++ b/src/vs/sessions/contrib/chat/browser/agentHost/agentHostSessionConfigPicker.ts @@ -305,9 +305,6 @@ export class AgentHostSessionConfigPicker extends Disposable { const properties = this._orderProperties(Object.entries(resolvedConfig.schema.properties)); for (const [property, schema] of properties) { - if (property === SessionConfigKey.BranchNameHint) { - continue; - } // Only render pickers for properties we know how to present. Today // that's string properties with either a static `enum` or a // dynamic enum sourced via `getSessionConfigCompletions`. 
diff --git a/src/vs/sessions/contrib/chat/browser/media/chatWidget.css b/src/vs/sessions/contrib/chat/browser/media/chatWidget.css index 05baa4e1ca12f..8573056db682c 100644 --- a/src/vs/sessions/contrib/chat/browser/media/chatWidget.css +++ b/src/vs/sessions/contrib/chat/browser/media/chatWidget.css @@ -198,8 +198,13 @@ white-space: nowrap; } +.session-workspace-picker-with-label:has(+ .sessions-chat-session-type-picker .action-label.hidden) { + display: none; +} + /* Project picker in inline title row */ -.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label { +.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label, +.sessions-chat-picker-slot.sessions-chat-session-type-picker .action-label { height: auto; padding: 4px; font-size: 18px; @@ -211,21 +216,25 @@ touch-action: manipulation; } -.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label:hover { +.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label:hover, +.sessions-chat-picker-slot.sessions-chat-session-type-picker .action-label:hover { background-color: var(--vscode-toolbar-hoverBackground); color: var(--vscode-foreground); } -.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label .sessions-chat-dropdown-label { +.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label .sessions-chat-dropdown-label, +.sessions-chat-picker-slot.sessions-chat-session-type-picker .action-label .sessions-chat-dropdown-label { font-size: 18px; } -.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label > .codicon:not(.sessions-chat-dropdown-chevron) { +.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label > .codicon:not(.sessions-chat-dropdown-chevron), +.sessions-chat-picker-slot.sessions-chat-session-type-picker .action-label > .codicon:not(.codicon-chevron-down) { font-size: 16px; margin: 0; } -.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label 
.sessions-chat-dropdown-chevron { +.sessions-chat-picker-slot.sessions-chat-workspace-picker .action-label .sessions-chat-dropdown-chevron, +.sessions-chat-picker-slot.sessions-chat-session-type-picker .action-label .sessions-chat-dropdown-chevron { display: inline-flex; align-items: center; justify-content: center; diff --git a/src/vs/sessions/contrib/chat/browser/mobile/mobileSessionTypePicker.ts b/src/vs/sessions/contrib/chat/browser/mobile/mobileSessionTypePicker.ts index 8c7535b6e06da..88ae1bf4bec70 100644 --- a/src/vs/sessions/contrib/chat/browser/mobile/mobileSessionTypePicker.ts +++ b/src/vs/sessions/contrib/chat/browser/mobile/mobileSessionTypePicker.ts @@ -36,7 +36,7 @@ export class MobileSessionTypePicker extends SessionTypePicker { super(actionWidgetService, sessionsManagementService, sessionsProvidersService, storageService); } - override render(container: HTMLElement): void { + override render(container: HTMLElement, options?: { className?: string }): void { // Always render so the session-type chip is visible in the chip // row on phone. The base class renders a trigger that the mobile // `_showPicker` override routes to a bottom sheet, while desktop @@ -44,7 +44,7 @@ export class MobileSessionTypePicker extends SessionTypePicker { // viewports also means rotation across the phone breakpoint // keeps the trigger alive — consistent with MOBILE.md's // principle of same functionality, different presentation. 
- super.render(container); + super.render(container, options); } protected override _showPicker(): void { diff --git a/src/vs/sessions/contrib/chat/browser/newChatInput.ts b/src/vs/sessions/contrib/chat/browser/newChatInput.ts index 718e4e5a440d0..163de415abb02 100644 --- a/src/vs/sessions/contrib/chat/browser/newChatInput.ts +++ b/src/vs/sessions/contrib/chat/browser/newChatInput.ts @@ -148,6 +148,7 @@ export class NewChatInputWidget extends Disposable implements IHistoryNavigation loading: IObservable; minEditorHeight?: number; placeholder?: string; + renderSessionTypePickerInControls?: boolean; }, @IInstantiationService private readonly instantiationService: IInstantiationService, @IModelService private readonly modelService: IModelService, @@ -211,7 +212,9 @@ export class NewChatInputWidget extends Disposable implements IHistoryNavigation const newChatBottomContainer = dom.append(parent, dom.$('.new-chat-bottom-container')); const newChatControlsContainer = dom.append(newChatBottomContainer, dom.$('.new-chat-controls-container')); - this.sessionTypePicker.render(newChatControlsContainer); + if (this.options.renderSessionTypePickerInControls !== false) { + this.sessionTypePicker.render(newChatControlsContainer); + } this._register(this.instantiationService.createInstance(MenuWorkbenchToolBar, dom.append(newChatControlsContainer, dom.$('')), Menus.NewSessionControl, { hiddenItemStrategy: HiddenItemStrategy.NoHide, })); diff --git a/src/vs/sessions/contrib/chat/browser/newChatViewPane.ts b/src/vs/sessions/contrib/chat/browser/newChatViewPane.ts index e866cff5067b1..d3697d51f0a82 100644 --- a/src/vs/sessions/contrib/chat/browser/newChatViewPane.ts +++ b/src/vs/sessions/contrib/chat/browser/newChatViewPane.ts @@ -76,6 +76,7 @@ class NewChatWidget extends Disposable { sendRequest: async (text: string, attachedContext?: IChatRequestVariableEntry[]) => this._send(text, attachedContext), canSendRequest, loading, + renderSessionTypePickerInControls: false, })); 
this._register(this._workspacePicker.onDidSelectWorkspace(async workspace => { @@ -203,6 +204,9 @@ class NewChatWidget extends Disposable { : localize('newSessionChooseWorkspace', "Start by picking a"); this._workspacePicker.render(pickersRow); + const withLabel = dom.append(pickersRow, dom.$('.session-workspace-picker-label.session-workspace-picker-with-label')); + withLabel.textContent = localize('newSessionWith', "with"); + this._newChatInput.sessionTypePicker.render(pickersRow, { className: 'sessions-chat-session-type-picker' }); return this._workspacePicker.onDidSelectWorkspace(() => { const workspace = this._workspacePicker.selectedProject; pickersLabel.textContent = workspace ? localize('newSessionIn', "New session in") : localize('newSessionChooseWorkspace', "Start by picking a"); diff --git a/src/vs/sessions/contrib/chat/browser/sessionTypePicker.ts b/src/vs/sessions/contrib/chat/browser/sessionTypePicker.ts index 41c429095959d..2e1694c749acc 100644 --- a/src/vs/sessions/contrib/chat/browser/sessionTypePicker.ts +++ b/src/vs/sessions/contrib/chat/browser/sessionTypePicker.ts @@ -79,10 +79,16 @@ export class SessionTypePicker extends Disposable { return this._sessionType; } - render(container: HTMLElement): void { + render(container: HTMLElement, options?: { className?: string }): void { this._renderDisposables.clear(); const slot = dom.append(container, dom.$('.sessions-chat-picker-slot')); + if (options?.className) { + const classNames = options.className.split(/\s+/).filter(className => className.length > 0); + if (classNames.length > 0) { + slot.classList.add(...classNames); + } + } this._renderDisposables.add({ dispose: () => slot.remove() }); const trigger = dom.append(slot, dom.$('a.action-label')); @@ -184,7 +190,8 @@ export class SessionTypePicker extends Disposable { const labelSpan = dom.append(this._triggerElement, dom.$('span.sessions-chat-dropdown-label')); labelSpan.textContent = modeLabel; - dom.append(this._triggerElement, 
renderIcon(Codicon.chevronDown)); + const chevron = dom.append(this._triggerElement, renderIcon(Codicon.chevronDown)); + chevron.classList.add('sessions-chat-dropdown-chevron'); this._triggerElement.ariaLabel = localize('sessionTypePicker.triggerAriaLabel', "Pick Session Type, {0}", modeLabel); } diff --git a/src/vs/sessions/contrib/copilotChatSessions/COPILOT_CHAT_SESSIONS_PROVIDER.md b/src/vs/sessions/contrib/copilotChatSessions/COPILOT_CHAT_SESSIONS_PROVIDER.md index d3aa181920d6b..e485183418d9a 100644 --- a/src/vs/sessions/contrib/copilotChatSessions/COPILOT_CHAT_SESSIONS_PROVIDER.md +++ b/src/vs/sessions/contrib/copilotChatSessions/COPILOT_CHAT_SESSIONS_PROVIDER.md @@ -156,4 +156,6 @@ this._register(actionViewItemService.register( The picker model is currently **hardcoded per session type**. Each session type that needs pickers must register its own actions and widgets with appropriate `when` clauses. For example, the Copilot CLI permission picker (`PermissionPicker`) and the Claude permission mode picker (`ClaudePermissionModePicker`) are separate, hardcoded widgets even though they serve a similar purpose. +Context-menu actions on session list items are similarly hardcoded per session type. The `Delete...` action registered for `SessionItemContextMenuId` gates on both `chatSessionProviderId == COPILOT_PROVIDER_ID` *and* `chatSessionType != CLAUDE_CODE_SESSION_TYPE`, because Claude sessions (although exposed through the Copilot provider) don't support the native delete flow. Any new session type that opts into the Copilot provider but not into a shared action needs its own `chatSessionType` exclusion in the action's `when` clause. + Ideally, pickers would be **generic and contributable** — a session type would declare its option groups (as the Claude extension already does via `IChatSessionsService.setOptionGroupsForSessionType()`), and the welcome view would dynamically render pickers from those groups without needing per-type widget classes. 
The active-session chat widget (`chatInputPart.ts`) already has this generic infrastructure via `createChatSessionPickerWidgets()`, but the welcome view does not yet use it. Until the welcome view adopts this pattern, new session types must follow the hardcoded approach above. diff --git a/src/vs/sessions/contrib/copilotChatSessions/browser/copilotChatSessionsActions.ts b/src/vs/sessions/contrib/copilotChatSessions/browser/copilotChatSessionsActions.ts index 3df237bee5746..c4c468d4b5840 100644 --- a/src/vs/sessions/contrib/copilotChatSessions/browser/copilotChatSessionsActions.ts +++ b/src/vs/sessions/contrib/copilotChatSessions/browser/copilotChatSessionsActions.ts @@ -515,7 +515,10 @@ registerAction2(class DeleteSessionAction extends Action2 { id: SessionItemContextMenuId, group: '1_edit', order: 4, - when: ContextKeyExpr.equals(ChatSessionProviderIdContext.key, COPILOT_PROVIDER_ID), + when: ContextKeyExpr.and( + ContextKeyExpr.equals(ChatSessionProviderIdContext.key, COPILOT_PROVIDER_ID), + ContextKeyExpr.notEquals('chatSessionType', CLAUDE_CODE_SESSION_TYPE), + ), }] }); } diff --git a/src/vs/workbench/api/browser/mainThreadChatDebug.ts b/src/vs/workbench/api/browser/mainThreadChatDebug.ts index 5281b3a786f53..fc53af618efbe 100644 --- a/src/vs/workbench/api/browser/mainThreadChatDebug.ts +++ b/src/vs/workbench/api/browser/mainThreadChatDebug.ts @@ -114,7 +114,7 @@ export class MainThreadChatDebug extends Disposable implements MainThreadChatDeb case 'toolCall': return { ...base, kind: 'toolCall', toolName: event.toolName, toolCallId: event.toolCallId, input: event.input, output: event.output, result: event.result, durationInMillis: event.durationInMillis }; case 'modelTurn': - return { ...base, kind: 'modelTurn', model: event.model, requestName: event.requestName, inputTokens: event.inputTokens, outputTokens: event.outputTokens, cachedTokens: event.cachedTokens, totalTokens: event.totalTokens, durationInMillis: event.durationInMillis }; + return { ...base, kind: 
'modelTurn', model: event.model, requestName: event.requestName, inputTokens: event.inputTokens, outputTokens: event.outputTokens, cachedTokens: event.cachedTokens, totalTokens: event.totalTokens, copilotUsageNanoAiu: event.copilotUsageNanoAiu, durationInMillis: event.durationInMillis }; case 'generic': return { ...base, kind: 'generic', name: event.name, details: event.details, level: event.level, category: event.category }; case 'subagentInvocation': @@ -156,6 +156,7 @@ export class MainThreadChatDebug extends Disposable implements MainThreadChatDeb outputTokens: dto.outputTokens, cachedTokens: dto.cachedTokens, totalTokens: dto.totalTokens, + copilotUsageNanoAiu: dto.copilotUsageNanoAiu, durationInMillis: dto.durationInMillis, }; case 'generic': diff --git a/src/vs/workbench/api/common/extHost.protocol.ts b/src/vs/workbench/api/common/extHost.protocol.ts index e6dcc34086047..0d26fe9292661 100644 --- a/src/vs/workbench/api/common/extHost.protocol.ts +++ b/src/vs/workbench/api/common/extHost.protocol.ts @@ -1483,6 +1483,7 @@ export interface IChatDebugModelTurnEventDto extends IChatDebugEventCommonDto { readonly outputTokens?: number; readonly cachedTokens?: number; readonly totalTokens?: number; + readonly copilotUsageNanoAiu?: number; readonly durationInMillis?: number; } diff --git a/src/vs/workbench/api/common/extHostChatDebug.ts b/src/vs/workbench/api/common/extHostChatDebug.ts index a32b21525837a..276429a412343 100644 --- a/src/vs/workbench/api/common/extHostChatDebug.ts +++ b/src/vs/workbench/api/common/extHostChatDebug.ts @@ -157,6 +157,7 @@ export class ExtHostChatDebug extends Disposable implements ExtHostChatDebugShap outputTokens: e.outputTokens, cachedTokens: e.cachedTokens, totalTokens: e.totalTokens, + copilotUsageNanoAiu: e.copilotUsageNanoAiu, durationInMillis: e.durationInMillis, }; } @@ -347,6 +348,7 @@ export class ExtHostChatDebug extends Disposable implements ExtHostChatDebugShap evt.outputTokens = dto.outputTokens; evt.cachedTokens = 
dto.cachedTokens; evt.totalTokens = dto.totalTokens; + evt.copilotUsageNanoAiu = dto.copilotUsageNanoAiu; evt.durationInMillis = dto.durationInMillis; return evt; } diff --git a/src/vs/workbench/api/common/extHostTypes.ts b/src/vs/workbench/api/common/extHostTypes.ts index c58430ade8f21..e7ea45cb0bda9 100644 --- a/src/vs/workbench/api/common/extHostTypes.ts +++ b/src/vs/workbench/api/common/extHostTypes.ts @@ -3640,6 +3640,7 @@ export class ChatDebugModelTurnEvent { cachedTokens?: number; totalTokens?: number; cost?: number; + copilotUsageNanoAiu?: number; durationInMillis?: number; constructor(created: Date) { diff --git a/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostChatContribution.ts b/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostChatContribution.ts index d572ec36bab30..009d27099fe89 100644 --- a/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostChatContribution.ts +++ b/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostChatContribution.ts @@ -35,7 +35,7 @@ import { AgentHostSessionListController } from './agentHostSessionListController import { LoggingAgentConnection } from './loggingAgentConnection.js'; import { SyncedCustomizationBundler } from './syncedCustomizationBundler.js'; -export { AgentHostSessionHandler, getAgentHostBranchNameHint } from './agentHostSessionHandler.js'; +export { AgentHostSessionHandler } from './agentHostSessionHandler.js'; export { AgentHostSessionListController } from './agentHostSessionListController.js'; /** diff --git a/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostSessionHandler.ts b/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostSessionHandler.ts index 34bbc08bad979..3c811b9c9c0de 100644 --- a/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostSessionHandler.ts +++ b/src/vs/workbench/contrib/chat/browser/agentSessions/agentHost/agentHostSessionHandler.ts @@ -18,7 
+18,6 @@ import { isLocation, type Location } from '../../../../../../editor/common/langu import { IPosition } from '../../../../../../editor/common/core/position.js'; import { localize } from '../../../../../../nls.js'; import { AgentProvider, AgentSession, type IAgentConnection } from '../../../../../../platform/agentHost/common/agentService.js'; -import { SessionConfigKey } from '../../../../../../platform/agentHost/common/sessionConfigKeys.js'; import { IAgentSubscription, observableFromSubscription } from '../../../../../../platform/agentHost/common/state/agentSubscription.js'; import { SessionTruncatedAction } from '../../../../../../platform/agentHost/common/state/protocol/actions.js'; import { CompletionItemKind as AhpCompletionItemKind, type CompletionItem as AhpCompletionItem } from '../../../../../../platform/agentHost/common/state/protocol/commands.js'; @@ -336,19 +335,6 @@ export interface IAgentHostSessionHandlerConfig { readonly customizations?: IObservable; } -export function getAgentHostBranchNameHint(message: string): string | undefined { - const words = message - .toLowerCase() - .normalize('NFKD') - .replace(/[^a-z0-9]+/g, '-') - .replace(/^-+|-+$/g, '') - .split('-') - .filter(word => word.length > 0) - .slice(0, 8); - const hint = words.join('-').slice(0, 48).replace(/-+$/g, ''); - return hint.length > 0 ? hint : undefined; -} - /** * Converts a UTF-16 code-unit offset in `text` to a 1-based Monaco * `IPosition`. 
Used to translate AHP completion-item ranges (which use @@ -368,7 +354,6 @@ function offsetToPosition(text: string, offset: number): IPosition { } return { lineNumber, column }; } - export class AgentHostSessionHandler extends Disposable implements IChatSessionContentProvider { private readonly _activeSessions = new ResourceMap(); @@ -665,6 +650,8 @@ export class AgentHostSessionHandler extends Disposable implements IChatSessionC this._activeSessions.set(sessionResource, session); if (!isNewSession) { + this._ensurePendingMessageSubscription(sessionResource, resolvedSession); + // If there are historical turns with file edits, eagerly create // the editing session once the ChatModel is available so that // edit pills render with diff info on session restore. @@ -744,7 +731,7 @@ export class AgentHostSessionHandler extends Disposable implements IChatSessionC // folder-pick time, or this session was created via a legacy/ // test path). Fall back to the original create-then-subscribe // flow. 
- await this._createAndSubscribe(request.sessionResource, this._createModelSelection(request.userSelectedModelId, request.modelConfiguration), undefined, request.agentHostSessionConfig, getAgentHostBranchNameHint(request.message)); + await this._createAndSubscribe(request.sessionResource, this._createModelSelection(request.userSelectedModelId, request.modelConfiguration), undefined, request.agentHostSessionConfig); } else { // Eager-created session: take a refcounted subscription so the // handler observes state changes for the duration of the chat @@ -829,7 +816,7 @@ export class AgentHostSessionHandler extends Disposable implements IChatSessionC // --- Steering --- if (currentSteering) { - if (currentSteering.id !== prevSteering?.id) { + if (currentSteering.id !== prevSteering?.id || currentSteering.text !== prevSteering.userMessage.text) { this._dispatchAction({ type: ActionType.SessionPendingMessageSet, session, @@ -861,9 +848,10 @@ export class AgentHostSessionHandler extends Disposable implements IChatSessionC } // --- Queued: additions --- - const prevQueuedIds = new Set(prevQueued.map(q => q.id)); + const prevQueuedById = new Map(prevQueued.map(q => [q.id, q])); for (const q of currentQueued) { - if (!prevQueuedIds.has(q.id)) { + const prev = prevQueuedById.get(q.id); + if (!prev || q.text !== prev.userMessage.text) { this._dispatchAction({ type: ActionType.SessionPendingMessageSet, session, @@ -2285,8 +2273,7 @@ export class AgentHostSessionHandler extends Disposable implements IChatSessionC } /** Creates a new backend session and subscribes to its state. */ - private async _createAndSubscribe(sessionResource: URI, model: ModelSelection | undefined, fork?: { session: URI; turnIndex: number; turnId: string }, sessionConfig?: Record, branchNameHint?: string): Promise { - const config = branchNameHint ? 
{ ...sessionConfig, [SessionConfigKey.BranchNameHint]: branchNameHint } : sessionConfig; + private async _createAndSubscribe(sessionResource: URI, model: ModelSelection | undefined, fork?: { session: URI; turnIndex: number; turnId: string }, config?: Record): Promise { const workingDirectory = this._resolveRequestedWorkingDirectory(sessionResource); const requestedSession = fork ? undefined : this._resolveSessionUri(sessionResource); @@ -2382,7 +2369,17 @@ export class AgentHostSessionHandler extends Disposable implements IChatSessionC this._pendingMessageSubscriptions.set(sessionResource, chatModel.onDidChangePendingRequests(() => { this._syncPendingMessages(sessionResource, backendSession); })); + this._syncPendingMessages(sessionResource, backendSession); + return; } + + this._pendingMessageSubscriptions.set(sessionResource, this._chatService.onDidCreateModel(model => { + if (!isEqual(model.sessionResource, sessionResource)) { + return; + } + this._pendingMessageSubscriptions.deleteAndDispose(sessionResource); + this._ensurePendingMessageSubscription(sessionResource, backendSession); + })); } /** diff --git a/src/vs/workbench/contrib/chat/browser/agentSessions/experiments/agentTitleBarStatusWidget.ts b/src/vs/workbench/contrib/chat/browser/agentSessions/experiments/agentTitleBarStatusWidget.ts index 7f89129fe47f7..f4cadd4aa571a 100644 --- a/src/vs/workbench/contrib/chat/browser/agentSessions/experiments/agentTitleBarStatusWidget.ts +++ b/src/vs/workbench/contrib/chat/browser/agentSessions/experiments/agentTitleBarStatusWidget.ts @@ -40,7 +40,7 @@ import { mainWindow } from '../../../../../../base/browser/window.js'; import { LayoutSettings } from '../../../../../services/layout/browser/layoutService.js'; import { WindowTitle } from '../../../../../browser/parts/titlebar/windowTitle.js'; import { ChatConfiguration } from '../../../common/constants.js'; -import { ChatEntitlement, IChatEntitlementService } from 
'../../../../../services/chat/common/chatEntitlementService.js'; +import { IChatEntitlementService } from '../../../../../services/chat/common/chatEntitlementService.js'; import { IChatWidgetService } from '../../chat.js'; import { ITelemetryService } from '../../../../../../platform/telemetry/common/telemetry.js'; @@ -51,7 +51,6 @@ type AgentStatusClickAction = | 'focusSessionsView' | 'toggleChat' | 'setupChat' - | 'openQuotaExceededDialog' | 'applyFilter' | 'clearFilter' | 'enterProjection' @@ -71,7 +70,6 @@ type AgentStatusClickClassification = { // Action IDs const TOGGLE_CHAT_ACTION_ID = 'workbench.action.chat.toggle'; -const OPEN_CHAT_QUOTA_EXCEEDED_DIALOG = 'workbench.action.chat.openQuotaExceededDialog'; const QUICK_OPEN_ACTION_ID = 'workbench.action.quickOpenWithModes'; // Storage key for filter state @@ -854,24 +852,9 @@ export class AgentTitleBarStatusWidget extends BaseActionViewItem { // Get menu actions for dropdown with proper group separators const menuActions: IAction[] = Separator.join(...this._chatTitleBarMenu.getActions({ shouldForwardArgs: true }).map(([, actions]) => actions)); - // Determine primary action based on entitlement state - // Special case 1: User is signed out (needs to sign in) - // Special case 2: User has exceeded quota (needs to upgrade) - const chatSentiment = this.chatEntitlementService.sentiment; - const chatQuotaExceeded = this.chatEntitlementService.quotas.chat?.percentRemaining === 0; - const free = this.chatEntitlementService.entitlement === ChatEntitlement.Free; - - let primaryActionId = TOGGLE_CHAT_ACTION_ID; - let primaryActionTitle = localize('toggleChat', "Toggle Chat"); - let primaryActionIcon = Codicon.chatSparkle; - - if (chatSentiment.completed && !chatSentiment.disabled) { - if (chatQuotaExceeded && free) { - primaryActionId = OPEN_CHAT_QUOTA_EXCEEDED_DIALOG; - primaryActionTitle = localize('chatQuotaExceededButton', "GitHub Copilot Free plan chat messages quota reached. 
Click for details."); - primaryActionIcon = Codicon.chatSparkleWarning; - } - } + const primaryActionId = TOGGLE_CHAT_ACTION_ID; + const primaryActionTitle = localize('toggleChat', "Toggle Chat"); + const primaryActionIcon = Codicon.chatSparkle; // Create primary action const primaryAction = this.instantiationService.createInstance(MenuItemAction, { diff --git a/src/vs/workbench/contrib/chat/browser/chatDebug/chatDebugOverviewView.ts b/src/vs/workbench/contrib/chat/browser/chatDebug/chatDebugOverviewView.ts index 37096436877d5..f98c6f2137f7b 100644 --- a/src/vs/workbench/contrib/chat/browser/chatDebug/chatDebugOverviewView.ts +++ b/src/vs/workbench/contrib/chat/browser/chatDebug/chatDebugOverviewView.ts @@ -25,6 +25,8 @@ import { setupBreadcrumbKeyboardNavigation, TextBreadcrumbItem } from './chatDeb const $ = DOM.$; const numberFormatter = safeIntl.NumberFormat(); +const aicFormatter = safeIntl.NumberFormat(undefined, { minimumFractionDigits: 2, maximumFractionDigits: 2 }); +const NANO_AIU_PER_AIC = 1_000_000_000; export const enum OverviewNavigation { Home = 'home', @@ -295,6 +297,7 @@ export class ChatDebugOverviewView extends Disposable { const totalOutputTokens = modelTurns.reduce((sum, e) => sum + (e.outputTokens ?? 0), 0); const totalCachedTokens = modelTurns.reduce((sum, e) => sum + (e.cachedTokens ?? 0), 0); const totalTokens = modelTurns.reduce((sum, e) => sum + (e.totalTokens ?? 0), 0); + const totalCopilotUsageNanoAiu = modelTurns.reduce((sum, e) => sum + (e.copilotUsageNanoAiu ?? 
0), 0); interface OverviewMetric { label: string; value: string } const metrics: OverviewMetric[] = [ @@ -307,6 +310,11 @@ export class ChatDebugOverviewView extends Disposable { { label: localize('chatDebug.metric.errors', "Errors"), value: fmt.format(errors.length) }, ]; + if (totalCopilotUsageNanoAiu > 0) { + const aic = totalCopilotUsageNanoAiu / NANO_AIU_PER_AIC; + metrics.push({ label: localize('chatDebug.metric.copilotUsage', "Copilot Usage (AIC)"), value: aicFormatter.value.format(aic) }); + } + for (const metric of metrics) { const card = DOM.append(container, $('.chat-debug-overview-metric-card')); DOM.append(card, $('div.chat-debug-overview-metric-label', undefined, metric.label)); diff --git a/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusDashboard.ts b/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusDashboard.ts index e0383715b5068..a5e680a3b68a1 100644 --- a/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusDashboard.ts +++ b/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusDashboard.ts @@ -242,8 +242,11 @@ export class ChatStatusDashboard extends DomWidget { } let chatQuotaIndicator: ((quota: IQuotaSnapshot | string) => void) | undefined; - if (chatQuota && !chatQuota.unlimited && !this.chatEntitlementService.quotas.usageBasedBilling) { - chatQuotaIndicator = this.createQuotaIndicator(container, chatQuota, localize('chatsLabel', "Chat messages"), resetLabel); + if (chatQuota && !chatQuota.unlimited && (!this.chatEntitlementService.quotas.usageBasedBilling || this.chatEntitlementService.entitlement === ChatEntitlement.Free)) { + const chatLabel = this.chatEntitlementService.quotas.usageBasedBilling && this.chatEntitlementService.entitlement === ChatEntitlement.Free + ? 
localize('creditsLabel', "Credits") + : localize('chatsLabel', "Chat messages"); + chatQuotaIndicator = this.createQuotaIndicator(container, chatQuota, chatLabel, resetLabel); } let premiumChatQuotaIndicator: ((quota: IQuotaSnapshot | string) => void) | undefined; diff --git a/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusEntry.ts b/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusEntry.ts index 3e1185bd00e8d..b91e2604e6574 100644 --- a/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusEntry.ts +++ b/src/vs/workbench/contrib/chat/browser/chatStatus/chatStatusEntry.ts @@ -246,6 +246,7 @@ export class ChatStatusBarEntry extends Disposable implements IWorkbenchContribu command: CHAT_SETUP_ACTION_ID, showInAllWindows: true, kind: undefined, + content: this.entryAnchor, }; } diff --git a/src/vs/workbench/contrib/chat/common/chatDebugService.ts b/src/vs/workbench/contrib/chat/common/chatDebugService.ts index b5f0b2ae2bab1..be52dcfcdcec5 100644 --- a/src/vs/workbench/contrib/chat/common/chatDebugService.ts +++ b/src/vs/workbench/contrib/chat/common/chatDebugService.ts @@ -65,6 +65,7 @@ export interface IChatDebugModelTurnEvent extends IChatDebugEventCommon { readonly outputTokens?: number; readonly cachedTokens?: number; readonly totalTokens?: number; + readonly copilotUsageNanoAiu?: number; readonly durationInMillis?: number; } diff --git a/src/vs/workbench/contrib/chat/test/browser/agentSessions/agentHostChatContribution.test.ts b/src/vs/workbench/contrib/chat/test/browser/agentSessions/agentHostChatContribution.test.ts index 4bf0dc0135edb..ebce9b85a2f2e 100644 --- a/src/vs/workbench/contrib/chat/test/browser/agentSessions/agentHostChatContribution.test.ts +++ b/src/vs/workbench/contrib/chat/test/browser/agentSessions/agentHostChatContribution.test.ts @@ -27,7 +27,7 @@ import { IDefaultAccountService } from '../../../../../../platform/defaultAccoun import { IAuthenticationService } from 
'../../../../../services/authentication/common/authentication.js'; import { IChatAgentData, IChatAgentImplementation, IChatAgentRequest, IChatAgentService } from '../../../common/participants/chatAgents.js'; import { ChatAgentLocation } from '../../../common/constants.js'; -import { IChatService, IChatMarkdownContent, IChatProgress, IChatTerminalToolInvocationData, IChatToolInputInvocationData, IChatToolInvocation, IChatToolInvocationSerialized, ToolConfirmKind } from '../../../common/chatService/chatService.js'; +import { ChatRequestQueueKind, IChatService, IChatMarkdownContent, IChatProgress, IChatTerminalToolInvocationData, IChatToolInputInvocationData, IChatToolInvocation, IChatToolInvocationSerialized, ToolConfirmKind } from '../../../common/chatService/chatService.js'; import { IChatEditingService } from '../../../common/editing/chatEditingService.js'; import { IMarkdownString } from '../../../../../../base/common/htmlContent.js'; import { IChatSessionsService, type IChatSessionRequestHistoryItem } from '../../../common/chatSessionsService.js'; @@ -36,7 +36,7 @@ import { IProductService } from '../../../../../../platform/product/common/produ import { TestInstantiationService } from '../../../../../../platform/instantiation/test/common/instantiationServiceMock.js'; import { IOutputService } from '../../../../../services/output/common/output.js'; import { IWorkspaceContextService } from '../../../../../../platform/workspace/common/workspace.js'; -import { AgentHostContribution, AgentHostSessionListController, AgentHostSessionHandler, getAgentHostBranchNameHint } from '../../../browser/agentSessions/agentHost/agentHostChatContribution.js'; +import { AgentHostContribution, AgentHostSessionListController, AgentHostSessionHandler } from '../../../browser/agentSessions/agentHost/agentHostChatContribution.js'; import { AgentHostLanguageModelProvider } from '../../../browser/agentSessions/agentHost/agentHostLanguageModelProvider.js'; import { IFileService } from 
'../../../../../../platform/files/common/files.js'; import { TestFileService } from '../../../../../test/common/workbenchTestServices.js'; @@ -53,9 +53,9 @@ import { IAgentHostTerminalService } from '../../../../terminal/browser/agentHos import { IAgentHostSessionWorkingDirectoryResolver } from '../../../browser/agentSessions/agentHost/agentHostSessionWorkingDirectoryResolver.js'; import { ILanguageModelToolsService } from '../../../common/tools/languageModelToolsService.js'; import { IPromptsService } from '../../../common/promptSyntax/service/promptsService.js'; -import { SessionConfigKey } from '../../../../../../platform/agentHost/common/sessionConfigKeys.js'; import { IChatWidgetService } from '../../../browser/chat.js'; import { ChatQuestionCarouselData } from '../../../common/model/chatProgressTypes/chatQuestionCarouselData.js'; +import type { IChatModel, IChatPendingRequest, IChatRequestModel } from '../../../common/model/chatModel.js'; // ---- Mock agent host service ------------------------------------------------ @@ -395,9 +395,15 @@ function createTestServices(disposables: DisposableStore, workingDirectoryResolv instantiationService.stub(IChatEditingService, { registerEditingSessionProvider: () => toDisposable(() => { }), }); + const chatModels = new Map(); + const onDidCreateModel = disposables.add(new Emitter()); const chatService = { - getSession: () => undefined, - onDidCreateModel: Event.None, + getSession: (sessionResource: URI) => chatModels.get(sessionResource.toString()), + onDidCreateModel: onDidCreateModel.event, + setSession(sessionResource: URI, model: IChatModel) { + chatModels.set(sessionResource.toString(), model); + onDidCreateModel.fire(model); + }, removePendingRequestCalls: [] as { sessionResource: URI; requestId: string }[], removePendingRequest(sessionResource: URI, requestId: string) { this.removePendingRequestCalls.push({ sessionResource, requestId }); @@ -463,7 +469,7 @@ function createContribution(disposables: DisposableStore, 
opts?: { authServiceOv })); const contribution = disposables.add(instantiationService.createInstance(AgentHostContribution)); - return { contribution, listController, sessionHandler, agentHostService, chatAgentService, chatWidgetService, chatService }; + return { contribution, listController, sessionHandler, agentHostService, chatAgentService, chatWidgetService, chatService, instantiationService }; } function makeRequest(overrides: Partial<{ message: string; sessionResource: URI; variables: IChatAgentRequest['variables']; userSelectedModelId: string; modelConfiguration: Record; agentHostSessionConfig: Record; agentId: string }> = {}): IChatAgentRequest { @@ -2659,20 +2665,46 @@ suite('AgentHostChatContribution', () => { await turnPromise; assert.strictEqual(agentHostService.createSessionCalls.length, 1); - assert.deepStrictEqual(agentHostService.createSessionCalls[0].config, { ...config, [SessionConfigKey.BranchNameHint]: 'add-agent-host-session-configuration-flow' }); - })); - - test('handler derives deterministic branch name hints from first request text', () => { - assert.deepStrictEqual([ - getAgentHostBranchNameHint('Add Agent Host session configuration flow'), - getAgentHostBranchNameHint(' Fix: worktree picker + branch config! '), - getAgentHostBranchNameHint('---'), - ], [ - 'add-agent-host-session-configuration-flow', - 'fix-worktree-picker-branch-config', - undefined, - ]); - }); + assert.deepStrictEqual(agentHostService.createSessionCalls[0].config, config); + })); + + test('handler forwards request session config via SessionConfigChanged on eager-create path', () => runWithFakedTimers({ useFakeTimers: true }, async () => { + const { sessionHandler, agentHostService, chatAgentService } = createContribution(disposables); + + // Pre-seed an eagerly-created backend session so the handler + // hits the eager-create branch in `_invokeAgent` (the one that + // dispatches `SessionConfigChanged` instead of calling + // `createSession` with the config inline). 
+ const sessionUri = AgentSession.uri('copilot', 'eager-config'); + agentHostService.sessionStates.set(sessionUri.toString(), { + ...createSessionState({ resource: sessionUri.toString(), provider: 'copilot', title: 'Test', status: SessionStatus.Idle, createdAt: Date.now(), modifiedAt: Date.now() }), + lifecycle: SessionLifecycle.Ready, + turns: [], + }); + + const sessionResource = URI.from({ scheme: 'agent-host-copilot', path: '/eager-config' }); + const chatSession = await sessionHandler.provideChatSessionContent(sessionResource, CancellationToken.None); + disposables.add(toDisposable(() => chatSession.dispose())); + + agentHostService.dispatchedActions.length = 0; + + const registered = chatAgentService.registeredAgents.get('agent-host-copilot')!; + const config = { isolation: 'worktree', branch: 'main' }; + const turnPromise = registered.impl.invoke( + makeRequest({ message: 'Fix worktree branch hint propagation', sessionResource, agentHostSessionConfig: config }), + () => { }, [], CancellationToken.None, + ); + await timeout(10); + const turnDispatch = agentHostService.turnActions[0]; + const turnAction = turnDispatch.action as ITurnStartedAction; + agentHostService.fireAction({ action: turnDispatch.action, serverSeq: 1, origin: { clientId: agentHostService.clientId, clientSeq: turnDispatch.clientSeq } }); + agentHostService.fireAction({ action: { type: 'session/turnComplete', session: turnAction.session, turnId: turnAction.turnId } as SessionAction, serverSeq: 2, origin: undefined }); + await turnPromise; + + const configChanged = agentHostService.dispatchedActions.find(d => d.action.type === ActionType.SessionConfigChanged); + assert.ok(configChanged, 'expected a SessionConfigChanged dispatch'); + assert.deepStrictEqual((configChanged!.action as { config: Record }).config, config); + })); test('handler uses registered working directory resolver', () => runWithFakedTimers({ useFakeTimers: true }, async () => { const resolvedWorkingDirectory = 
URI.file('/resolved/working/dir'); @@ -3094,6 +3126,101 @@ suite('AgentHostChatContribution', () => { // ---- Server-initiated turns ------------------------------------------- suite('server-initiated turns', () => { + function createPendingChatModel(sessionResource: URI, pendingRequests: IChatPendingRequest[]): { model: IChatModel; firePendingRequestsChanged(): void } { + const onDidChangePendingRequests = disposables.add(new Emitter()); + return { + model: upcastPartial({ + sessionResource, + onDidChangePendingRequests: onDidChangePendingRequests.event, + getPendingRequests: () => pendingRequests, + }), + firePendingRequestsChanged: () => onDidChangePendingRequests.fire(), + }; + } + + test('syncs queued messages added to restored active sessions', async () => { + const { sessionHandler, agentHostService, chatService } = createContribution(disposables); + + const backendSession = AgentSession.uri('copilot', 'restored-pending-sync'); + agentHostService.sessionStates.set(backendSession.toString(), { + ...createSessionState({ + resource: backendSession.toString(), + provider: 'copilot', + title: 'Test', + status: SessionStatus.InProgress, + createdAt: Date.now(), + modifiedAt: Date.now(), + }), + lifecycle: SessionLifecycle.Ready, + activeTurn: createActiveTurn('active-turn-1', { text: 'Working' }), + }); + + const sessionResource = URI.from({ scheme: 'agent-host-copilot', path: '/restored-pending-sync' }); + const chatSession = await sessionHandler.provideChatSessionContent(sessionResource, CancellationToken.None); + disposables.add(toDisposable(() => chatSession.dispose())); + + const pendingRequests: IChatPendingRequest[] = []; + const chatModel = createPendingChatModel(sessionResource, pendingRequests); + chatService.setSession(sessionResource, chatModel.model); + + agentHostService.dispatchedActions.length = 0; + const text = 'Run the queued follow-up'; + const request = upcastPartial({ id: 'queued-request-1', message: { text, parts: [] } }); + 
pendingRequests.push({ request, kind: ChatRequestQueueKind.Queued, sendOptions: {} }); + chatModel.firePendingRequestsChanged(); + + const action = agentHostService.dispatchedActions.map(d => d.action).find((action): action is Extract => action.type === ActionType.SessionPendingMessageSet); + assert.ok(action, 'queued message should be dispatched to the agent host'); + assert.deepStrictEqual(action, { + type: ActionType.SessionPendingMessageSet, + session: backendSession.toString(), + kind: 'queued', + id: 'queued-request-1', + userMessage: { text }, + }); + }); + + test('syncs text updates for existing queued pending messages', async () => { + const { sessionHandler, agentHostService, chatService } = createContribution(disposables); + + const backendSession = AgentSession.uri('copilot', 'pending-text-update'); + agentHostService.sessionStates.set(backendSession.toString(), { + ...createSessionState({ + resource: backendSession.toString(), + provider: 'copilot', + title: 'Test', + status: SessionStatus.Idle, + createdAt: Date.now(), + modifiedAt: Date.now(), + }), + lifecycle: SessionLifecycle.Ready, + queuedMessages: [{ id: 'queued-request-1', userMessage: { text: 'old queued text' } }], + }); + + const sessionResource = URI.from({ scheme: 'agent-host-copilot', path: '/pending-text-update' }); + const chatSession = await sessionHandler.provideChatSessionContent(sessionResource, CancellationToken.None); + disposables.add(toDisposable(() => chatSession.dispose())); + + agentHostService.dispatchedActions.length = 0; + const text = 'new queued text'; + const pendingRequests: IChatPendingRequest[] = [{ + request: upcastPartial({ id: 'queued-request-1', message: { text, parts: [] } }), + kind: ChatRequestQueueKind.Queued, + sendOptions: {}, + }]; + const chatModel = createPendingChatModel(sessionResource, pendingRequests); + chatService.setSession(sessionResource, chatModel.model); + + const action = agentHostService.dispatchedActions.map(d => d.action).find((action): 
action is Extract => action.type === ActionType.SessionPendingMessageSet); + assert.ok(action, 'queued message text update should be dispatched to the agent host'); + assert.deepStrictEqual(action, { + type: ActionType.SessionPendingMessageSet, + session: backendSession.toString(), + kind: 'queued', + id: 'queued-request-1', + userMessage: { text }, + }); + }); test('detects server-initiated turn and fires onDidStartServerRequest', () => runWithFakedTimers({ useFakeTimers: true }, async () => { const { sessionHandler, agentHostService, chatAgentService } = createContribution(disposables); diff --git a/src/vs/workbench/contrib/chat/test/browser/chatStatusDashboard.test.ts b/src/vs/workbench/contrib/chat/test/browser/chatStatusDashboard.test.ts index 7c53dd4afa1c1..cbbea65afff62 100644 --- a/src/vs/workbench/contrib/chat/test/browser/chatStatusDashboard.test.ts +++ b/src/vs/workbench/contrib/chat/test/browser/chatStatusDashboard.test.ts @@ -155,23 +155,23 @@ suite('ChatStatusDashboard', () => { assert.deepStrictEqual(getQuotaValues(dashboard.element), ['100%', '100%']); }); - test('Free — TBB: shows Credits and Inline Suggestions, not Chat messages', () => { + test('Free — TBB: shows Credits and Inline Suggestions', () => { const dashboard = createDashboard(createEntitlementService({ chat: { percentRemaining: 80, unlimited: false }, - premiumChat: { percentRemaining: 60, unlimited: false, usageBasedBilling: true }, completions: { percentRemaining: 70, unlimited: false }, + usageBasedBilling: true, entitlement: ChatEntitlement.Free, })); assert.deepStrictEqual(getQuotaLabels(dashboard.element), ['Credits', 'Inline Suggestions']); - assert.deepStrictEqual(getQuotaValues(dashboard.element), ['40%', '30%']); + assert.deepStrictEqual(getQuotaValues(dashboard.element), ['20%', '30%']); }); test('Free — TBB exhausted: shows Credits and Inline Suggestions at 0%', () => { const dashboard = createDashboard(createEntitlementService({ chat: { percentRemaining: 0, unlimited: 
false }, - premiumChat: { percentRemaining: 0, unlimited: false, usageBasedBilling: true }, completions: { percentRemaining: 0, unlimited: false }, + usageBasedBilling: true, entitlement: ChatEntitlement.Free, })); diff --git a/src/vs/workbench/contrib/chat/test/common/chatDebugServiceImpl.test.ts b/src/vs/workbench/contrib/chat/test/common/chatDebugServiceImpl.test.ts index ac7b0c7db48cf..07b8e73ba2eec 100644 --- a/src/vs/workbench/contrib/chat/test/common/chatDebugServiceImpl.test.ts +++ b/src/vs/workbench/contrib/chat/test/common/chatDebugServiceImpl.test.ts @@ -105,6 +105,7 @@ suite('ChatDebugServiceImpl', () => { inputTokens: 100, outputTokens: 50, totalTokens: 150, + copilotUsageNanoAiu: 5_000_000_000, durationInMillis: 1200, }; @@ -115,6 +116,7 @@ suite('ChatDebugServiceImpl', () => { assert.strictEqual(events.length, 2); assert.strictEqual(events[0].kind, 'toolCall'); assert.strictEqual(events[1].kind, 'modelTurn'); + assert.strictEqual((events[1] as IChatDebugModelTurnEvent).copilotUsageNanoAiu, 5_000_000_000); }); }); diff --git a/src/vs/workbench/contrib/issue/browser/baseIssueReporterService.ts b/src/vs/workbench/contrib/issue/browser/baseIssueReporterService.ts index d7d14153f59e5..44e0b93373c41 100644 --- a/src/vs/workbench/contrib/issue/browser/baseIssueReporterService.ts +++ b/src/vs/workbench/contrib/issue/browser/baseIssueReporterService.ts @@ -134,17 +134,32 @@ export class BaseIssueReporterService extends Disposable { this.createAction = this._register(new Action('issueReporter.create', localize('create', "Create on GitHub"), undefined, true, async () => { this.delayedSubmit.trigger(async () => { - this.createIssue(true); // create issue + this.setSubmittingState(true); + try { + await this.createIssue(true); + } finally { + this.setSubmittingState(false); + } }); })); this.previewAction = this._register(new Action('issueReporter.preview', localize('preview', "Preview on GitHub"), undefined, true, async () => { this.delayedSubmit.trigger(async 
() => { - this.createIssue(false); // preview issue + this.setSubmittingState(true); + try { + await this.createIssue(false); + } finally { + this.setSubmittingState(false); + } }); })); this.privateAction = this._register(new Action('issueReporter.privateCreate', localize('privateCreate', "Create Internally"), undefined, true, async () => { this.delayedSubmit.trigger(async () => { - this.createIssue(true, true); // create private issue + this.setSubmittingState(true); + try { + await this.createIssue(true, true); + } finally { + this.setSubmittingState(false); + } }); })); @@ -409,6 +424,41 @@ export class BaseIssueReporterService extends Disposable { } } + private preSubmitButtonLabel: string | undefined; + + private getSubmitButtonElement(): HTMLElement { + if (this.publicGithubButton instanceof ButtonWithDropdown) { + return this.publicGithubButton.primaryButton.element; + } + return this.publicGithubButton.element; + } + + private setSubmittingState(submitting: boolean): void { + this.publicGithubButton.enabled = !submitting; + if (this.internalGithubButton) { + this.internalGithubButton.enabled = !submitting; + } + + const buttonEl = this.getSubmitButtonElement(); + if (submitting) { + const currentLabel = this.publicGithubButton instanceof ButtonWithDropdown + ? this.publicGithubButton.primaryButton.label + : this.publicGithubButton.label; + this.preSubmitButtonLabel = typeof currentLabel === 'string' ? 
currentLabel : ''; + this.publicGithubButton.label = localize('submittingIssue', "Submitting..."); + const spinnerIcon = renderIcon(ThemeIcon.modify(Codicon.loading, 'spin')); + buttonEl.prepend(spinnerIcon); + } else { + // eslint-disable-next-line no-restricted-syntax + const spinnerEl = buttonEl.querySelector('.codicon-loading'); + spinnerEl?.remove(); + if (this.preSubmitButtonLabel !== undefined) { + this.publicGithubButton.label = this.preSubmitButtonLabel; + this.preSubmitButtonLabel = undefined; + } + } + } + private async updateIssueReporterUri(extension: IssueReporterExtensionData): Promise { try { if (extension.uri) { @@ -706,8 +756,13 @@ export class BaseIssueReporterService extends Disposable { // Cmd/Ctrl+Enter previews issue and closes window if (cmdOrCtrlKey && e.key === 'Enter') { this.delayedSubmit.trigger(async () => { - if (await this.createIssue()) { - this.close(); + this.setSubmittingState(true); + try { + if (await this.createIssue()) { + this.close(); + } + } finally { + this.setSubmittingState(false); } }); } diff --git a/src/vs/workbench/contrib/issue/browser/media/issueReporter.css b/src/vs/workbench/contrib/issue/browser/media/issueReporter.css index d24fe259aa852..2ac996960c202 100644 --- a/src/vs/workbench/contrib/issue/browser/media/issueReporter.css +++ b/src/vs/workbench/contrib/issue/browser/media/issueReporter.css @@ -570,6 +570,13 @@ body.issue-reporter-body { align-self: flex-end; } +.issue-reporter-body .public-elements .monaco-text-button .codicon-loading { + margin-right: 4px; + vertical-align: text-bottom; + line-height: inherit; + font-size: inherit; +} + .issue-reporter-body .public-elements #show-repo-name { align-self: flex-end; font-size: 12px; diff --git a/src/vscode-dts/vscode.proposed.chatDebug.d.ts b/src/vscode-dts/vscode.proposed.chatDebug.d.ts index f1a4f480ff17e..42764ef956545 100644 --- a/src/vscode-dts/vscode.proposed.chatDebug.d.ts +++ b/src/vscode-dts/vscode.proposed.chatDebug.d.ts @@ -186,6 +186,11 @@ 
declare module 'vscode' { */ status?: string; + /** + * The per-request cost from `copilot_usage.total_nano_aiu`, in nano-AIUs. + */ + copilotUsageNanoAiu?: number; + /** * Create a new ChatDebugModelTurnEvent. * @param created The timestamp when the event was created. diff --git a/test/sanity/src/desktop.test.ts b/test/sanity/src/desktop.test.ts index 230fb10f0b61c..297a939f19cd9 100644 --- a/test/sanity/src/desktop.test.ts +++ b/test/sanity/src/desktop.test.ts @@ -15,7 +15,6 @@ export function setup(context: TestContext) { if (!context.options.downloadOnly) { const entryPoint = context.getDesktopEntryPoint(dir); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); } }); @@ -25,7 +24,6 @@ export function setup(context: TestContext) { if (!context.options.downloadOnly) { const entryPoint = context.getDesktopEntryPoint(dir); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); } }); @@ -35,7 +33,6 @@ export function setup(context: TestContext) { if (!context.options.downloadOnly) { const entryPoint = context.getDesktopEntryPoint(dir); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); } }); @@ -47,7 +44,6 @@ export function setup(context: TestContext) { context.validateAllCodesignSignatures(dir); const entryPoint = context.getDesktopEntryPoint(dir); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); context.unmountDmg(dir); } }); @@ -60,7 +56,6 @@ export function setup(context: TestContext) { context.validateAllCodesignSignatures(dir); const entryPoint = context.getDesktopEntryPoint(dir); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); context.unmountDmg(dir); } }); @@ -73,7 +68,6 @@ export function setup(context: TestContext) { context.validateAllCodesignSignatures(dir); const entryPoint = context.getDesktopEntryPoint(dir); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); context.unmountDmg(dir); } }); @@ -180,7 +174,6 @@ export function setup(context: 
TestContext) { context.validateAllAuthenticodeSignatures(path.dirname(entryPoint)); context.validateAllVersionInfo(path.dirname(entryPoint)); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); await context.uninstallWindowsApp('system'); } }); @@ -193,7 +186,6 @@ export function setup(context: TestContext) { const entryPoint = context.getDesktopEntryPoint(dir); const dataDir = context.createPortableDataDir(dir); await testDesktopApp(entryPoint, dataDir); - await testAgentsApp(entryPoint, dataDir); } }); @@ -206,7 +198,6 @@ export function setup(context: TestContext) { context.validateAllAuthenticodeSignatures(path.dirname(entryPoint)); context.validateAllVersionInfo(path.dirname(entryPoint)); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); await context.uninstallWindowsApp('user'); } }); @@ -220,7 +211,6 @@ export function setup(context: TestContext) { context.validateAllAuthenticodeSignatures(path.dirname(entryPoint)); context.validateAllVersionInfo(path.dirname(entryPoint)); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); await context.uninstallWindowsApp('system'); } }); @@ -233,7 +223,6 @@ export function setup(context: TestContext) { const entryPoint = context.getDesktopEntryPoint(dir); const dataDir = context.createPortableDataDir(dir); await testDesktopApp(entryPoint, dataDir); - await testAgentsApp(entryPoint, dataDir); } }); @@ -246,7 +235,6 @@ export function setup(context: TestContext) { context.validateAllAuthenticodeSignatures(path.dirname(entryPoint)); context.validateAllVersionInfo(path.dirname(entryPoint)); await testDesktopApp(entryPoint); - await testAgentsApp(entryPoint); await context.uninstallWindowsApp('user'); } }); @@ -271,32 +259,4 @@ export function setup(context: TestContext) { test.validate(); } - - async function testAgentsApp(desktopEntryPoint: string, dataDir?: string) { - if (context.options.quality === 'stable') { - // Agents window is not included in stable builds yet. 
- return; - } - - const test = new UITest(context, dataDir); - const args = ['--agents']; - if (!dataDir) { - args.push('--extensions-dir', test.extensionsDir); - args.push('--user-data-dir', test.userDataDir); - } - - context.log(`Starting Agents app ${desktopEntryPoint} with args ${args.join(' ')}`); - const app = await _electron.launch({ executablePath: desktopEntryPoint, args }); - try { - const window = await context.getPage(app.firstWindow()); - await window.waitForSelector('.agent-sessions-workbench', { timeout: 60000 }); - - context.log('Clicking "Sign in with GitHub" button'); - const button = await window.waitForSelector('button.provider-github'); - await button.click(); - } finally { - context.log('Closing the Agents app'); - await app.close(); - } - } }