From 9f043037f41b7377e351a8d1b53a33799ba9d54f Mon Sep 17 00:00:00 2001 From: Vizards Date: Thu, 11 Jun 2026 13:18:04 +0800 Subject: [PATCH] fix: support OpenRouter reasoning effort compatibility --- src/client/core.ts | 8 +- src/client/error/index.ts | 12 ++ src/client/types.ts | 4 +- src/provider/debug/dump.ts | 299 +++++++++++++++++++++++++++++- src/provider/debug/index.ts | 8 +- src/provider/request.ts | 30 +-- src/provider/stream.ts | 137 +++++++++----- src/provider/thinking/index.ts | 17 ++ src/provider/thinking/precheck.ts | 166 +++++++++++++++++ src/provider/thinking/shape.ts | 46 +++++ src/provider/thinking/types.ts | 16 ++ src/types.ts | 1 + 12 files changed, 674 insertions(+), 70 deletions(-) create mode 100644 src/provider/thinking/index.ts create mode 100644 src/provider/thinking/precheck.ts create mode 100644 src/provider/thinking/shape.ts create mode 100644 src/provider/thinking/types.ts diff --git a/src/client/core.ts b/src/client/core.ts index 8bb2443..06a3742 100644 --- a/src/client/core.ts +++ b/src/client/core.ts @@ -1,8 +1,8 @@ import type { CancellationToken } from 'vscode'; import { safeStringify } from '../json'; import { logger } from '../logger'; +import type { ChatCompletionRequestBody } from '../provider/thinking'; import type { - DeepSeekRequest, DeepSeekStreamChunk, DeepSeekToolCall, DeepSeekUsage, @@ -25,7 +25,7 @@ export class DeepSeekClient { * Parses SSE chunks and dispatches callbacks for content, thinking, and tool calls. */ async streamChatCompletion( - request: DeepSeekRequest, + request: ChatCompletionRequestBody, callbacks: StreamCallbacks, cancellationToken?: CancellationToken, ): Promise { @@ -81,7 +81,9 @@ export class DeepSeekClient { break; } - buffer += decoder.decode(value, { stream: true }); + const decoded = decoder.decode(value, { stream: true }); + callbacks.onRawResponseData?.(decoded); + buffer += decoded; const lines = buffer.split('\n'); buffer = lines.pop() || ''; diff --git a/src/client/error/index.ts b/src/client/error/index.ts index de3f428..5463ac0 100644 --- a/src/client/error/index.ts +++ b/src/client/error/index.ts @@ -38,7 +38,10 @@ export class DeepSeekRequestError extends Error { readonly diagnosticMessage: string; readonly baseUrl?: string; readonly status?: number; + readonly statusText?: string; readonly code?: string; + readonly serverMessage?: string; + readonly responseText?: string; constructor(options: { message: string; @@ -47,7 +50,10 @@ export class DeepSeekRequestError extends Error { diagnosticMessage?: string; baseUrl?: string; status?: number; + statusText?: string; code?: string; + serverMessage?: string; + responseText?: string; cause?: unknown; }) { super(options.message, { cause: options.cause }); @@ -57,7 +63,10 @@ export class DeepSeekRequestError extends Error { this.diagnosticMessage = options.diagnosticMessage ?? options.message; this.baseUrl = options.baseUrl; this.status = options.status; + this.statusText = options.statusText; this.code = options.code; + this.serverMessage = options.serverMessage; + this.responseText = options.responseText; } } @@ -79,7 +88,10 @@ export async function createHttpError( kind: 'http', baseUrl, status: response.status, + statusText: response.statusText, code: `HTTP_${response.status}`, + serverMessage, + responseText, diagnosticMessage: joinDiagnosticParts( `kind=http`, `status=${response.status}`, diff --git a/src/client/types.ts b/src/client/types.ts index 773932f..eef2a58 100644 --- a/src/client/types.ts +++ b/src/client/types.ts @@ -1,4 +1,4 @@ -import type { DeepSeekRequest } from '../types'; +import type { ChatCompletionRequestBody } from '../provider/thinking'; export interface ErrorActionUrls { configureApiKey?: string; @@ -7,7 +7,7 @@ export interface ErrorActionUrls { export interface RequestErrorContext { baseUrl: string; - request: DeepSeekRequest; + request: ChatCompletionRequestBody; } export interface ErrorActionLink { diff --git a/src/provider/debug/dump.ts b/src/provider/debug/dump.ts index adf831c..ad69e64 100644 --- a/src/provider/debug/dump.ts +++ b/src/provider/debug/dump.ts @@ -3,11 +3,18 @@ import { appendFile, mkdir, writeFile } from 'fs/promises'; import { tmpdir } from 'os'; import { join } from 'path'; import vscode from 'vscode'; +import { DeepSeekRequestError } from '../../client/error'; import { getRequestDumpEnabled } from '../../config'; import { LANGUAGE_MODEL_CHAT_SYSTEM_ROLE } from '../../consts'; import { safeStringify, toWellFormedString } from '../../json'; import { logger } from '../../logger'; -import type { DeepSeekMessage, DeepSeekRequest } from '../../types'; +import type { + DeepSeekMessage, + DeepSeekRequest, + DeepSeekToolCall, + DeepSeekUsage, + StreamCallbacks, +} from '../../types'; import { classifyDeepSeekRequest, classifyProviderRequest, @@ -22,12 +29,14 @@ import type { VisionProxySource, VisionResolutionStats } from '../vision'; let dumpCounter = 0; let providerInputDumpCounter = 0; +let thinkingRetryDumpCounter = 0; let dumpWriteQueue: Promise = Promise.resolve(); const REQUEST_OBSERVATIONS_FILE = '_request-observations.jsonl'; const HASH_WINDOW_CHARS = 2_048; +const THINKING_COMPAT_LOG_PREFIX = '[reasoning-effort-compat]'; -type DumpEvent = 'provider-input' | 'deepseek-request'; +type DumpEvent = 'provider-input' | 'deepseek-request' | 'thinking-compat-retry'; type DumpStage = 'provider-input' | 'input' | 'resolved'; interface DumpContext { @@ -50,6 +59,12 @@ interface RequestDumpPaths { msg0?: string; } +interface ThinkingRetryDumpPaths { + directory: string; + request: string; + response: string; +} + interface ToolSummary { toolCount: number; toolNames: string[]; @@ -119,6 +134,44 @@ export interface DumpProviderInputOptions { requestOptions: vscode.ProvideLanguageModelChatResponseOptions; } +export interface DumpThinkingCompatibilityRetryAttemptOptions { + globalStorageUri: vscode.Uri; + segment: ConversationSegment; + requestKind: RequestKind; + endpoint: string; + strategy: string; + sourceStatus: number; + request: object; + response: object; +} + +export interface CreateThinkingCompatibilityRetryDumpOptions { + globalStorageUri: vscode.Uri; + segment: ConversationSegment; + requestKind: RequestKind; + endpoint: string; + strategy: string; + sourceStatus: number; + request: object; + callbacks: StreamCallbacks; +} + +interface ThinkingCompatibilityRetryDumpRecorder { + callbacks: StreamCallbacks; + dumpSuccess(): void; + dumpFailure(error: unknown): void; +} + +interface CapturedRetryResponse { + rawResponseParts: string[]; + contentParts: string[]; + reasoningParts: string[]; + toolCalls: DeepSeekToolCall[]; + usage?: DeepSeekUsage; + done: boolean; + error?: object; +} + /** * Dump the raw LanguageModelChatProvider input before any request preparation. * This captures the first observable `options.tools` list, including any @@ -242,6 +295,199 @@ export function dumpDeepSeekRequest( }); } +export function dumpThinkingCompatibilityRetryAttempt( + options: DumpThinkingCompatibilityRetryAttemptOptions, +): void { + if (!getRequestDumpEnabled()) return; + + const context = createDumpContext( + options.globalStorageUri, + options.segment, + 'thinking-compat-retry', + (thinkingRetryDumpCounter += 1), + options.requestKind, + ); + const paths = createThinkingRetryDumpPaths(context); + + enqueueDumpWrite(`${THINKING_COMPAT_LOG_PREFIX} retryDump`, async () => { + await mkdir(context.root, { recursive: true }); + const requestJson = await writeJsonFile(paths.request, options.request, (value) => + JSON.stringify(value, null, 2), + ); + const responseJson = await writeJsonFile(paths.response, options.response, (value) => + JSON.stringify(value, null, 2), + ); + + await writeDumpObservation( + options.globalStorageUri, + createDumpObservation({ + event: 'thinking-compat-retry', + context, + segment: options.segment, + paths, + model: {}, + requestKind: options.requestKind, + requestOptions: undefined, + messages: undefined, + toolSummary: undefined, + retry: { + endpoint: options.endpoint, + strategy: options.strategy, + sourceStatus: options.sourceStatus, + }, + }), + ); + logThinkingCompatibilityRetryDump(options, paths, requestJson.length, responseJson.length); + }); +} + +export function createThinkingCompatibilityRetryDump( + options: CreateThinkingCompatibilityRetryDumpOptions, +): ThinkingCompatibilityRetryDumpRecorder { + const captured: CapturedRetryResponse = { + rawResponseParts: [], + contentParts: [], + reasoningParts: [], + toolCalls: [], + done: false, + }; + + return { + callbacks: { + onContent: (content) => { + captured.contentParts.push(content); + options.callbacks.onContent(content); + }, + onThinking: (text) => { + captured.reasoningParts.push(text); + options.callbacks.onThinking(text); + }, + onToolCall: (toolCall) => { + captured.toolCalls.push(toolCall); + options.callbacks.onToolCall(toolCall); + }, + onError: (error) => { + captured.error = createErrorSnapshot(error); + options.callbacks.onError(error); + }, + onDone: () => { + captured.done = true; + options.callbacks.onDone(); + }, + onUsage: (usage) => { + captured.usage = usage; + options.callbacks.onUsage?.(usage); + }, + onRawResponseData: (data) => { + captured.rawResponseParts.push(data); + options.callbacks.onRawResponseData?.(data); + }, + }, + dumpSuccess: () => { + dumpThinkingCompatibilityRetry(options, createSuccessResponseSnapshot(captured)); + }, + dumpFailure: (error) => { + captured.error ??= createErrorSnapshot(error); + dumpThinkingCompatibilityRetry(options, createFailureResponseSnapshot(captured)); + }, + }; +} + +function dumpThinkingCompatibilityRetry( + options: CreateThinkingCompatibilityRetryDumpOptions, + response: object, +): void { + dumpThinkingCompatibilityRetryAttempt({ + globalStorageUri: options.globalStorageUri, + segment: options.segment, + requestKind: options.requestKind, + endpoint: options.endpoint, + strategy: options.strategy, + sourceStatus: options.sourceStatus, + request: createSentRequestBody(options.request), + response, + }); +} + +function createSentRequestBody(request: object): object { + return { + ...request, + stream_options: { include_usage: true }, + }; +} + +function createSuccessResponseSnapshot(captured: CapturedRetryResponse): object { + return { + ok: true, + stream: true, + done: captured.done, + rawResponseText: joinIfAny(captured.rawResponseParts), + content: joinIfAny(captured.contentParts), + reasoning_content: joinIfAny(captured.reasoningParts), + tool_calls: captured.toolCalls.length > 0 ? captured.toolCalls : undefined, + usage: captured.usage, + summary: createResponseSummary(captured), + }; +} + +function createFailureResponseSnapshot(captured: CapturedRetryResponse): object { + return { + ok: false, + stream: true, + done: captured.done, + rawResponseText: joinIfAny(captured.rawResponseParts), + partial_content: joinIfAny(captured.contentParts), + partial_reasoning_content: joinIfAny(captured.reasoningParts), + partial_tool_calls: captured.toolCalls.length > 0 ? captured.toolCalls : undefined, + usage: captured.usage, + error: captured.error, + summary: createResponseSummary(captured), + }; +} + +function createResponseSummary(captured: CapturedRetryResponse): object { + return { + contentChars: captured.contentParts.reduce((total, part) => total + part.length, 0), + reasoningChars: captured.reasoningParts.reduce((total, part) => total + part.length, 0), + rawResponseChars: captured.rawResponseParts.reduce((total, part) => total + part.length, 0), + toolCallCount: captured.toolCalls.length, + hasUsage: Boolean(captured.usage), + }; +} + +function joinIfAny(parts: readonly string[]): string | undefined { + return parts.length > 0 ? parts.join('') : undefined; +} + +function createErrorSnapshot(error: unknown): object { + if (error instanceof DeepSeekRequestError) { + return { + name: error.name, + message: error.message, + kind: error.kind, + status: error.status, + statusText: error.statusText, + code: error.code, + baseUrl: error.baseUrl, + serverMessage: error.serverMessage, + responseText: error.responseText, + userSummary: error.userSummary, + diagnosticMessage: error.diagnosticMessage, + stack: error.stack, + }; + } + if (error instanceof Error) { + return { + name: error.name, + message: error.message, + stack: error.stack, + }; + } + return { + value: String(error), + }; +} + export async function ensureRequestDumpRoot(globalStorageUri: vscode.Uri): Promise { const root = getRequestDumpBaseRootUri(globalStorageUri); await mkdir(root.fsPath, { recursive: true }); @@ -281,16 +527,29 @@ function createRequestDumpPaths(context: DumpContext, hasMsg0: boolean): Request }; } +function createThinkingRetryDumpPaths(context: DumpContext): ThinkingRetryDumpPaths { + return { + directory: context.root, + request: join(context.root, `${context.basename}.request.json`), + response: join(context.root, `${context.basename}.response.json`), + }; +} + function createDumpObservation(options: { event: DumpEvent; context: DumpContext; segment: ConversationSegment; - paths: ProviderInputDumpPaths | RequestDumpPaths; + paths: ProviderInputDumpPaths | RequestDumpPaths | ThinkingRetryDumpPaths; model: object; requestKind: RequestKind; - requestOptions: vscode.ProvideLanguageModelChatResponseOptions; - messages: readonly vscode.LanguageModelChatRequestMessage[]; - toolSummary: ToolSummary; + requestOptions?: vscode.ProvideLanguageModelChatResponseOptions; + messages?: readonly vscode.LanguageModelChatRequestMessage[]; + toolSummary?: ToolSummary; + retry?: { + endpoint: string; + strategy: string; + sourceStatus: number; + }; }): object { return { event: options.event, @@ -300,11 +559,14 @@ function createDumpObservation(options: { paths: options.paths, model: options.model, requestKind: options.requestKind, - options: summarizeRequestOptions(options.requestOptions), + options: options.requestOptions ? summarizeRequestOptions(options.requestOptions) : undefined, hostSettings: summarizeHostSettings(), - systemPromptSummary: summarizeVscodeSystemPrompt(options.messages), - messageStats: summarizeMessagesFromInput(options.messages), + systemPromptSummary: options.messages + ? summarizeVscodeSystemPrompt(options.messages) + : undefined, + messageStats: options.messages ? summarizeMessagesFromInput(options.messages) : undefined, toolStats: options.toolSummary, + retry: options.retry, }; } @@ -1022,6 +1284,25 @@ function logRequestDump( ); } +function logThinkingCompatibilityRetryDump( + options: DumpThinkingCompatibilityRetryAttemptOptions, + paths: ThinkingRetryDumpPaths, + requestJsonLength: number, + responseJsonLength: number, +): void { + logger.info( + formatRequestLogLine( + options.requestKind, + `${THINKING_COMPAT_LOG_PREFIX} retry-dump-written ` + + `${formatDumpSegment(options.segment)} endpoint=${safeStringify(options.endpoint)}` + + ` strategy=${options.strategy} sourceStatus=${options.sourceStatus}` + + ` request=${formatFileUri(paths.request)} response=${formatFileUri(paths.response)}` + + ` requestKB=${(requestJsonLength / 1024).toFixed(0)}` + + ` responseKB=${(responseJsonLength / 1024).toFixed(0)}`, + ), + ); +} + function formatDumpSegment(segment: ConversationSegment): string { if (segment.reason === 'markerFound') { return `dumpSegment=${segment.segmentId} legacySegmentMarker=found`; diff --git a/src/provider/debug/index.ts b/src/provider/debug/index.ts index 3b078ac..8a514a5 100644 --- a/src/provider/debug/index.ts +++ b/src/provider/debug/index.ts @@ -8,4 +8,10 @@ export type { CacheDiagnosticsRun, ReplayMarkerReportTrigger, } from './diagnostics'; -export { dumpDeepSeekRequest, dumpProviderInput, ensureRequestDumpRoot } from './dump'; +export { + createThinkingCompatibilityRetryDump, + dumpDeepSeekRequest, + dumpProviderInput, + dumpThinkingCompatibilityRetryAttempt, + ensureRequestDumpRoot, +} from './dump'; diff --git a/src/provider/request.ts b/src/provider/request.ts index b9dcf90..fb25c7c 100644 --- a/src/provider/request.ts +++ b/src/provider/request.ts @@ -11,17 +11,25 @@ import { type CacheDiagnosticsRecorder, type CacheDiagnosticsRun, } from './debug'; -import { getConfiguredThinkingEffort, type ModelConfigurationOptions } from './models'; +import { + getConfiguredThinkingEffort, + type ModelConfigurationOptions, + type ThinkingEffort, +} from './models'; import { classifyDeepSeekRequest, shouldForceThinkingNone, type RequestKind } from './routing'; import type { ReplayMarkerMetadata } from './replay'; import type { ConversationSegment } from './segment'; +import { toDeepSeekNativeReasoningRequest } from './thinking'; import { collectTrailingToolResultIds, prepareRequestTools } from './tools/request'; import { resolveImageMessages, type VisionDescriber } from './vision'; export interface PreparedChatRequest { client: DeepSeekClient; + baseUrl: string; + globalStorageUri: vscode.Uri; request: DeepSeekRequest; isThinkingModel: boolean; + thinkingEffort: ThinkingEffort; totalRequestChars: number; trailingToolResultIds: string[]; cacheDiagnostics: CacheDiagnosticsRun; @@ -60,7 +68,8 @@ export async function prepareChatRequest({ throw new Error(t('auth.notConfigured')); } - const client = new DeepSeekClient(getBaseUrl(), apiKey); + const baseUrl = getBaseUrl(); + const client = new DeepSeekClient(baseUrl, apiKey); const modelDef = MODELS.find((m) => m.id === modelInfo.id); const isThinkingModel = modelDef?.capabilities.thinking ?? false; const maxTokens = getMaxTokens(); @@ -87,17 +96,9 @@ export async function prepareChatRequest({ options as ModelConfigurationOptions, ); const thinkingEffort = shouldForceThinkingNone(requestKind) ? 'none' : configuredThinkingEffort; - const request: DeepSeekRequest = { - ...baseRequest, - ...(isThinkingModel - ? { - thinking: { - type: thinkingEffort === 'none' ? ('disabled' as const) : ('enabled' as const), - }, - ...(thinkingEffort === 'none' ? {} : { reasoning_effort: thinkingEffort }), - } - : {}), - }; + const request: DeepSeekRequest = isThinkingModel + ? toDeepSeekNativeReasoningRequest(baseRequest, thinkingEffort) + : baseRequest; dumpDeepSeekRequest(request, { globalStorageUri, segment, @@ -131,8 +132,11 @@ export async function prepareChatRequest({ return { client, + baseUrl, + globalStorageUri, request, isThinkingModel, + thinkingEffort, totalRequestChars, trailingToolResultIds: collectTrailingToolResultIds(deepseekMessages), cacheDiagnostics: diagnosticsRun, diff --git a/src/provider/stream.ts b/src/provider/stream.ts index a4c5477..0d9eeda 100644 --- a/src/provider/stream.ts +++ b/src/provider/stream.ts @@ -1,8 +1,9 @@ import vscode from 'vscode'; import { createUserFacingError } from '../client'; import { logger } from '../logger'; -import type { DeepSeekToolCall, DeepSeekUsage } from '../types'; +import type { DeepSeekToolCall, DeepSeekUsage, StreamCallbacks } from '../types'; import { + createThinkingCompatibilityRetryDump, observeCancellationToken, type CacheDiagnosticsRun, type ReplayMarkerReportTrigger, @@ -14,6 +15,10 @@ import { type ReplayMarkerMetadata, } from './replay'; import type { PreparedChatRequest } from './request'; +import { + createThinkingCompatibilityPrecheck, + type ThinkingCompatibilityPrecheck, +} from './thinking'; interface ResponseStreamState { accumulatedReasoning: string; @@ -48,52 +53,50 @@ export function streamChatCompletion({ replayMarkerReported: false, }; const cancelListener = observeCancellationToken(token, prepared.cacheDiagnostics); + const precheck = createThinkingCompatibilityPrecheck({ + baseUrl: prepared.baseUrl, + request: prepared.request, + isThinkingModel: prepared.isThinkingModel, + thinkingEffort: prepared.thinkingEffort, + }); + const callbacks: StreamCallbacks = { + onContent: (content: string) => { + reportInitialResponseNoticeOnce(progress, state, initialResponseNotice); + progress.report(new vscode.LanguageModelTextPart(content)); + }, - return prepared.client - .streamChatCompletion( - prepared.request, - { - onContent: (content: string) => { - reportInitialResponseNoticeOnce(progress, state, initialResponseNotice); - progress.report(new vscode.LanguageModelTextPart(content)); - }, + onThinking: (text: string) => { + reportInitialResponseNoticeOnce(progress, state, initialResponseNotice); + handleThinking(text, state, progress); + }, - onThinking: (text: string) => { - reportInitialResponseNoticeOnce(progress, state, initialResponseNotice); - handleThinking(text, state, progress); - }, + onToolCall: (toolCall: DeepSeekToolCall) => { + reportInitialResponseNoticeOnce(progress, state, initialResponseNotice); + handleToolCall(toolCall, state, progress); + }, - onToolCall: (toolCall: DeepSeekToolCall) => { - reportInitialResponseNoticeOnce(progress, state, initialResponseNotice); - handleToolCall(toolCall, state, progress); - }, + onError: (error: Error) => { + throw error; + }, - onError: (error: Error) => { - throw createUserFacingError(error); - }, + onDone: () => { + reportReplayMarkerOnce(prepared, progress, state, 'done'); + finalizeReplayDiagnostics(prepared.trailingToolResultIds, state, prepared.cacheDiagnostics); + }, - onDone: () => { - reportReplayMarkerOnce(prepared, progress, state, 'done'); - finalizeReplayDiagnostics( - prepared.trailingToolResultIds, - state, - prepared.cacheDiagnostics, - ); - }, + onUsage: (usage: DeepSeekUsage) => { + const charsPerToken = updateCharsPerToken( + prepared.totalRequestChars, + usage, + getCharsPerToken(), + ); + setCharsPerToken(charsPerToken); + prepared.cacheDiagnostics.onUsage(usage, charsPerToken); + reportCopilotContextUsage(progress, usage, prepared.requestKind); + }, + }; - onUsage: (usage) => { - const charsPerToken = updateCharsPerToken( - prepared.totalRequestChars, - usage, - getCharsPerToken(), - ); - setCharsPerToken(charsPerToken); - prepared.cacheDiagnostics.onUsage(usage, charsPerToken); - reportCopilotContextUsage(progress, usage, prepared.requestKind); - }, - }, - token, - ) + return streamWithThinkingCompatibility(prepared, callbacks, token, precheck) .then(undefined, (error) => { reportSkippedReplayMarkerIfNeeded( prepared, @@ -101,7 +104,7 @@ export function streamChatCompletion({ token.isCancellationRequested ? 'cancelled' : 'stream-error', error, ); - throw error; + throw createUserFacingError(toError(error)); }) .then(() => { if (token.isCancellationRequested) { @@ -113,6 +116,52 @@ export function streamChatCompletion({ }); } +function streamWithThinkingCompatibility( + prepared: PreparedChatRequest, + callbacks: StreamCallbacks, + token: vscode.CancellationToken, + precheck: ThinkingCompatibilityPrecheck, +): Promise { + return prepared.client + .streamChatCompletion(precheck.initialRequest, callbacks, token) + .then(undefined, (error) => { + const retryAttempt = token.isCancellationRequested + ? undefined + : precheck.createRetryAttempt(error); + if (!retryAttempt) { + throw error; + } + const retryDump = createThinkingCompatibilityRetryDump({ + globalStorageUri: prepared.globalStorageUri, + segment: prepared.segment, + requestKind: prepared.requestKind, + endpoint: prepared.baseUrl, + strategy: retryAttempt.strategy, + sourceStatus: retryAttempt.sourceStatus, + request: retryAttempt.request, + callbacks, + }); + retryAttempt.logStart(); + return prepared.client + .streamChatCompletion(retryAttempt.request, retryDump.callbacks, token) + .then( + () => { + if (token.isCancellationRequested) { + retryDump.dumpFailure(new Error('DeepSeek retry cancelled')); + return; + } + retryAttempt.recordSuccess(); + retryDump.dumpSuccess(); + }, + (retryError) => { + retryAttempt.logFailure(retryError); + retryDump.dumpFailure(retryError); + throw error; + }, + ); + }); +} + function reportInitialResponseNoticeOnce( progress: vscode.Progress, state: ResponseStreamState, @@ -289,3 +338,7 @@ function reportCopilotContextUsage( logger.warn(formatRequestLogLine(requestKind, 'Failed to report usage data'), error); } } + +function toError(error: unknown): Error { + return error instanceof Error ? error : new Error(String(error)); +} diff --git a/src/provider/thinking/index.ts b/src/provider/thinking/index.ts new file mode 100644 index 0000000..beb10b6 --- /dev/null +++ b/src/provider/thinking/index.ts @@ -0,0 +1,17 @@ +export { + createThinkingCompatibilityPrecheck, + type ThinkingCompatibilityPrecheck, + type ThinkingCompatibilityRetryAttempt, + type ThinkingCompatibilityRetryStrategy, +} from './precheck'; +export { + toDeepSeekNativeReasoningRequest, + toOpenAICompatibleMaxRetryRequest, + toOpenAICompatibleReasoningEffort, + toOpenAICompatibleReasoningRequest, +} from './shape'; +export type { + ChatCompletionRequestBody, + OpenAICompatibleReasoningEffort, + OpenAICompatibleReasoningRequest, +} from './types'; diff --git a/src/provider/thinking/precheck.ts b/src/provider/thinking/precheck.ts new file mode 100644 index 0000000..e64a1be --- /dev/null +++ b/src/provider/thinking/precheck.ts @@ -0,0 +1,166 @@ +import { DeepSeekRequestError } from '../../client/error'; +import { OFFICIAL_DEEPSEEK_API_HOST } from '../../client/consts'; +import { logger } from '../../logger'; +import type { DeepSeekRequest } from '../../types'; +import type { ThinkingEffort } from '../models'; +import { toOpenAICompatibleMaxRetryRequest, toOpenAICompatibleReasoningRequest } from './shape'; +import type { ChatCompletionRequestBody } from './types'; + +const LOG_PREFIX = '[reasoning-effort-compat]'; +const openAICompatibleEffortEndpoints = new Set(); + +export interface ThinkingCompatibilityPrecheck { + readonly initialRequest: ChatCompletionRequestBody; + createRetryAttempt(error: unknown): ThinkingCompatibilityRetryAttempt | undefined; +} + +export interface ThinkingCompatibilityRetryAttempt { + readonly request: ChatCompletionRequestBody; + readonly strategy: ThinkingCompatibilityRetryStrategy; + readonly sourceStatus: number; + logStart(): void; + logFailure(error: unknown): void; + recordSuccess(): void; +} + +export type ThinkingCompatibilityRetryStrategy = 'max-to-xhigh'; + +export function createThinkingCompatibilityPrecheck(options: { + baseUrl: string; + request: DeepSeekRequest; + isThinkingModel: boolean; + thinkingEffort: ThinkingEffort; +}): ThinkingCompatibilityPrecheck { + const endpointKey = getSessionCacheKey(options.baseUrl); + const initialRequest = createInitialRequest({ + ...options, + endpointKey, + }); + + return { + initialRequest, + createRetryAttempt: (error) => + createRetryAttempt({ + ...options, + endpointKey, + initialRequest, + error, + }), + }; +} + +function createInitialRequest(options: { + baseUrl: string; + request: DeepSeekRequest; + isThinkingModel: boolean; + thinkingEffort: ThinkingEffort; + endpointKey: string; +}): ChatCompletionRequestBody { + if (!options.isThinkingModel) { + return options.request; + } + if (openAICompatibleEffortEndpoints.has(options.endpointKey)) { + const request = toOpenAICompatibleReasoningRequest(options.request, options.thinkingEffort); + logger.info( + `${LOG_PREFIX} precheck-cache-hit endpoint=${options.endpointKey}` + + ` effort=${options.thinkingEffort}` + + ` mappedEffort=${request.reasoning_effort}` + + ` removedThinking=true`, + ); + return request; + } + return options.request; +} + +function createRetryAttempt(options: { + baseUrl: string; + request: DeepSeekRequest; + thinkingEffort: ThinkingEffort; + endpointKey: string; + initialRequest: ChatCompletionRequestBody; + error: unknown; +}): ThinkingCompatibilityRetryAttempt | undefined { + const failure = getHttpFailure(options.error); + if (!failure || openAICompatibleEffortEndpoints.has(options.endpointKey)) { + return undefined; + } + if ( + options.thinkingEffort === 'max' && + options.initialRequest.reasoning_effort === 'max' && + !isOfficialDeepSeekEndpoint(options.baseUrl) && + isRetryableThinkingHttpFailure(failure) + ) { + return createMaxToXHighAttempt(options, failure); + } + return undefined; +} + +function createMaxToXHighAttempt( + options: { + request: DeepSeekRequest; + endpointKey: string; + }, + failure: HttpFailure, +): ThinkingCompatibilityRetryAttempt { + const retryRequest = toOpenAICompatibleMaxRetryRequest(options.request); + return { + request: retryRequest, + strategy: 'max-to-xhigh', + sourceStatus: failure.status, + logStart: () => { + logger.info( + `${LOG_PREFIX} precheck-retry-start endpoint=${options.endpointKey}` + + ` status=${failure.status} effort=max->xhigh removedThinking=true`, + ); + }, + logFailure: (error) => { + logger.info( + `${LOG_PREFIX} precheck-retry-failed endpoint=${options.endpointKey}` + + ` status=${getHttpFailure(error)?.status ?? 'unknown'}`, + ); + }, + recordSuccess: () => { + openAICompatibleEffortEndpoints.add(options.endpointKey); + logger.info( + `${LOG_PREFIX} precheck-retry-success endpoint=${options.endpointKey}` + + ` sessionCache=openai-compatible-effort`, + ); + }, + }; +} + +interface HttpFailure { + status: number; +} + +function getHttpFailure(error: unknown): HttpFailure | undefined { + if (!(error instanceof DeepSeekRequestError) || error.kind !== 'http') { + return undefined; + } + return { + status: error.status ?? 0, + }; +} + +function isRetryableThinkingHttpFailure(failure: HttpFailure): boolean { + return failure.status === 400; +} + +function getSessionCacheKey(baseUrl: string): string { + try { + const url = new URL(baseUrl); + const port = url.port ? `:${url.port}` : ''; + const pathname = url.pathname.replace(/\/+$/u, ''); + return `${url.protocol.toLowerCase()}//${url.hostname.toLowerCase()}${port}${pathname}`; + } catch { + return baseUrl.trim().replace(/\/+$/u, ''); + } +} + +function isOfficialDeepSeekEndpoint(baseUrl: string): boolean { + try { + return new URL(baseUrl).hostname.toLowerCase() === OFFICIAL_DEEPSEEK_API_HOST; + } catch { + return false; + } +} diff --git a/src/provider/thinking/shape.ts b/src/provider/thinking/shape.ts new file mode 100644 index 0000000..9460fdc --- /dev/null +++ b/src/provider/thinking/shape.ts @@ -0,0 +1,46 @@ +import type { DeepSeekRequest } from '../../types'; +import type { ThinkingEffort } from '../models'; +import type { + DeepSeekNativeReasoningEffort, + OpenAICompatibleReasoningEffort, + OpenAICompatibleReasoningRequest, +} from './types'; + +export function toOpenAICompatibleMaxRetryRequest( + request: DeepSeekRequest, +): OpenAICompatibleReasoningRequest { + return toOpenAICompatibleReasoningRequest(request, 'max'); +} + +export function toOpenAICompatibleReasoningRequest( + request: DeepSeekRequest, + effort: ThinkingEffort, +): OpenAICompatibleReasoningRequest { + const { thinking: _thinking, reasoning_effort: _reasoningEffort, ...rest } = request; + return { + ...rest, + reasoning_effort: toOpenAICompatibleReasoningEffort(effort), + }; +} + +export function toDeepSeekNativeReasoningRequest( + request: DeepSeekRequest, + effort: ThinkingEffort, +): DeepSeekRequest { + const nativeRequest: DeepSeekRequest = { + ...request, + thinking: { type: effort === 'none' ? 'disabled' : 'enabled' }, + }; + if (effort === 'none') { + delete nativeRequest.reasoning_effort; + } else { + nativeRequest.reasoning_effort = effort as DeepSeekNativeReasoningEffort; + } + return nativeRequest; +} + +export function toOpenAICompatibleReasoningEffort( + effort: ThinkingEffort, +): OpenAICompatibleReasoningEffort { + return effort === 'max' ? 'xhigh' : effort; +} diff --git a/src/provider/thinking/types.ts b/src/provider/thinking/types.ts new file mode 100644 index 0000000..88acbbb --- /dev/null +++ b/src/provider/thinking/types.ts @@ -0,0 +1,16 @@ +import type { DeepSeekRequest } from '../../types'; +import type { ThinkingEffort } from '../models'; + +export type DeepSeekNativeReasoningEffort = Exclude; + +export type OpenAICompatibleReasoningEffort = Extract | 'xhigh'; + +export type OpenAICompatibleReasoningRequest = Omit< + DeepSeekRequest, + 'thinking' | 'reasoning_effort' +> & { + thinking?: never; + reasoning_effort?: OpenAICompatibleReasoningEffort; +}; + +export type ChatCompletionRequestBody = DeepSeekRequest | OpenAICompatibleReasoningRequest; diff --git a/src/types.ts b/src/types.ts index f9e9689..e70b068 100644 --- a/src/types.ts +++ b/src/types.ts @@ -89,6 +89,7 @@ export interface StreamCallbacks { onError: (error: Error) => void; onDone: () => void; onUsage?: (usage: DeepSeekUsage) => void; + onRawResponseData?: (data: string) => void; } // ---- Model definitions ----