From 9f043037f41b7377e351a8d1b53a33799ba9d54f Mon Sep 17 00:00:00 2001
From: Vizards <gvizards@gmail.com>
Date: Thu, 11 Jun 2026 13:18:04 +0800
Subject: [PATCH] fix: support OpenRouter reasoning effort compatibility

---
 src/client/core.ts                |   8 +-
 src/client/error/index.ts         |  12 ++
 src/client/types.ts               |   4 +-
 src/provider/debug/dump.ts        | 299 +++++++++++++++++++++++++++++-
 src/provider/debug/index.ts       |   8 +-
 src/provider/request.ts           |  30 +--
 src/provider/stream.ts            | 137 +++++++++-----
 src/provider/thinking/index.ts    |  17 ++
 src/provider/thinking/precheck.ts | 166 +++++++++++++++++
 src/provider/thinking/shape.ts    |  46 +++++
 src/provider/thinking/types.ts    |  16 ++
 src/types.ts                      |   1 +
 12 files changed, 674 insertions(+), 70 deletions(-)
 create mode 100644 src/provider/thinking/index.ts
 create mode 100644 src/provider/thinking/precheck.ts
 create mode 100644 src/provider/thinking/shape.ts
 create mode 100644 src/provider/thinking/types.ts
diff --git a/src/client/core.ts b/src/client/core.ts
index 8bb2443..06a3742 100644
--- a/src/client/core.ts
+++ b/src/client/core.ts
@@ -1,8 +1,8 @@
 import type { CancellationToken } from 'vscode';
 import { safeStringify } from '../json';
 import { logger } from '../logger';
+import type { ChatCompletionRequestBody } from '../provider/thinking';
 import type {
-	DeepSeekRequest,
 	DeepSeekStreamChunk,
 	DeepSeekToolCall,
 	DeepSeekUsage,
@@ -25,7 +25,7 @@ export class DeepSeekClient {
 	 * Parses SSE chunks and dispatches callbacks for content, thinking, and tool calls.
 	 */
 	async streamChatCompletion(
-		request: DeepSeekRequest,
+		request: ChatCompletionRequestBody,
 		callbacks: StreamCallbacks,
 		cancellationToken?: CancellationToken,
 	): Promise<void> {
@@ -81,7 +81,9 @@ export class DeepSeekClient {
 					break;
 				}
 
-				buffer += decoder.decode(value, { stream: true });
+				const decoded = decoder.decode(value, { stream: true });
+				callbacks.onRawResponseData?.(decoded);
+				buffer += decoded;
 
 				const lines = buffer.split('\n');
 				buffer = lines.pop() || '';
diff --git a/src/client/error/index.ts b/src/client/error/index.ts
index de3f428..5463ac0 100644
--- a/src/client/error/index.ts
+++ b/src/client/error/index.ts
@@ -38,7 +38,10 @@ export class DeepSeekRequestError extends Error {
 	readonly diagnosticMessage: string;
 	readonly baseUrl?: string;
 	readonly status?: number;
+	readonly statusText?: string;
 	readonly code?: string;
+	readonly serverMessage?: string;
+	readonly responseText?: string;
 
 	constructor(options: {
 		message: string;
@@ -47,7 +50,10 @@ export class DeepSeekRequestError extends Error {
 		diagnosticMessage?: string;
 		baseUrl?: string;
 		status?: number;
+		statusText?: string;
 		code?: string;
+		serverMessage?: string;
+		responseText?: string;
 		cause?: unknown;
 	}) {
 		super(options.message, { cause: options.cause });
@@ -57,7 +63,10 @@ export class DeepSeekRequestError extends Error {
 		this.diagnosticMessage = options.diagnosticMessage ?? options.message;
 		this.baseUrl = options.baseUrl;
 		this.status = options.status;
+		this.statusText = options.statusText;
 		this.code = options.code;
+		this.serverMessage = options.serverMessage;
+		this.responseText = options.responseText;
 	}
 }
 
@@ -79,7 +88,10 @@ export async function createHttpError(
 		kind: 'http',
 		baseUrl,
 		status: response.status,
+		statusText: response.statusText,
 		code: `HTTP_${response.status}`,
+		serverMessage,
+		responseText,
 		diagnosticMessage: joinDiagnosticParts(
 			`kind=http`,
 			`status=${response.status}`,
diff --git a/src/client/types.ts b/src/client/types.ts
index 773932f..eef2a58 100644
--- a/src/client/types.ts
+++ b/src/client/types.ts
@@ -1,4 +1,4 @@
-import type { DeepSeekRequest } from '../types';
+import type { ChatCompletionRequestBody } from '../provider/thinking';
 
 export interface ErrorActionUrls {
 	configureApiKey?: string;
@@ -7,7 +7,7 @@ export interface ErrorActionUrls {
 
 export interface RequestErrorContext {
 	baseUrl: string;
-	request: DeepSeekRequest;
+	request: ChatCompletionRequestBody;
 }
 
 export interface ErrorActionLink {
diff --git a/src/provider/debug/dump.ts b/src/provider/debug/dump.ts
index adf831c..ad69e64 100644
--- a/src/provider/debug/dump.ts
+++ b/src/provider/debug/dump.ts
@@ -3,11 +3,18 @@ import { appendFile, mkdir, writeFile } from 'fs/promises';
 import { tmpdir } from 'os';
 import { join } from 'path';
 import vscode from 'vscode';
+import { DeepSeekRequestError } from '../../client/error';
 import { getRequestDumpEnabled } from '../../config';
 import { LANGUAGE_MODEL_CHAT_SYSTEM_ROLE } from '../../consts';
 import { safeStringify, toWellFormedString } from '../../json';
 import { logger } from '../../logger';
-import type { DeepSeekMessage, DeepSeekRequest } from '../../types';
+import type {
+	DeepSeekMessage,
+	DeepSeekRequest,
+	DeepSeekToolCall,
+	DeepSeekUsage,
+	StreamCallbacks,
+} from '../../types';
 import {
 	classifyDeepSeekRequest,
 	classifyProviderRequest,
@@ -22,12 +29,14 @@ import type { VisionProxySource, VisionResolutionStats } from '../vision';
 
 let dumpCounter = 0;
 let providerInputDumpCounter = 0;
+let thinkingRetryDumpCounter = 0;
 let dumpWriteQueue: Promise<void> = Promise.resolve();
 
 const REQUEST_OBSERVATIONS_FILE = '_request-observations.jsonl';
 const HASH_WINDOW_CHARS = 2_048;
+const THINKING_COMPAT_LOG_PREFIX = '[reasoning-effort-compat]';
 
-type DumpEvent = 'provider-input' | 'deepseek-request';
+type DumpEvent = 'provider-input' | 'deepseek-request' | 'thinking-compat-retry';
 type DumpStage = 'provider-input' | 'input' | 'resolved';
 
 interface DumpContext {
@@ -50,6 +59,12 @@ interface RequestDumpPaths {
 	msg0?: string;
 }
 
+interface ThinkingRetryDumpPaths { // File-system locations used for one thinking-compat retry dump.
+	directory: string; // Dump root directory, taken from the dump context.
+	request: string; // Path of the `<basename>.request.json` file.
+	response: string; // Path of the `<basename>.response.json` file.
+}
+
 interface ToolSummary {
 	toolCount: number;
 	toolNames: string[];
@@ -119,6 +134,44 @@ export interface DumpProviderInputOptions {
 	requestOptions: vscode.ProvideLanguageModelChatResponseOptions;
 }
 
+export interface DumpThinkingCompatibilityRetryAttemptOptions { // Input for writing one retry attempt (request + response) as a request dump.
+	globalStorageUri: vscode.Uri; // Passed to createDumpContext and writeDumpObservation.
+	segment: ConversationSegment; // Conversation segment recorded in the observation and in the log line.
+	requestKind: RequestKind;
+	endpoint: string; // Recorded under `retry.endpoint` and logged.
+	strategy: string; // Retry strategy label, recorded under `retry.strategy`.
+	sourceStatus: number; // Recorded under `retry.sourceStatus`.
+	request: object; // Serialized to the `.request.json` file.
+	response: object; // Serialized to the `.response.json` file.
+}
+
+export interface CreateThinkingCompatibilityRetryDumpOptions { // Input for createThinkingCompatibilityRetryDump.
+	globalStorageUri: vscode.Uri;
+	segment: ConversationSegment;
+	requestKind: RequestKind;
+	endpoint: string;
+	strategy: string;
+	sourceStatus: number;
+	request: object; // Retry request body; `stream_options` is added to the copy that gets dumped.
+	callbacks: StreamCallbacks; // Downstream callbacks that every captured event is forwarded to.
+}
+
+interface ThinkingCompatibilityRetryDumpRecorder { // Handle returned by createThinkingCompatibilityRetryDump.
+	callbacks: StreamCallbacks; // Capturing wrappers to hand to the client in place of the original callbacks.
+	dumpSuccess(): void; // Dumps the captured stream as an `ok: true` response snapshot.
+	dumpFailure(error: unknown): void; // Dumps the captured stream as an `ok: false` snapshot that includes the error.
+}
+
+interface CapturedRetryResponse { // Mutable accumulator for everything observed during one retry stream.
+	rawResponseParts: string[]; // Chunks received through onRawResponseData, in arrival order.
+	contentParts: string[]; // Chunks received through onContent.
+	reasoningParts: string[]; // Chunks received through onThinking.
+	toolCalls: DeepSeekToolCall[]; // Tool calls received through onToolCall.
+	usage?: DeepSeekUsage; // Most recent value received through onUsage.
+	done: boolean; // Set to true once onDone has fired.
+	error?: object; // Snapshot produced by createErrorSnapshot.
+}
+
 /**
  * Dump the raw LanguageModelChatProvider input before any request preparation.
  * This captures the first observable `options.tools` list, including any
@@ -242,6 +295,199 @@ export function dumpDeepSeekRequest(
 	});
 }
 
+export function dumpThinkingCompatibilityRetryAttempt( // Queues a write of one retry attempt's request/response JSON plus an observation record.
+	options: DumpThinkingCompatibilityRetryAttemptOptions,
+): void {
+	if (!getRequestDumpEnabled()) return; // No-op unless request dumping is enabled.
+
+	const context = createDumpContext(
+		options.globalStorageUri,
+		options.segment,
+		'thinking-compat-retry',
+		(thinkingRetryDumpCounter += 1), // Retry dumps advance their own module-level counter.
+		options.requestKind,
+	);
+	const paths = createThinkingRetryDumpPaths(context);
+
+	enqueueDumpWrite(`${THINKING_COMPAT_LOG_PREFIX} retryDump`, async () => { // All file writes happen inside the task handed to enqueueDumpWrite.
+		await mkdir(context.root, { recursive: true });
+		const requestJson = await writeJsonFile(paths.request, options.request, (value) =>
+			JSON.stringify(value, null, 2),
+		);
+		const responseJson = await writeJsonFile(paths.response, options.response, (value) =>
+			JSON.stringify(value, null, 2),
+		);
+
+		await writeDumpObservation(
+			options.globalStorageUri,
+			createDumpObservation({
+				event: 'thinking-compat-retry',
+				context,
+				segment: options.segment,
+				paths,
+				model: {}, // No model details are recorded for retry dumps.
+				requestKind: options.requestKind,
+				requestOptions: undefined, // Not part of the retry options, so the derived summaries stay undefined.
+				messages: undefined,
+				toolSummary: undefined,
+				retry: {
+					endpoint: options.endpoint,
+					strategy: options.strategy,
+					sourceStatus: options.sourceStatus,
+				},
+			}),
+		);
+		logThinkingCompatibilityRetryDump(options, paths, requestJson.length, responseJson.length); // The lengths feed the requestKB/responseKB log fields.
+	});
+}
+
+export function createThinkingCompatibilityRetryDump( // Builds capturing wrappers around stream callbacks so a retry's streamed response can be dumped afterwards.
+	options: CreateThinkingCompatibilityRetryDumpOptions,
+): ThinkingCompatibilityRetryDumpRecorder {
+	const captured: CapturedRetryResponse = { // Filled in by the wrappers below as stream events arrive.
+		rawResponseParts: [],
+		contentParts: [],
+		reasoningParts: [],
+		toolCalls: [],
+		done: false,
+	};
+
+	return {
+		callbacks: { // Every wrapper records the event first and then forwards it to options.callbacks.
+			onContent: (content) => {
+				captured.contentParts.push(content);
+				options.callbacks.onContent(content);
+			},
+			onThinking: (text) => {
+				captured.reasoningParts.push(text);
+				options.callbacks.onThinking(text);
+			},
+			onToolCall: (toolCall) => {
+				captured.toolCalls.push(toolCall);
+				options.callbacks.onToolCall(toolCall);
+			},
+			onError: (error) => {
+				captured.error = createErrorSnapshot(error); // Recorded before forwarding, so it is kept even if the forwarded handler throws.
+				options.callbacks.onError(error);
+			},
+			onDone: () => {
+				captured.done = true;
+				options.callbacks.onDone();
+			},
+			onUsage: (usage) => {
+				captured.usage = usage; // A later usage report replaces an earlier one.
+				options.callbacks.onUsage?.(usage);
+			},
+			onRawResponseData: (data) => {
+				captured.rawResponseParts.push(data);
+				options.callbacks.onRawResponseData?.(data); // Optional downstream hooks are only called when present.
+			},
+		},
+		dumpSuccess: () => { // Whether anything is written is decided later, inside dumpThinkingCompatibilityRetryAttempt.
+			dumpThinkingCompatibilityRetry(options, createSuccessResponseSnapshot(captured));
+		},
+		dumpFailure: (error) => {
+			captured.error ??= createErrorSnapshot(error); // An error already captured through onError takes precedence.
+			dumpThinkingCompatibilityRetry(options, createFailureResponseSnapshot(captured));
+		},
+	};
+}
+
+function dumpThinkingCompatibilityRetry( // Adapts recorder options plus a response snapshot into a dumpThinkingCompatibilityRetryAttempt call.
+	options: CreateThinkingCompatibilityRetryDumpOptions,
+	response: object,
+): void {
+	dumpThinkingCompatibilityRetryAttempt({
+		globalStorageUri: options.globalStorageUri,
+		segment: options.segment,
+		requestKind: options.requestKind,
+		endpoint: options.endpoint,
+		strategy: options.strategy,
+		sourceStatus: options.sourceStatus,
+		request: createSentRequestBody(options.request), // The dumped request is the copy with stream_options added.
+		response,
+	});
+}
+
+function createSentRequestBody(request: object): object { // Shallow copy of the retry request with stream_options.include_usage set to true.
+	return {
+		...request,
+		stream_options: { include_usage: true }, // NOTE(review): presumably mirrors what the client adds when sending — confirm against DeepSeekClient.
+	};
+}
+
+function createSuccessResponseSnapshot(captured: CapturedRetryResponse): object { // Builds the `ok: true` dump payload from the captured stream.
+	return {
+		ok: true,
+		stream: true,
+		done: captured.done,
+		rawResponseText: joinIfAny(captured.rawResponseParts), // undefined when nothing was captured.
+		content: joinIfAny(captured.contentParts),
+		reasoning_content: joinIfAny(captured.reasoningParts),
+		tool_calls: captured.toolCalls.length > 0 ? captured.toolCalls : undefined, // An empty list is reported as undefined.
+		usage: captured.usage,
+		summary: createResponseSummary(captured),
+	};
+}
+
+function createFailureResponseSnapshot(captured: CapturedRetryResponse): object { // Builds the `ok: false` dump payload; captured output is reported under `partial_*` keys.
+	return {
+		ok: false,
+		stream: true,
+		done: captured.done,
+		rawResponseText: joinIfAny(captured.rawResponseParts), // undefined when nothing was captured.
+		partial_content: joinIfAny(captured.contentParts),
+		partial_reasoning_content: joinIfAny(captured.reasoningParts),
+		partial_tool_calls: captured.toolCalls.length > 0 ? captured.toolCalls : undefined, // An empty list is reported as undefined.
+		usage: captured.usage,
+		error: captured.error, // Snapshot created by createErrorSnapshot, if any.
+		summary: createResponseSummary(captured),
+	};
+}
+
+function createResponseSummary(captured: CapturedRetryResponse): object { // Size counters for a captured response, included in both snapshot kinds.
+	return {
+		contentChars: captured.contentParts.reduce((total, part) => total + part.length, 0), // Sums of string lengths across the captured parts.
+		reasoningChars: captured.reasoningParts.reduce((total, part) => total + part.length, 0),
+		rawResponseChars: captured.rawResponseParts.reduce((total, part) => total + part.length, 0),
+		toolCallCount: captured.toolCalls.length,
+		hasUsage: Boolean(captured.usage), // True once any usage object was captured.
+	};
+}
+
+function joinIfAny(parts: readonly string[]): string | undefined { // Concatenates the parts; an empty list yields undefined rather than ''.
+	return parts.length === 0 ? undefined : parts.join('');
+}
+
+function createErrorSnapshot(error: unknown): object { // Converts a thrown value into a plain object suitable for the JSON dump.
+	if (error instanceof DeepSeekRequestError) { // Most specific case first: copy the request-error diagnostics field by field.
+		return {
+			name: error.name,
+			message: error.message,
+			kind: error.kind,
+			status: error.status,
+			statusText: error.statusText,
+			code: error.code,
+			baseUrl: error.baseUrl,
+			serverMessage: error.serverMessage,
+			responseText: error.responseText,
+			userSummary: error.userSummary,
+			diagnosticMessage: error.diagnosticMessage,
+			stack: error.stack,
+		};
+	}
+	if (error instanceof Error) { // Any other Error: keep only the standard fields.
+		return {
+			name: error.name,
+			message: error.message,
+			stack: error.stack,
+		};
+	}
+	return {
+		value: String(error), // Non-Error throwables are recorded through their string conversion only.
+	};
+}
+
 export async function ensureRequestDumpRoot(globalStorageUri: vscode.Uri): Promise<vscode.Uri> {
 	const root = getRequestDumpBaseRootUri(globalStorageUri);
 	await mkdir(root.fsPath, { recursive: true });
@@ -281,16 +527,29 @@ function createRequestDumpPaths(context: DumpContext, hasMsg0: boolean): Request
 	};
 }
 
+function createThinkingRetryDumpPaths(context: DumpContext): ThinkingRetryDumpPaths { // Derives the request/response file paths for a retry dump from the dump context.
+	return {
+		directory: context.root,
+		request: join(context.root, `${context.basename}.request.json`), // Both files share the context's basename.
+		response: join(context.root, `${context.basename}.response.json`),
+	};
+}
+
 function createDumpObservation(options: {
 	event: DumpEvent;
 	context: DumpContext;
 	segment: ConversationSegment;
-	paths: ProviderInputDumpPaths | RequestDumpPaths;
+	paths: ProviderInputDumpPaths | RequestDumpPaths | ThinkingRetryDumpPaths;
 	model: object;
 	requestKind: RequestKind;
-	requestOptions: vscode.ProvideLanguageModelChatResponseOptions;
-	messages: readonly vscode.LanguageModelChatRequestMessage[];
-	toolSummary: ToolSummary;
+	requestOptions?: vscode.ProvideLanguageModelChatResponseOptions;
+	messages?: readonly vscode.LanguageModelChatRequestMessage[];
+	toolSummary?: ToolSummary;
+	retry?: {
+		endpoint: string;
+		strategy: string;
+		sourceStatus: number;
+	};
 }): object {
 	return {
 		event: options.event,
@@ -300,11 +559,14 @@ function createDumpObservation(options: {
 		paths: options.paths,
 		model: options.model,
 		requestKind: options.requestKind,
-		options: summarizeRequestOptions(options.requestOptions),
+		options: options.requestOptions ? summarizeRequestOptions(options.requestOptions) : undefined,
 		hostSettings: summarizeHostSettings(),
-		systemPromptSummary: summarizeVscodeSystemPrompt(options.messages),
-		messageStats: summarizeMessagesFromInput(options.messages),
+		systemPromptSummary: options.messages
+			? summarizeVscodeSystemPrompt(options.messages)
+			: undefined,
+		messageStats: options.messages ? summarizeMessagesFromInput(options.messages) : undefined,
 		toolStats: options.toolSummary,
+		retry: options.retry,
 	};
 }
 
@@ -1022,6 +1284,25 @@ function logRequestDump(
 	);
 }
 
+function logThinkingCompatibilityRetryDump( // Logs one info line describing a written retry dump (segment, endpoint, paths, sizes).
+	options: DumpThinkingCompatibilityRetryAttemptOptions,
+	paths: ThinkingRetryDumpPaths,
+	requestJsonLength: number, // Length of the serialized request JSON.
+	responseJsonLength: number, // Length of the serialized response JSON.
+): void { // Sizes are logged as length / 1024, formatted without decimals.
+	logger.info(
+		formatRequestLogLine(
+			options.requestKind,
+			`${THINKING_COMPAT_LOG_PREFIX} retry-dump-written ` +
+				`${formatDumpSegment(options.segment)} endpoint=${safeStringify(options.endpoint)}` +
+				` strategy=${options.strategy} sourceStatus=${options.sourceStatus}` +
+				` request=${formatFileUri(paths.request)} response=${formatFileUri(paths.response)}` +
+				` requestKB=${(requestJsonLength / 1024).toFixed(0)}` +
+				` responseKB=${(responseJsonLength / 1024).toFixed(0)}`,
+		),
+	);
+}
+
 function formatDumpSegment(segment: ConversationSegment): string {
 	if (segment.reason === 'markerFound') {
 		return `dumpSegment=${segment.segmentId} legacySegmentMarker=found`;
diff --git a/src/provider/debug/index.ts b/src/provider/debug/index.ts
index 3b078ac..8a514a5 100644
--- a/src/provider/debug/index.ts
+++ b/src/provider/debug/index.ts
@@ -8,4 +8,10 @@ export type {
 	CacheDiagnosticsRun,
 	ReplayMarkerReportTrigger,
 } from './diagnostics';
-export { dumpDeepSeekRequest, dumpProviderInput, ensureRequestDumpRoot } from './dump';
+export {
+	createThinkingCompatibilityRetryDump,
+	dumpDeepSeekRequest,
+	dumpProviderInput,
+	dumpThinkingCompatibilityRetryAttempt,
+	ensureRequestDumpRoot,
+} from './dump';
diff --git a/src/provider/request.ts b/src/provider/request.ts
index b9dcf90..fb25c7c 100644
--- a/src/provider/request.ts
+++ b/src/provider/request.ts
@@ -11,17 +11,25 @@ import {
 	type CacheDiagnosticsRecorder,
 	type CacheDiagnosticsRun,
 } from './debug';
-import { getConfiguredThinkingEffort, type ModelConfigurationOptions } from './models';
+import {
+	getConfiguredThinkingEffort,
+	type ModelConfigurationOptions,
+	type ThinkingEffort,
+} from './models';
 import { classifyDeepSeekRequest, shouldForceThinkingNone, type RequestKind } from './routing';
 import type { ReplayMarkerMetadata } from './replay';
 import type { ConversationSegment } from './segment';
+import { toDeepSeekNativeReasoningRequest } from './thinking';
 import { collectTrailingToolResultIds, prepareRequestTools } from './tools/request';
 import { resolveImageMessages, type VisionDescriber } from './vision';
 
 export interface PreparedChatRequest {
 	client: DeepSeekClient;
+	baseUrl: string;
+	globalStorageUri: vscode.Uri;
 	request: DeepSeekRequest;
 	isThinkingModel: boolean;
+	thinkingEffort: ThinkingEffort;
 	totalRequestChars: number;
 	trailingToolResultIds: string[];
 	cacheDiagnostics: CacheDiagnosticsRun;
@@ -60,7 +68,8 @@ export async function prepareChatRequest({
 		throw new Error(t('auth.notConfigured'));
 	}
 
-	const client = new DeepSeekClient(getBaseUrl(), apiKey);
+	const baseUrl = getBaseUrl();
+	const client = new DeepSeekClient(baseUrl, apiKey);
 	const modelDef = MODELS.find((m) => m.id === modelInfo.id);
 	const isThinkingModel = modelDef?.capabilities.thinking ?? false;
 	const maxTokens = getMaxTokens();
@@ -87,17 +96,9 @@ export async function prepareChatRequest({
 		options as ModelConfigurationOptions,
 	);
 	const thinkingEffort = shouldForceThinkingNone(requestKind) ? 'none' : configuredThinkingEffort;
-	const request: DeepSeekRequest = {
-		...baseRequest,
-		...(isThinkingModel
-			? {
-					thinking: {
-						type: thinkingEffort === 'none' ? ('disabled' as const) : ('enabled' as const),
-					},
-					...(thinkingEffort === 'none' ? {} : { reasoning_effort: thinkingEffort }),
-				}
-			: {}),
-	};
+	const request: DeepSeekRequest = isThinkingModel
+		? toDeepSeekNativeReasoningRequest(baseRequest, thinkingEffort)
+		: baseRequest;
 	dumpDeepSeekRequest(request, {
 		globalStorageUri,
 		segment,
@@ -131,8 +132,11 @@ export async function prepareChatRequest({
 
 	return {
 		client,
+		baseUrl,
+		globalStorageUri,
 		request,
 		isThinkingModel,
+		thinkingEffort,
 		totalRequestChars,
 		trailingToolResultIds: collectTrailingToolResultIds(deepseekMessages),
 		cacheDiagnostics: diagnosticsRun,
diff --git a/src/provider/stream.ts b/src/provider/stream.ts
index a4c5477..0d9eeda 100644
--- a/src/provider/stream.ts
+++ b/src/provider/stream.ts
@@ -1,8 +1,9 @@
 import vscode from 'vscode';
 import { createUserFacingError } from '../client';
 import { logger } from '../logger';
-import type { DeepSeekToolCall, DeepSeekUsage } from '../types';
+import type { DeepSeekToolCall, DeepSeekUsage, StreamCallbacks } from '../types';
 import {
+	createThinkingCompatibilityRetryDump,
 	observeCancellationToken,
 	type CacheDiagnosticsRun,
 	type ReplayMarkerReportTrigger,
@@ -14,6 +15,10 @@ import {
 	type ReplayMarkerMetadata,
 } from './replay';
 import type { PreparedChatRequest } from './request';
+import {
+	createThinkingCompatibilityPrecheck,
+	type ThinkingCompatibilityPrecheck,
+} from './thinking';
 
 interface ResponseStreamState {
 	accumulatedReasoning: string;
@@ -48,52 +53,50 @@ export function streamChatCompletion({
 		replayMarkerReported: false,
 	};
 	const cancelListener = observeCancellationToken(token, prepared.cacheDiagnostics);
+	const precheck = createThinkingCompatibilityPrecheck({
+		baseUrl: prepared.baseUrl,
+		request: prepared.request,
+		isThinkingModel: prepared.isThinkingModel,
+		thinkingEffort: prepared.thinkingEffort,
+	});
+	const callbacks: StreamCallbacks = {
+		onContent: (content: string) => {
+			reportInitialResponseNoticeOnce(progress, state, initialResponseNotice);
+			progress.report(new vscode.LanguageModelTextPart(content));
+		},
 
-	return prepared.client
-		.streamChatCompletion(
-			prepared.request,
-			{
-				onContent: (content: string) => {
-					reportInitialResponseNoticeOnce(progress, state, initialResponseNotice);
-					progress.report(new vscode.LanguageModelTextPart(content));
-				},
+		onThinking: (text: string) => {
+			reportInitialResponseNoticeOnce(progress, state, initialResponseNotice);
+			handleThinking(text, state, progress);
+		},
 
-				onThinking: (text: string) => {
-					reportInitialResponseNoticeOnce(progress, state, initialResponseNotice);
-					handleThinking(text, state, progress);
-				},
+		onToolCall: (toolCall: DeepSeekToolCall) => {
+			reportInitialResponseNoticeOnce(progress, state, initialResponseNotice);
+			handleToolCall(toolCall, state, progress);
+		},
 
-				onToolCall: (toolCall: DeepSeekToolCall) => {
-					reportInitialResponseNoticeOnce(progress, state, initialResponseNotice);
-					handleToolCall(toolCall, state, progress);
-				},
+		onError: (error: Error) => {
+			throw error;
+		},
 
-				onError: (error: Error) => {
-					throw createUserFacingError(error);
-				},
+		onDone: () => {
+			reportReplayMarkerOnce(prepared, progress, state, 'done');
+			finalizeReplayDiagnostics(prepared.trailingToolResultIds, state, prepared.cacheDiagnostics);
+		},
 
-				onDone: () => {
-					reportReplayMarkerOnce(prepared, progress, state, 'done');
-					finalizeReplayDiagnostics(
-						prepared.trailingToolResultIds,
-						state,
-						prepared.cacheDiagnostics,
-					);
-				},
+		onUsage: (usage: DeepSeekUsage) => {
+			const charsPerToken = updateCharsPerToken(
+				prepared.totalRequestChars,
+				usage,
+				getCharsPerToken(),
+			);
+			setCharsPerToken(charsPerToken);
+			prepared.cacheDiagnostics.onUsage(usage, charsPerToken);
+			reportCopilotContextUsage(progress, usage, prepared.requestKind);
+		},
+	};
 
-				onUsage: (usage) => {
-					const charsPerToken = updateCharsPerToken(
-						prepared.totalRequestChars,
-						usage,
-						getCharsPerToken(),
-					);
-					setCharsPerToken(charsPerToken);
-					prepared.cacheDiagnostics.onUsage(usage, charsPerToken);
-					reportCopilotContextUsage(progress, usage, prepared.requestKind);
-				},
-			},
-			token,
-		)
+	return streamWithThinkingCompatibility(prepared, callbacks, token, precheck)
 		.then(undefined, (error) => {
 			reportSkippedReplayMarkerIfNeeded(
 				prepared,
@@ -101,7 +104,7 @@ export function streamChatCompletion({
 				token.isCancellationRequested ? 'cancelled' : 'stream-error',
 				error,
 			);
-			throw error;
+			throw createUserFacingError(toError(error));
 		})
 		.then(() => {
 			if (token.isCancellationRequested) {
@@ -113,6 +116,52 @@ export function streamChatCompletion({
 		});
 }
 
+function streamWithThinkingCompatibility( // Streams the precheck's initial request and, on a qualifying failure, retries once with the compatibility request.
+	prepared: PreparedChatRequest,
+	callbacks: StreamCallbacks,
+	token: vscode.CancellationToken,
+	precheck: ThinkingCompatibilityPrecheck,
+): Promise<void> {
+	return prepared.client
+		.streamChatCompletion(precheck.initialRequest, callbacks, token)
+		.then(undefined, (error) => {
+			const retryAttempt = token.isCancellationRequested
+				? undefined // No retry once cancellation has been requested.
+				: precheck.createRetryAttempt(error);
+			if (!retryAttempt) {
+				throw error; // Not eligible for a retry: propagate the original failure unchanged.
+			}
+			const retryDump = createThinkingCompatibilityRetryDump({
+				globalStorageUri: prepared.globalStorageUri,
+				segment: prepared.segment,
+				requestKind: prepared.requestKind,
+				endpoint: prepared.baseUrl,
+				strategy: retryAttempt.strategy,
+				sourceStatus: retryAttempt.sourceStatus,
+				request: retryAttempt.request,
+				callbacks,
+			});
+			retryAttempt.logStart();
+			return prepared.client
+				.streamChatCompletion(retryAttempt.request, retryDump.callbacks, token) // The capturing callbacks still forward to `callbacks`.
+				.then(
+					() => {
+						if (token.isCancellationRequested) {
+							retryDump.dumpFailure(new Error('DeepSeek retry cancelled')); // Cancelled during the retry: dump as a failure and skip recordSuccess.
+							return;
+						}
+						retryAttempt.recordSuccess();
+						retryDump.dumpSuccess();
+					},
+					(retryError) => {
+						retryAttempt.logFailure(retryError);
+						retryDump.dumpFailure(retryError);
+						throw error; // Rethrows the ORIGINAL error; the retry's error is only logged and dumped.
+					},
+				);
+		});
+}
+
 function reportInitialResponseNoticeOnce(
 	progress: vscode.Progress<vscode.LanguageModelResponsePart>,
 	state: ResponseStreamState,
@@ -289,3 +338,7 @@ function reportCopilotContextUsage(
 		logger.warn(formatRequestLogLine(requestKind, 'Failed to report usage data'), error);
 	}
 }
+
+function toError(error: unknown): Error { // Passes Error instances through untouched; anything else is wrapped using its string conversion.
+	return !(error instanceof Error) ? new Error(String(error)) : error;
+}
diff --git a/src/provider/thinking/index.ts b/src/provider/thinking/index.ts
new file mode 100644
index 0000000..beb10b6
--- /dev/null
+++ b/src/provider/thinking/index.ts
@@ -0,0 +1,17 @@
+export {
+	createThinkingCompatibilityPrecheck,
+	type ThinkingCompatibilityPrecheck,
+	type ThinkingCompatibilityRetryAttempt,
+	type ThinkingCompatibilityRetryStrategy,
+} from './precheck';
+export {
+	toDeepSeekNativeReasoningRequest,
+	toOpenAICompatibleMaxRetryRequest,
+	toOpenAICompatibleReasoningEffort,
+	toOpenAICompatibleReasoningRequest,
+} from './shape';
+export type {
+	ChatCompletionRequestBody,
+	OpenAICompatibleReasoningEffort,
+	OpenAICompatibleReasoningRequest,
+} from './types';
diff --git a/src/provider/thinking/precheck.ts b/src/provider/thinking/precheck.ts
new file mode 100644
index 0000000..e64a1be
--- /dev/null
+++ b/src/provider/thinking/precheck.ts
@@ -0,0 +1,166 @@
+import { DeepSeekRequestError } from '../../client/error';
+import { OFFICIAL_DEEPSEEK_API_HOST } from '../../client/consts';
+import { logger } from '../../logger';
+import type { DeepSeekRequest } from '../../types';
+import type { ThinkingEffort } from '../models';
+import { toOpenAICompatibleMaxRetryRequest, toOpenAICompatibleReasoningRequest } from './shape';
+import type { ChatCompletionRequestBody } from './types';
+
+const LOG_PREFIX = '[reasoning-effort-compat]'; // Prefix of every log line emitted by this module.
+const openAICompatibleEffortEndpoints = new Set<string>(); // Session cache keys of endpoints where a compatibility retry succeeded; held in module memory only.
+
+export interface ThinkingCompatibilityPrecheck { // Result of createThinkingCompatibilityPrecheck.
+	readonly initialRequest: ChatCompletionRequestBody; // Body to send on the first attempt.
+	createRetryAttempt(error: unknown): ThinkingCompatibilityRetryAttempt | undefined; // undefined means the failure does not qualify for a retry.
+}
+
+export interface ThinkingCompatibilityRetryAttempt { // One fallback retry: the request to send plus logging/bookkeeping hooks.
+	readonly request: ChatCompletionRequestBody; // Body to send for the retry.
+	readonly strategy: ThinkingCompatibilityRetryStrategy;
+	readonly sourceStatus: number; // HTTP status of the failure that triggered the retry.
+	logStart(): void; // Logs that the retry is about to be sent.
+	logFailure(error: unknown): void; // Logs that the retry itself failed.
+	recordSuccess(): void; // Adds the endpoint to the session cache and logs the success.
+}
+
+export type ThinkingCompatibilityRetryStrategy = 'max-to-xhigh'; // Only strategy so far: resend a 'max' effort request with effort 'xhigh'.
+
+export function createThinkingCompatibilityPrecheck(options: { // Chooses the first request shape for an endpoint and exposes a factory for one fallback retry.
+	baseUrl: string;
+	request: DeepSeekRequest;
+	isThinkingModel: boolean;
+	thinkingEffort: ThinkingEffort;
+}): ThinkingCompatibilityPrecheck {
+	const endpointKey = getSessionCacheKey(options.baseUrl); // Normalized key used for the session cache and in log lines.
+	const initialRequest = createInitialRequest({
+		...options,
+		endpointKey,
+	});
+
+	return {
+		initialRequest,
+		createRetryAttempt: (error) => // Evaluated lazily, only after the first attempt has failed.
+			createRetryAttempt({
+				...options,
+				endpointKey,
+				initialRequest,
+				error,
+			}),
+	};
+}
+
+function createInitialRequest(options: { // Returns the OpenAI-compatible shape for thinking models on cached endpoints, otherwise the request unchanged.
+	baseUrl: string;
+	request: DeepSeekRequest;
+	isThinkingModel: boolean;
+	thinkingEffort: ThinkingEffort;
+	endpointKey: string;
+}): ChatCompletionRequestBody {
+	if (!options.isThinkingModel) {
+		return options.request; // Non-thinking models are sent as-is.
+	}
+	if (openAICompatibleEffortEndpoints.has(options.endpointKey)) { // A compatibility retry succeeded for this endpoint earlier in the session.
+		const request = toOpenAICompatibleReasoningRequest(options.request, options.thinkingEffort);
+		logger.info(
+			`${LOG_PREFIX} precheck-cache-hit endpoint=${options.endpointKey}` +
+				` effort=${options.thinkingEffort}` +
+				` mappedEffort=${request.reasoning_effort}` +
+				` removedThinking=true`,
+		);
+		return request;
+	}
+	return options.request; // Endpoint is not in the cache: keep the request as prepared.
+}
+
+function createRetryAttempt(options: { // Decides whether a failed first attempt qualifies for the max->xhigh retry.
+	baseUrl: string;
+	request: DeepSeekRequest;
+	thinkingEffort: ThinkingEffort;
+	endpointKey: string;
+	initialRequest: ChatCompletionRequestBody;
+	error: unknown;
+}): ThinkingCompatibilityRetryAttempt | undefined {
+	const failure = getHttpFailure(options.error);
+	if (!failure || openAICompatibleEffortEndpoints.has(options.endpointKey)) {
+		return undefined; // Not an HTTP failure, or the endpoint is already cached as OpenAI-compatible.
+	}
+	if (
+		options.thinkingEffort === 'max' &&
+		options.initialRequest.reasoning_effort === 'max' && // The request that failed must actually have carried effort 'max'.
+		!isOfficialDeepSeekEndpoint(options.baseUrl) && // The official DeepSeek host is never retried.
+		isRetryableThinkingHttpFailure(failure)
+	) {
+		return createMaxToXHighAttempt(options, failure);
+	}
+	return undefined;
+}
+
+function createMaxToXHighAttempt( // Builds the 'max-to-xhigh' retry: the converted request plus its logging and cache hooks.
+	options: {
+		request: DeepSeekRequest;
+		endpointKey: string;
+	},
+	failure: HttpFailure,
+): ThinkingCompatibilityRetryAttempt {
+	const retryRequest = toOpenAICompatibleMaxRetryRequest(options.request); // Built from the prepared request passed in `options.request`.
+	return {
+		request: retryRequest,
+		strategy: 'max-to-xhigh',
+		sourceStatus: failure.status,
+		logStart: () => {
+			logger.info(
+				`${LOG_PREFIX} precheck-retry-start endpoint=${options.endpointKey}` +
+					` status=${failure.status} effort=max->xhigh removedThinking=true`,
+			);
+		},
+		logFailure: (error) => { // Logs the retry's own HTTP status, or 'unknown' when it is not an HTTP failure.
+			logger.info(
+				`${LOG_PREFIX} precheck-retry-failed endpoint=${options.endpointKey}` +
+					` status=${getHttpFailure(error)?.status ?? 'unknown'}`,
+			);
+		},
+		recordSuccess: () => {
+			openAICompatibleEffortEndpoints.add(options.endpointKey); // Later prechecks for this endpoint start with the OpenAI-compatible shape.
+			logger.info(
+				`${LOG_PREFIX} precheck-retry-success endpoint=${options.endpointKey}` +
+					` sessionCache=openai-compatible-effort`,
+			);
+		},
+	};
+}
+
+interface HttpFailure { // Minimal view of an HTTP-kind DeepSeekRequestError.
+	status: number; // HTTP status; 0 when the error carried none.
+}
+
+function getHttpFailure(error: unknown): HttpFailure | undefined { // Extracts the status from an HTTP-kind DeepSeekRequestError; undefined for anything else.
+	if (!(error instanceof DeepSeekRequestError) || error.kind !== 'http') {
+		return undefined;
+	}
+	return {
+		status: error.status ?? 0, // A missing status is reported as 0.
+	};
+}
+
+function isRetryableThinkingHttpFailure(failure: HttpFailure): boolean { // Only HTTP 400 responses qualify for the compatibility retry.
+	return failure.status === 400;
+}
+
+function getSessionCacheKey(baseUrl: string): string { // Normalizes a base URL into the key used for the session cache and log lines.
+	try {
+		const url = new URL(baseUrl);
+		const port = url.port ? `:${url.port}` : ''; // The port is included only when the parsed URL reports one.
+		const pathname = url.pathname.replace(/\/+$/u, ''); // Strip trailing slashes.
+		return `${url.protocol.toLowerCase()}//${url.hostname.toLowerCase()}${port}${pathname}`; // Built only from protocol, hostname, port and path.
+	} catch {
+		return baseUrl.trim().replace(/\/+$/u, ''); // Not parseable as a URL: fall back to the trimmed string without trailing slashes.
+	}
+}
+
+function isOfficialDeepSeekEndpoint(baseUrl: string): boolean { // True when the base URL's hostname equals OFFICIAL_DEEPSEEK_API_HOST.
+	try {
+		return new URL(baseUrl).hostname.toLowerCase() === OFFICIAL_DEEPSEEK_API_HOST;
+	} catch {
+		return false; // An unparseable base URL is treated as not official.
+	}
+}
diff --git a/src/provider/thinking/shape.ts b/src/provider/thinking/shape.ts
new file mode 100644
index 0000000..9460fdc
--- /dev/null
+++ b/src/provider/thinking/shape.ts
@@ -0,0 +1,46 @@
+import type { DeepSeekRequest } from '../../types';
+import type { ThinkingEffort } from '../models';
+import type {
+	DeepSeekNativeReasoningEffort,
+	OpenAICompatibleReasoningEffort,
+	OpenAICompatibleReasoningRequest,
+} from './types';
+
+export function toOpenAICompatibleMaxRetryRequest( // Builds the retry body for a 'max' effort request in the OpenAI-compatible shape.
+	request: DeepSeekRequest,
+): OpenAICompatibleReasoningRequest {
+	return toOpenAICompatibleReasoningRequest(request, 'max'); // 'max' is mapped to 'xhigh' by toOpenAICompatibleReasoningEffort.
+}
+
+export function toOpenAICompatibleReasoningRequest( // Copies `request` without `thinking` and with `reasoning_effort` replaced by its OpenAI-compatible value.
+	request: DeepSeekRequest,
+	effort: ThinkingEffort,
+): OpenAICompatibleReasoningRequest {
+	const { thinking: _thinking, reasoning_effort: _reasoningEffort, ...rest } = request; // Destructured only to leave both fields out of `rest`.
+	return {
+		...rest,
+		reasoning_effort: toOpenAICompatibleReasoningEffort(effort),
+	};
+}
+
+export function toDeepSeekNativeReasoningRequest( // Returns a copy of `request` with `thinking.type` set and, unless effort is 'none', `reasoning_effort` set.
+	request: DeepSeekRequest,
+	effort: ThinkingEffort,
+): DeepSeekRequest {
+	const nativeRequest: DeepSeekRequest = {
+		...request, // Shallow copy: the caller's request object is not mutated.
+		thinking: { type: effort === 'none' ? 'disabled' : 'enabled' },
+	};
+	if (effort === 'none') {
+		delete nativeRequest.reasoning_effort; // Drop any effort inherited from `request` when thinking is disabled.
+	} else {
+		nativeRequest.reasoning_effort = effort as DeepSeekNativeReasoningEffort; // 'none' was handled by the branch above.
+	}
+	return nativeRequest;
+}
+
+export function toOpenAICompatibleReasoningEffort( // Maps 'max' to 'xhigh'; every other effort is returned unchanged.
+	effort: ThinkingEffort,
+): OpenAICompatibleReasoningEffort {
+	return effort === 'max' ? 'xhigh' : effort;
+}
diff --git a/src/provider/thinking/types.ts b/src/provider/thinking/types.ts
new file mode 100644
index 0000000..88acbbb
--- /dev/null
+++ b/src/provider/thinking/types.ts
@@ -0,0 +1,16 @@
+import type { DeepSeekRequest } from '../../types';
+import type { ThinkingEffort } from '../models';
+
+export type DeepSeekNativeReasoningEffort = Exclude<ThinkingEffort, 'none'>; // Every configured effort except 'none'.
+
+export type OpenAICompatibleReasoningEffort = Extract<ThinkingEffort, 'none' | 'high'> | 'xhigh'; // Effort values used in the OpenAI-compatible shape; 'xhigh' stands in for 'max'.
+
+export type OpenAICompatibleReasoningRequest = Omit< // DeepSeekRequest with `thinking` forbidden and `reasoning_effort` retyped.
+	DeepSeekRequest,
+	'thinking' | 'reasoning_effort'
+> & {
+	thinking?: never; // Typed as never so this shape cannot carry a `thinking` value.
+	reasoning_effort?: OpenAICompatibleReasoningEffort;
+};
+
+export type ChatCompletionRequestBody = DeepSeekRequest | OpenAICompatibleReasoningRequest; // Either request shape accepted by the client's streamChatCompletion.
diff --git a/src/types.ts b/src/types.ts
index f9e9689..e70b068 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -89,6 +89,7 @@ export interface StreamCallbacks {
 	onError: (error: Error) => void;
 	onDone: () => void;
 	onUsage?: (usage: DeepSeekUsage) => void;
+	onRawResponseData?: (data: string) => void;
 }
 
 // ---- Model definitions ----