From 291c4cb3c3378d63827cf9e50d1fa2acdd02774e Mon Sep 17 00:00:00 2001
From: CookSleep <CookSleep@outlook.com>
Date: Sun, 7 Jun 2026 09:08:16 +0800
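Subject: [PATCH 1/2] fix(core): align maxRetries semantics

Treat maxRetries as the number of retries performed after the initial
attempt, so an operation is attempted at most maxRetries + 1 times.

- client.ts: loop while retryCount <= maxRetries and take the
  retries-exhausted branch once retryCount >= maxRetries.
- model.ts: derive maxAttempts = maxRetries + 1 (minimum 1) and use it
  in both retry loops, the retry log message and the stop condition.
- chat.ts: lower the schema minimum for maxRetries from 1 to 0 so that
  a configuration with no retries is accepted.
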
---
 packages/core/src/llm-core/platform/client.ts |  4 ++--
 packages/core/src/llm-core/platform/model.ts  | 18 +++++++++---------
 packages/core/src/services/chat.ts            |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/packages/core/src/llm-core/platform/client.ts b/packages/core/src/llm-core/platform/client.ts
index 00996b959..72a98a4df 100644
--- a/packages/core/src/llm-core/platform/client.ts
+++ b/packages/core/src/llm-core/platform/client.ts
@@ -68,7 +68,7 @@ export abstract class BasePlatformClient<
 
         const maxRetries = cfg.value.maxRetries ?? 5
 
-        while (retryCount < (maxRetries ?? 1)) {
+        while (retryCount <= maxRetries) {
             let oldConfig: ClientConfigWrapper<T> | undefined
 
             try {
@@ -93,7 +93,7 @@ export abstract class BasePlatformClient<
                     return false
                 }
 
-                if (retryCount === maxRetries - 1) {
+                if (retryCount >= maxRetries) {
                     if (oldConfig == null) {
                         this.ctx.logger.error(e)
                         unlock()
diff --git a/packages/core/src/llm-core/platform/model.ts b/packages/core/src/llm-core/platform/model.ts
index 7237c0cc3..48564c461 100644
--- a/packages/core/src/llm-core/platform/model.ts
+++ b/packages/core/src/llm-core/platform/model.ts
@@ -232,7 +232,7 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
         runManager?: CallbackManagerForLLMRun,
         reportUsage = true
     ): AsyncGenerator<ChatGenerationChunk> {
-        const maxRetries = Math.max(1, this._options.maxRetries ?? 1)
+        const maxAttempts = Math.max(1, (this._options.maxRetries ?? 1) + 1)
         let promptTokens = 0
 
         if (reportUsage) {
@@ -247,7 +247,7 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
             input: messages
         }
 
-        for (let attempt = 0; attempt < maxRetries; attempt++) {
+        for (let attempt = 0; attempt < maxAttempts; attempt++) {
             const latestTokenUsage = this._createTokenUsageTracker()
             let stream: AsyncGenerator<ChatGenerationChunk> | null = null
             let hasChunk = false
@@ -303,7 +303,7 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
                         error,
                         hasChunk,
                         attempt,
-                        maxRetries
+                        maxAttempts
                     )
                 ) {
                     if (hasChunk) {
@@ -323,7 +323,7 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
                 }
 
                 logger.debug(
-                    `Stream failed before first chunk (attempt ${attempt + 1}/${maxRetries}), retrying...`,
+                    `Stream failed before first chunk (attempt ${attempt + 1}/${maxAttempts}), retrying...`,
                     error
                 )
                 await sleep(2000 * 2 ** attempt)
@@ -470,10 +470,10 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
         error: unknown,
         hasChunk: boolean,
         attempt: number,
-        maxRetries: number
+        maxAttempts: number
     ): boolean {
         return (
-            this._isAbortError(error) || hasChunk || attempt === maxRetries - 1
+            this._isAbortError(error) || hasChunk || attempt === maxAttempts - 1
         )
     }
 
@@ -630,10 +630,10 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
         options: this['ParsedCallOptions'],
         runManager?: CallbackManagerForLLMRun
     ): Promise<ChatGeneration> {
-        const maxRetries = Math.max(1, this._options.maxRetries ?? 1)
+        const maxAttempts = Math.max(1, (this._options.maxRetries ?? 1) + 1)
 
         const generateWithRetry = async () => {
-            for (let attempt = 0; attempt < maxRetries; attempt++) {
+            for (let attempt = 0; attempt < maxAttempts; attempt++) {
                 try {
                     let response: ChatGeneration
 
@@ -675,7 +675,7 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
                     if (
                         options.stream ||
                         this._isAbortError(error) ||
-                        attempt === maxRetries - 1
+                        attempt === maxAttempts - 1
                     ) {
                         throw error
                     }
diff --git a/packages/core/src/services/chat.ts b/packages/core/src/services/chat.ts
index 76c88c504..4fac7f103 100644
--- a/packages/core/src/services/chat.ts
+++ b/packages/core/src/services/chat.ts
@@ -1295,7 +1295,7 @@ export namespace ChatLunaPlugin {
                 Schema.const('default'),
                 Schema.const('balance')
             ]).default('default'),
-            maxRetries: Schema.number().min(1).max(6).default(5),
+            maxRetries: Schema.number().min(0).max(6).default(5),
             timeout: Schema.number().default(300 * 1000),
             proxyMode: Schema.union([
                 Schema.const('system'),

From 36c1aac27af35ea985cc7268b7d62c0a8d386970 Mon Sep 17 00:00:00 2001
From: CookSleep <CookSleep@outlook.com>
Date: Sun, 7 Jun 2026 09:18:50 +0800
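Subject: [PATCH 2/2] fix(core): preserve retry fallback default

The previous patch kept the `?? 1` fallback while adding 1 to the
result, so an unset maxRetries produced two attempts. Before the
semantics change, an unset value resulted in a single attempt.

Fall back to `?? 0` in both model.ts retry loops so that an unset
maxRetries still yields exactly one attempt.
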
---
 packages/core/src/llm-core/platform/model.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/core/src/llm-core/platform/model.ts b/packages/core/src/llm-core/platform/model.ts
index 48564c461..8bcc6b99c 100644
--- a/packages/core/src/llm-core/platform/model.ts
+++ b/packages/core/src/llm-core/platform/model.ts
@@ -232,7 +232,7 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
         runManager?: CallbackManagerForLLMRun,
         reportUsage = true
     ): AsyncGenerator<ChatGenerationChunk> {
-        const maxAttempts = Math.max(1, (this._options.maxRetries ?? 1) + 1)
+        const maxAttempts = Math.max(1, (this._options.maxRetries ?? 0) + 1)
         let promptTokens = 0
 
         if (reportUsage) {
@@ -630,7 +630,7 @@ export class ChatLunaChatModel extends BaseChatModel<ChatLunaModelCallOptions> {
         options: this['ParsedCallOptions'],
         runManager?: CallbackManagerForLLMRun
     ): Promise<ChatGeneration> {
-        const maxAttempts = Math.max(1, (this._options.maxRetries ?? 1) + 1)
+        const maxAttempts = Math.max(1, (this._options.maxRetries ?? 0) + 1)
 
         const generateWithRetry = async () => {
             for (let attempt = 0; attempt < maxAttempts; attempt++) {