From 668daf6d2f78257aa8e51265330ae3b86fe7d20b Mon Sep 17 00:00:00 2001 From: mci77777 Date: Tue, 28 Apr 2026 19:19:16 +0800 Subject: [PATCH] fix(proxy): limit provider circuit breaker failures --- .env.example | 2 +- README.en.md | 2 +- README.md | 2 +- src/app/v1/_lib/proxy/errors.ts | 18 ++-- src/app/v1/_lib/proxy/forwarder.ts | 24 +++-- src/app/v1/_lib/proxy/response-handler.ts | 95 +++---------------- src/app/v1/_lib/proxy/session.ts | 4 +- src/lib/config/env.schema.ts | 2 +- .../client-abort-vs-upstream-499.test.ts | 2 +- .../proxy-forwarder-fake-200-html.test.ts | 57 ++--------- ...handler-endpoint-circuit-isolation.test.ts | 18 ++-- .../proxy/response-handler-non200.test.ts | 32 ++----- 12 files changed, 58 insertions(+), 200 deletions(-) diff --git a/.env.example b/.env.example index 437f1f4bb..3e68cb13e 100644 --- a/.env.example +++ b/.env.example @@ -94,7 +94,7 @@ DASHBOARD_LOGS_POLL_INTERVAL_MS=5000 # 日志页自动刷新轮询间隔( # 使用场景: # - 默认关闭:适用于网络不稳定环境(如使用代理),避免因临时网络抖动触发熔断器 # - 启用:适用于网络稳定环境,连续网络错误也应触发熔断保护,避免持续请求不可达的供应商 -ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS=false +ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS=true # 端点级别熔断器 # 功能说明:控制是否启用端点级别的熔断器 diff --git a/README.en.md b/README.en.md index aa19ab2cc..e1bb09819 100644 --- a/README.en.md +++ b/README.en.md @@ -344,7 +344,7 @@ See **[docs/k8s-deployment.md](docs/k8s-deployment.md)** for full options, place | `API_KEY_AUTH_CACHE_TTL_SECONDS` | `60` | API Key auth cache TTL in seconds (default 60, max 3600). | | `SESSION_TTL` | `300` | Session cache window (seconds) that drives vendor reuse. | | `ENABLE_SECURE_COOKIES` | `true` | Browsers require HTTPS for Secure cookies; set to `false` when serving plain HTTP outside localhost. | -| `ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS` | `false` | When `true`, network errors also trip the circuit breaker for quicker isolation. | +| `ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS` | `true` | When `true`, only connectivity failures and timeouts affect provider circuit breaker state. | | `APP_PORT` | `23000` | Production port (override via container or process manager). | | `APP_URL` | empty | Populate to expose correct `servers` entries in OpenAPI docs. | | `API_TEST_TIMEOUT_MS` | `15000` | Timeout (ms) for provider API connectivity tests. Accepts 5000-120000 for regional tuning. | diff --git a/README.md b/README.md index 5a1a96ac4..e706a3f50 100644 --- a/README.md +++ b/README.md @@ -357,7 +357,7 @@ cch doctor # 诊断集群与部署状态 | `API_KEY_AUTH_CACHE_TTL_SECONDS` | `60` | API Key 鉴权缓存 TTL(秒,默认 60,最大 3600)。 | | `SESSION_TTL` | `300` | Session 缓存时间(秒),影响供应商复用策略。 | | `ENABLE_SECURE_COOKIES` | `true` | 仅 HTTPS 场景能设置 Secure Cookie;HTTP 访问(非 localhost)需改为 `false`。 | -| `ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS` | `false` | 是否将网络错误计入熔断器;开启后能更激进地阻断异常线路。 | +| `ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS` | `true` | 是否将网络错误计入熔断器;开启后只让连接失败、超时这类不可达问题影响供应商熔断。 | | `APP_PORT` | `23000` | 生产端口,可被容器或进程管理器覆盖。 | | `APP_URL` | 空 | 设置后 OpenAPI 文档 `servers` 将展示正确域名/端口。 | | `API_TEST_TIMEOUT_MS` | `15000` | 供应商 API 测试超时时间(毫秒,范围 5000-120000),跨境网络可适当提高。 | diff --git a/src/app/v1/_lib/proxy/errors.ts b/src/app/v1/_lib/proxy/errors.ts index 91e220263..adc5b13b4 100644 --- a/src/app/v1/_lib/proxy/errors.ts +++ b/src/app/v1/_lib/proxy/errors.ts @@ -549,8 +549,8 @@ export class ProxyError extends Error { * 错误分类:区分供应商错误和系统错误 */ export enum ErrorCategory { - PROVIDER_ERROR, // 供应商问题(所有 4xx/5xx HTTP 错误)→ 计入熔断器 + 直接切换 - SYSTEM_ERROR, // 系统/网络问题(fetch 网络异常)→ 不计入熔断器 + 先重试1次 + PROVIDER_ERROR, // 上游已响应但请求失败(HTTP 4xx/5xx、空响应等)→ 重试/切换,但不累计 provider 熔断 + SYSTEM_ERROR, // 系统/网络问题(fetch 网络异常)→ 按网络错误配置决定是否累计 provider 熔断 CLIENT_ABORT, // 客户端主动中断 → 不计入熔断器 + 不重试 + 直接返回 NON_RETRYABLE_CLIENT_ERROR, // 客户端输入错误(Prompt 超限、内容过滤、PDF 限制、Thinking 格式、参数缺失/额外参数、非法请求)→ 不计入熔断器 + 不重试 + 直接返回 RESOURCE_NOT_FOUND, // 上游 404 错误 → 不计入熔断器 + 直接切换供应商 @@ -844,7 +844,7 @@ export function isRateLimitError(error: unknown): error is RateLimitError { * * 设计原则: * 1. 结构化错误:携带供应商信息和失败原因 - * 2. 计入熔断器:空响应视为供应商问题 + * 2. 不累计 provider 熔断:空响应可能只影响当前请求 * 3. 触发故障切换:尝试其他供应商 */ export class EmptyResponseError extends Error { @@ -912,14 +912,14 @@ export function isEmptyResponseError(error: unknown): error is EmptyResponseErro * → 不应重试(重试也会失败) * → 应立即返回错误,提示用户修正输入 * - * 3. 供应商问题(ProxyError - 所有 4xx/5xx HTTP 错误) + * 3. 上游已响应但请求失败(ProxyError - 所有 4xx/5xx HTTP 错误) * → 说明请求到达供应商并得到响应,但供应商无法正常处理 - * → 应计入熔断器,连续失败时触发熔断保护 - * → 应直接切换到其他供应商 + * → 不累计 provider 熔断,避免部分模型/部分请求错误影响整个供应商 + * → 应按重试策略尝试当前供应商或切换到其他供应商 * * 4. 系统/网络问题(fetch 网络异常) * → 包括:DNS 解析失败、连接被拒绝、连接超时、网络中断等 - * → 不应计入供应商熔断器(不是供应商服务不可用) + * → 可按 ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS 配置累计 provider 熔断 * → 应先重试1次当前供应商(可能是临时网络抖动) * * 此函数会确保错误规则已加载后再进行检测 @@ -954,9 +954,9 @@ export async function categorizeErrorAsync(error: Error): Promise return ErrorCategory.PROVIDER_ERROR; // 其他 HTTP 错误都是供应商问题 } - // 优先级 3.2: 空响应错误 - 计入熔断器 + 触发故障切换 + // 优先级 3.2: 空响应错误 - 触发故障切换,但不累计 provider 熔断 if (error instanceof EmptyResponseError) { - return ErrorCategory.PROVIDER_ERROR; // 空响应视为供应商问题 + return ErrorCategory.PROVIDER_ERROR; // 空响应视为本次供应商请求失败 } // 优先级 4: 其他所有错误都是系统错误 diff --git a/src/app/v1/_lib/proxy/forwarder.ts b/src/app/v1/_lib/proxy/forwarder.ts index a0d7bc66d..351e5b20c 100644 --- a/src/app/v1/_lib/proxy/forwarder.ts +++ b/src/app/v1/_lib/proxy/forwarder.ts @@ -1733,7 +1733,7 @@ export class ProxyForwarder { } } else { logger.debug( - "ProxyForwarder: Network error not counted towards circuit breaker (disabled by default)", + "ProxyForwarder: Network error not counted towards circuit breaker (disabled by config)", { providerId: currentProvider.id, providerName: currentProvider.name, @@ -1816,7 +1816,7 @@ export class ProxyForwarder { break; // ⭐ 跳出内层循环,进入供应商切换逻辑 } - // ⭐ 6. 供应商错误处理(所有 4xx/5xx HTTP 错误 + 空响应错误,计入熔断器,重试耗尽后切换) + // ⭐ 6. 供应商错误处理(所有 4xx/5xx HTTP 错误 + 空响应错误,重试耗尽后切换) if (errorCategory === ErrorCategory.PROVIDER_ERROR) { // 🆕 空响应错误特殊处理(EmptyResponseError 不是 ProxyError) if (isEmptyResponseError(lastError)) { @@ -1844,7 +1844,7 @@ export class ProxyForwarder { attemptNumber: attemptCount, rawCrossProviderFallbackEnabled: rawCrossProviderFallbackEnabled || undefined, errorMessage: emptyError.message, - circuitFailureCount: health.failureCount + 1, + circuitFailureCount: health.failureCount, circuitFailureThreshold: config.failureThreshold, statusCode: 520, // Web Server Returned an Unknown Error errorDetails: { @@ -1877,12 +1877,7 @@ export class ProxyForwarder { continue; } - // 重试耗尽:计入熔断器并切换供应商 - if (!session.isProbeRequest()) { - if (shouldAccountCircuitBreaker) { - await recordFailure(currentProvider.id, lastError); - } - } + // 重试耗尽:空响应说明上游可达但本次响应不可用,不累计 provider 熔断。 ProxyForwarder.markProviderFailed(session, failedProviderIds, currentProvider.id); break; // 跳出内层循环,进入供应商切换逻辑 @@ -1892,6 +1887,7 @@ export class ProxyForwarder { const proxyError = lastError as ProxyError; const statusCode = proxyError.statusCode; const willRetry = attemptCount < maxAttemptsPerProvider; + const shouldRecordProviderCircuitFailure = statusCode === 524; if ( !isMcpRequest && @@ -1975,7 +1971,8 @@ export class ProxyForwarder { attemptNumber: attemptCount, rawCrossProviderFallbackEnabled: rawCrossProviderFallbackEnabled || undefined, errorMessage: errorMessage, - circuitFailureCount: health.failureCount + 1, // 包含本次失败 + circuitFailureCount: + health.failureCount + (shouldRecordProviderCircuitFailure ? 1 : 0), circuitFailureThreshold: config.failureThreshold, statusCode: statusCode, statusCodeInferred: proxyError.upstreamError?.statusCodeInferred ?? false, @@ -2015,7 +2012,8 @@ export class ProxyForwarder { continue; } - // ⭐ 重试耗尽:只有非探测请求才计入熔断器 + // ⭐ 重试耗尽:只有连接/超时类失败才累计 provider 熔断。 + // HTTP 500/502 等说明上游可达,可能只是部分请求或模型错误,不应影响整个供应商。 if (session.isProbeRequest()) { logger.debug("ProxyForwarder: Probe request error, skipping circuit breaker", { providerId: currentProvider.id, @@ -2023,7 +2021,7 @@ export class ProxyForwarder { messagesCount: session.getMessagesLength(), }); } else { - if (shouldAccountCircuitBreaker) { + if (shouldRecordProviderCircuitFailure && shouldAccountCircuitBreaker) { await recordFailure(currentProvider.id, lastError); } } @@ -3781,7 +3779,7 @@ export class ProxyForwarder { attempts.delete(attempt); ProxyForwarder.markProviderFailed(session, failedProviderIds, attempt.provider.id); - if (errorCategory === ErrorCategory.PROVIDER_ERROR && statusCode !== 404) { + if (errorCategory === ErrorCategory.PROVIDER_ERROR && statusCode === 524) { await recordFailure(attempt.provider.id, error); } diff --git a/src/app/v1/_lib/proxy/response-handler.ts b/src/app/v1/_lib/proxy/response-handler.ts index 4d8662f37..c6fdf2cad 100644 --- a/src/app/v1/_lib/proxy/response-handler.ts +++ b/src/app/v1/_lib/proxy/response-handler.ts @@ -461,7 +461,7 @@ type FinalizeDeferredStreamingResult = { * - Forwarder 收到 Response 且识别为 SSE 时,会在 session 上挂载 DeferredStreamingFinalization 元信息。 * - ResponseHandler 在后台读取完整 SSE 内容后,调用本函数: * - 如果内容看起来是上游错误 JSON(假 200),则: - * - 计入熔断器失败; + * - 记录本次请求失败,但不累计 provider 熔断; * - 不更新 session 智能绑定(避免把会话粘到坏 provider); * - 内部状态码改为“推断得到的 4xx/5xx”(未命中则回退 502), * 仅影响统计与后续重试选择,不影响本次客户端响应。 @@ -587,30 +587,16 @@ async function finalizeDeferredStreamingFinalizationIfNeeded( // 未自然结束:不更新 session 绑定(避免把会话粘到不稳定 provider),但要避免把它误记为 200 completed。 // - // 同时,为了让故障转移/熔断能正确工作: + // 同时,为了让故障转移能正确工作: // - 客户端主动中断:不计入熔断器(这通常不是供应商问题) - // - 非客户端中断:计入 provider/endpoint 熔断失败(与 timeout 路径保持一致) + // - 非客户端中断:记录为本次请求失败,但不累计 provider 熔断; + // 已经建立上游响应流的错误可能只是部分请求/模型异常,不应影响整个供应商。 if (!streamEndedNormally) { await clearSessionBinding(); - if (!clientAborted && session.getEndpointPolicy().allowCircuitBreakerAccounting) { - try { - // 动态导入:避免 proxy 模块与熔断器模块之间潜在的循环依赖。 - const { recordFailure } = await import("@/lib/circuit-breaker"); - await recordFailure(meta.providerId, new Error(errorMessage ?? "STREAM_ABORTED")); - } catch (cbError) { - logger.warn("[ResponseHandler] Failed to record streaming failure in circuit breaker", { - providerId: meta.providerId, - sessionId: session.sessionId ?? null, - error: cbError, - }); - } - - // NOTE: Do NOT call recordEndpointFailure here. Stream aborts are key-level - // errors (auth, rate limit, bad key). The endpoint itself delivered HTTP 200 - // successfully. Only forwarder-level failures (timeout, network error) and - // probe failures should penalize the endpoint circuit breaker. - } + // NOTE: Do NOT call recordFailure or recordEndpointFailure here. Stream aborts + // happen after the endpoint has already responded; forwarder-level connection + // failures remain the path that can affect provider/endpoint circuit state. session.addProviderToChain(providerForChain, { endpointId: meta.endpointId, @@ -640,22 +626,8 @@ async function finalizeDeferredStreamingFinalizationIfNeeded( const chainReason = effectiveStatusCode === 404 ? "resource_not_found" : "retry_failed"; - // 计入熔断器:让后续请求能正确触发故障转移/熔断。 - // - // 注意:404 语义在 forwarder 中属于 RESOURCE_NOT_FOUND,不计入熔断器(避免把“资源/模型不存在”当作供应商故障)。 - if (effectiveStatusCode !== 404 && session.getEndpointPolicy().allowCircuitBreakerAccounting) { - try { - // 动态导入:避免 proxy 模块与熔断器模块之间潜在的循环依赖。 - const { recordFailure } = await import("@/lib/circuit-breaker"); - await recordFailure(meta.providerId, new Error(detected.code)); - } catch (cbError) { - logger.warn("[ResponseHandler] Failed to record fake-200 error in circuit breaker", { - providerId: meta.providerId, - sessionId: session.sessionId ?? null, - error: cbError, - }); - } - } + // 假 200 代表请求已到达上游并拿到响应,可能只是部分请求或模型返回错误。 + // 这里保留本次失败和供应商切换信息,但不累计 provider 熔断。 // NOTE: Do NOT call recordEndpointFailure here. Fake-200 errors are key-level // issues (invalid key, auth failure). The endpoint returned HTTP 200 successfully; @@ -692,20 +664,8 @@ async function finalizeDeferredStreamingFinalizationIfNeeded( const chainReason = effectiveStatusCode === 404 ? "resource_not_found" : "retry_failed"; - // 计入熔断器:让后续请求能正确触发故障转移/熔断。 - // 注意:与 forwarder 口径保持一致:404 不计入熔断器(资源不存在不是供应商故障)。 - if (effectiveStatusCode !== 404 && session.getEndpointPolicy().allowCircuitBreakerAccounting) { - try { - const { recordFailure } = await import("@/lib/circuit-breaker"); - await recordFailure(meta.providerId, new Error(errorMessage)); - } catch (cbError) { - logger.warn("[ResponseHandler] Failed to record non-200 error in circuit breaker", { - providerId: meta.providerId, - sessionId: session.sessionId ?? null, - error: cbError, - }); - } - } + // 非 200 HTTP 响应说明上游可达;例如 500/502 可能只影响当前请求。 + // 这里不累计 provider 熔断,避免健康供应商因个别请求被整体排除。 // NOTE: Do NOT call recordEndpointFailure here. Non-200 HTTP errors (401, 429, // etc.) are typically key/auth-level errors. The endpoint was reachable and @@ -963,28 +923,12 @@ export class ProxyResponseHandler { }); } - // 非200状态码处理:解析错误响应并计入熔断器 + // 非200状态码处理:解析错误响应并记录到决策链。 let errorMessageForFinalize: string | undefined; if (statusCode >= 400) { const detected = detectUpstreamErrorFromSseOrJsonText(responseText); errorMessageForFinalize = detected.isError ? detected.code : `HTTP ${statusCode}`; - // 计入熔断器 - if (session.getEndpointPolicy().allowCircuitBreakerAccounting) { - try { - const { recordFailure } = await import("@/lib/circuit-breaker"); - await recordFailure(provider.id, new Error(errorMessageForFinalize)); - } catch (cbError) { - logger.warn( - "ResponseHandler: Failed to record non-200 error in circuit breaker (passthrough)", - { - providerId: provider.id, - error: cbError, - } - ); - } - } - // 记录到决策链 session.addProviderToChain(provider, { reason: "retry_failed", @@ -1358,24 +1302,11 @@ export class ProxyResponseHandler { }); } - // 非200状态码处理:解析错误响应并计入熔断器 + // 非200状态码处理:解析错误响应并记录到决策链。 if (statusCode >= 400) { const detected = detectUpstreamErrorFromSseOrJsonText(responseText); const errorMessageForDb = detected.isError ? detected.code : `HTTP ${statusCode}`; - // 计入熔断器 - if (session.getEndpointPolicy().allowCircuitBreakerAccounting) { - try { - const { recordFailure } = await import("@/lib/circuit-breaker"); - await recordFailure(provider.id, new Error(errorMessageForDb)); - } catch (cbError) { - logger.warn("ResponseHandler: Failed to record non-200 error in circuit breaker", { - providerId: provider.id, - error: cbError, - }); - } - } - // 记录到决策链 session.addProviderToChain(provider, { reason: "retry_failed", diff --git a/src/app/v1/_lib/proxy/session.ts b/src/app/v1/_lib/proxy/session.ts index 692e695d2..91a518cef 100644 --- a/src/app/v1/_lib/proxy/session.ts +++ b/src/app/v1/_lib/proxy/session.ts @@ -562,8 +562,8 @@ export class ProxySession { | "concurrent_limit_failed" | "request_success" // 修复:添加 request_success | "retry_success" - | "retry_failed" // 供应商错误(已计入熔断器) - | "system_error" // 系统/网络错误(不计入熔断器) + | "retry_failed" // 供应商错误(记录本次失败,通常不累计 provider 熔断) + | "system_error" // 系统/网络错误(按网络错误配置决定是否累计 provider 熔断) | "resource_not_found" // 上游 404 错误(不计入熔断器,仅切换供应商) | "retry_with_official_instructions" // Codex instructions 自动重试(官方) | "retry_with_cached_instructions" // Codex instructions 智能重试(缓存) diff --git a/src/lib/config/env.schema.ts b/src/lib/config/env.schema.ts index e9d3b431d..938e37bae 100644 --- a/src/lib/config/env.schema.ts +++ b/src/lib/config/env.schema.ts @@ -110,7 +110,7 @@ export const EnvSchema = z.object({ DEBUG_MODE: z.string().default("false").transform(booleanTransform), LOG_LEVEL: z.enum(["fatal", "error", "warn", "info", "debug", "trace"]).default("info"), TZ: z.string().default("Asia/Shanghai"), - ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS: z.string().default("false").transform(booleanTransform), + ENABLE_CIRCUIT_BREAKER_ON_NETWORK_ERRORS: z.string().default("true").transform(booleanTransform), // 端点级别熔断器开关 // - false (默认):禁用端点熔断器,所有端点均可使用 // - true:启用端点熔断器,连续失败的端点会被临时屏蔽 diff --git a/tests/unit/proxy/client-abort-vs-upstream-499.test.ts b/tests/unit/proxy/client-abort-vs-upstream-499.test.ts index 326ecd0fa..a449ecda9 100644 --- a/tests/unit/proxy/client-abort-vs-upstream-499.test.ts +++ b/tests/unit/proxy/client-abort-vs-upstream-499.test.ts @@ -4,7 +4,7 @@ * Validates that isClientAbortError() and categorizeErrorAsync() correctly * distinguish between: * - Local client disconnection (CCH synthesized 499) -> CLIENT_ABORT - * - Upstream HTTP 499 response -> PROVIDER_ERROR (triggers fallback/circuit-breaker) + * - Upstream HTTP 499 response -> PROVIDER_ERROR (triggers retry/fallback) */ import { describe, expect, it } from "vitest"; import { diff --git a/tests/unit/proxy/proxy-forwarder-fake-200-html.test.ts b/tests/unit/proxy/proxy-forwarder-fake-200-html.test.ts index 8b57d3915..03988d916 100644 --- a/tests/unit/proxy/proxy-forwarder-fake-200-html.test.ts +++ b/tests/unit/proxy/proxy-forwarder-fake-200-html.test.ts @@ -244,14 +244,7 @@ describe("ProxyForwarder - fake 200 HTML body", () => { expect(doForward.mock.calls[1][1].id).toBe(2); expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalledWith(session, [1]); - expect(mocks.recordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ message: "FAKE_200_HTML_BODY" }) - ); - const failure1 = mocks.recordFailure.mock.calls[0]?.[1]; - expect(failure1).toBeInstanceOf(ProxyError); - expect((failure1 as ProxyError).getClientSafeMessage()).toContain("HTML document"); - expect((failure1 as ProxyError).getClientSafeMessage()).toContain("Upstream detail:"); + expect(mocks.recordFailure).not.toHaveBeenCalled(); expect(mocks.recordSuccess).toHaveBeenCalledWith(2); expect(mocks.recordSuccess).not.toHaveBeenCalledWith(1); }); @@ -299,17 +292,7 @@ describe("ProxyForwarder - fake 200 HTML body", () => { expect(doForward.mock.calls[1][1].id).toBe(2); expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalledWith(session, [1]); - expect(mocks.recordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ message: "FAKE_200_JSON_ERROR_NON_EMPTY" }) - ); - const failure2 = mocks.recordFailure.mock.calls[0]?.[1]; - expect(failure2).toBeInstanceOf(ProxyError); - expect((failure2 as ProxyError).getClientSafeMessage()).toContain("JSON body"); - expect((failure2 as ProxyError).getClientSafeMessage()).toContain("`error`"); - expect((failure2 as ProxyError).getClientSafeMessage()).toContain("upstream blocked"); - expect((failure2 as ProxyError).upstreamError?.rawBody).toBe(jsonErrorBody); - expect((failure2 as ProxyError).upstreamError?.rawBodyTruncated).toBe(false); + expect(mocks.recordFailure).not.toHaveBeenCalled(); expect(mocks.recordSuccess).toHaveBeenCalledWith(2); expect(mocks.recordSuccess).not.toHaveBeenCalledWith(1); }); @@ -356,17 +339,7 @@ describe("ProxyForwarder - fake 200 HTML body", () => { expect(doForward.mock.calls[1][1].id).toBe(2); expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalledWith(session, [1]); - expect(mocks.recordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ message: "FAKE_200_JSON_ERROR_NON_EMPTY" }) - ); - const failure3 = mocks.recordFailure.mock.calls[0]?.[1]; - expect(failure3).toBeInstanceOf(ProxyError); - expect((failure3 as ProxyError).getClientSafeMessage()).toContain("JSON body"); - expect((failure3 as ProxyError).getClientSafeMessage()).toContain("`error`"); - expect((failure3 as ProxyError).getClientSafeMessage()).toContain("upstream blocked"); - expect((failure3 as ProxyError).upstreamError?.rawBody).toBe(jsonErrorBody); - expect((failure3 as ProxyError).upstreamError?.rawBodyTruncated).toBe(false); + expect(mocks.recordFailure).not.toHaveBeenCalled(); expect(mocks.recordSuccess).toHaveBeenCalledWith(2); expect(mocks.recordSuccess).not.toHaveBeenCalledWith(1); }); @@ -408,15 +381,7 @@ describe("ProxyForwarder - fake 200 HTML body", () => { const response = await ProxyForwarder.send(session); expect(await response.text()).toContain("ok"); - expect(mocks.recordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ message: "FAKE_200_JSON_ERROR_NON_EMPTY" }) - ); - - const failure = mocks.recordFailure.mock.calls[0]?.[1]; - expect(failure).toBeInstanceOf(ProxyError); - expect((failure as ProxyError).statusCode).toBe(429); - expect((failure as ProxyError).upstreamError?.statusCodeInferred).toBe(true); + expect(mocks.recordFailure).not.toHaveBeenCalled(); const chain = session.getProviderChain(); expect( @@ -474,14 +439,7 @@ describe("ProxyForwarder - fake 200 HTML body", () => { expect(doForward.mock.calls[1][1].id).toBe(2); expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalledWith(session, [1]); - expect(mocks.recordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ message: "FAKE_200_HTML_BODY" }) - ); - - const failure = mocks.recordFailure.mock.calls[0]?.[1]; - expect(failure).toBeInstanceOf(ProxyError); - expect((failure as ProxyError).upstreamError?.rawBody).toBe(htmlErrorBody); + expect(mocks.recordFailure).not.toHaveBeenCalled(); expect(mocks.recordSuccess).toHaveBeenCalledWith(2); expect(mocks.recordSuccess).not.toHaveBeenCalledWith(1); }); @@ -528,10 +486,7 @@ describe("ProxyForwarder - fake 200 HTML body", () => { expect(doForward.mock.calls[1][1].id).toBe(2); expect(mocks.pickRandomProviderWithExclusion).toHaveBeenCalledWith(session, [1]); - expect(mocks.recordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ reason: "missing_content" }) - ); + expect(mocks.recordFailure).not.toHaveBeenCalled(); expect(mocks.recordSuccess).toHaveBeenCalledWith(2); expect(mocks.recordSuccess).not.toHaveBeenCalledWith(1); }); diff --git a/tests/unit/proxy/response-handler-endpoint-circuit-isolation.test.ts b/tests/unit/proxy/response-handler-endpoint-circuit-isolation.test.ts index 05b6bfa7a..f8892a2e3 100644 --- a/tests/unit/proxy/response-handler-endpoint-circuit-isolation.test.ts +++ b/tests/unit/proxy/response-handler-endpoint-circuit-isolation.test.ts @@ -351,7 +351,7 @@ describe("Endpoint circuit breaker isolation", () => { setupCommonMocks(); }); - it("fake-200 error should call recordFailure but NOT recordEndpointFailure", async () => { + it("fake-200 error should NOT call provider or endpoint circuit breaker failure", async () => { const session = createSession(); setDeferredMeta(session, 42); @@ -359,10 +359,7 @@ describe("Endpoint circuit breaker isolation", () => { await ProxyResponseHandler.dispatch(session, response); await drainAsyncTasks(); - expect(mockRecordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ message: expect.stringContaining("FAKE_200") }) - ); + expect(mockRecordFailure).not.toHaveBeenCalled(); expect(mockRecordEndpointFailure).not.toHaveBeenCalled(); expect(SessionManager.clearSessionProvider).toHaveBeenCalledWith("fake-session"); @@ -378,7 +375,7 @@ describe("Endpoint circuit breaker isolation", () => { ).toBe(true); }); - it("高并发模式下,fake-200 流式错误仍应记录核心失败,但跳过 session 观测写入", async () => { + it("高并发模式下,fake-200 流式错误仍应记录核心失败,但不累计熔断", async () => { const session = createSession(); session.setHighConcurrencyModeEnabled(true); setDeferredMeta(session, 42); @@ -387,10 +384,7 @@ describe("Endpoint circuit breaker isolation", () => { await ProxyResponseHandler.dispatch(session, response); await drainAsyncTasks(); - expect(mockRecordFailure).toHaveBeenCalledWith( - 1, - expect.objectContaining({ message: expect.stringContaining("FAKE_200") }) - ); + expect(mockRecordFailure).not.toHaveBeenCalled(); expect(mockRecordEndpointFailure).not.toHaveBeenCalled(); expect(SessionManager.clearSessionProvider).toHaveBeenCalledWith("fake-session"); expect(SessionManager.updateSessionUsage).not.toHaveBeenCalled(); @@ -421,7 +415,7 @@ describe("Endpoint circuit breaker isolation", () => { ).toBe(true); }); - it("non-200 HTTP status should call recordFailure but NOT recordEndpointFailure", async () => { + it("non-200 HTTP status should NOT call provider or endpoint circuit breaker failure", async () => { const session = createSession(); // Set upstream status to 429 in deferred meta setDeferredStreamingFinalization(session, { @@ -441,7 +435,7 @@ describe("Endpoint circuit breaker isolation", () => { await ProxyResponseHandler.dispatch(session, response); await drainAsyncTasks(); - expect(mockRecordFailure).toHaveBeenCalledWith(1, expect.any(Error)); + expect(mockRecordFailure).not.toHaveBeenCalled(); expect(mockRecordEndpointFailure).not.toHaveBeenCalled(); }); diff --git a/tests/unit/proxy/response-handler-non200.test.ts b/tests/unit/proxy/response-handler-non200.test.ts index 74e0bc909..ad3757299 100644 --- a/tests/unit/proxy/response-handler-non200.test.ts +++ b/tests/unit/proxy/response-handler-non200.test.ts @@ -2,7 +2,7 @@ * Tests for non-200 status code handling in response-handler.ts * * Verifies that: - * - Non-200 responses trigger circuit breaker recording + * - Non-200 responses do not trigger provider circuit breaker recording * - JSON error responses are parsed correctly * - Provider chain is updated with error info * - Error messages are captured for logging @@ -267,7 +267,7 @@ describe("Non-200 Status Code Handling", () => { }); describe("handleNonStream with non-200 status code", () => { - it("should record failure in circuit breaker for 500 status", async () => { + it("should not record provider circuit breaker failure for 500 status", async () => { const session = createSession({ provider: mockProvider, messageContext: mockMessageContext, @@ -280,8 +280,6 @@ describe("Non-200 Status Code Handling", () => { const detected = detectUpstreamErrorFromSseOrJsonText(responseText); const errorMessageForDb = detected.isError ? detected.code : `HTTP ${statusCode}`; - await mockRecordFailure(mockProvider.id, new Error(errorMessageForDb)); - session.addProviderToChain(mockProvider, { reason: "retry_failed", attemptNumber: 1, @@ -290,10 +288,7 @@ describe("Non-200 Status Code Handling", () => { }); } - expect(mockRecordFailure).toHaveBeenCalledWith( - mockProvider.id, - expect.objectContaining({ message: "FAKE_200_JSON_ERROR_NON_EMPTY" }) - ); + expect(mockRecordFailure).not.toHaveBeenCalled(); const chain = session.getProviderChain(); expect(chain.length).toBeGreaterThan(0); @@ -313,8 +308,6 @@ describe("Non-200 Status Code Handling", () => { const detected = detectUpstreamErrorFromSseOrJsonText(responseText); const errorMessageForDb = detected.isError ? detected.code : `HTTP ${statusCode}`; - await mockRecordFailure(mockProvider.id, new Error(errorMessageForDb)); - session.addProviderToChain(mockProvider, { reason: "retry_failed", attemptNumber: 1, @@ -323,10 +316,7 @@ describe("Non-200 Status Code Handling", () => { }); } - expect(mockRecordFailure).toHaveBeenCalledWith( - mockProvider.id, - expect.objectContaining({ message: "HTTP 401" }) - ); + expect(mockRecordFailure).not.toHaveBeenCalled(); }); it("should handle 400 status with JSON error", async () => { @@ -342,8 +332,6 @@ describe("Non-200 Status Code Handling", () => { const detected = detectUpstreamErrorFromSseOrJsonText(responseText); const errorMessageForDb = detected.isError ? detected.code : `HTTP ${statusCode}`; - await mockRecordFailure(mockProvider.id, new Error(errorMessageForDb)); - session.addProviderToChain(mockProvider, { reason: "retry_failed", attemptNumber: 1, @@ -352,10 +340,7 @@ describe("Non-200 Status Code Handling", () => { }); } - expect(mockRecordFailure).toHaveBeenCalledWith( - mockProvider.id, - expect.objectContaining({ message: "FAKE_200_JSON_ERROR_MESSAGE_NON_EMPTY" }) - ); + expect(mockRecordFailure).not.toHaveBeenCalled(); }); it("should handle 429 rate limit error", async () => { @@ -371,8 +356,6 @@ describe("Non-200 Status Code Handling", () => { const detected = detectUpstreamErrorFromSseOrJsonText(responseText); const errorMessageForDb = detected.isError ? detected.code : `HTTP ${statusCode}`; - await mockRecordFailure(mockProvider.id, new Error(errorMessageForDb)); - session.addProviderToChain(mockProvider, { reason: "retry_failed", attemptNumber: 1, @@ -381,10 +364,7 @@ describe("Non-200 Status Code Handling", () => { }); } - expect(mockRecordFailure).toHaveBeenCalledWith( - mockProvider.id, - expect.objectContaining({ message: "FAKE_200_JSON_ERROR_NON_EMPTY" }) - ); + expect(mockRecordFailure).not.toHaveBeenCalled(); }); });