Skip to content

Commit 1cfcf33

Browse files
jahooma and claude committed
Route deepseek-v4-flash to Fireworks when DeepSeek API is unhealthy
Adds Fireworks as a transparent fallback for deepseek-v4-flash, gated by a passive circuit breaker so we only divert when the official DeepSeek API actually misbehaves.

- New deepseek-health.ts circuit breaker: 3 failures in 60s opens the circuit for 5 min; the next request after expiry probes DeepSeek again and resets on success. No background polling — every user request is itself the probe.
- Tighter 60s headersTimeout for the Flash undici agent so dead-API requests fail fast (the existing 30-min default is kept for reasoning models on v4-pro).
- handleDeepSeek{Stream,NonStream} now wrap the fetch call so network errors, timeouts, and 5xx/408/429 responses feed the breaker; 2xx resets it.
- _post.ts routes to Fireworks when the circuit is open and adds inline pre-stream failover so the first user to hit an outage also gets a Fireworks response instead of an error.
- Adds accounts/fireworks/models/deepseek-v4-flash to the Fireworks model + pricing maps.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 547e061 commit 1cfcf33

5 files changed

Lines changed: 405 additions & 5 deletions

File tree

web/src/app/api/v1/chat/completions/_post.ts

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ import {
6767
handleDeepSeekStream,
6868
isDeepSeekModel,
6969
} from '@/llm-api/deepseek'
70+
import {
71+
isLikelyDeepSeekOutage,
72+
shouldBypassDeepSeek,
73+
} from '@/llm-api/deepseek-health'
7074
import {
7175
handleMoonshotNonStream,
7276
handleMoonshotStream,
@@ -118,6 +122,23 @@ import { withDefaultProperties } from '@codebuff/common/analytics'
118122
import { checkFreeModeRateLimit as defaultCheckFreeModeRateLimit } from './free-mode-rate-limiter'
119123
import { beginChatCompletionRequestMetrics } from './request-metrics'
120124

125+
/**
 * Reports whether a DeepSeek request that just failed may be retried
 * transparently on Fireworks.
 *
 * Only models accepted by `isFireworksModel` are eligible. A `DeepSeekError`
 * is classified by its HTTP status code alone (5xx, 408 and 429 count as
 * outages); any other thrown value is handed to `isLikelyDeepSeekOutage`
 * unchanged so the error itself can be inspected (network/timeout errors).
 *
 * The circuit-breaker failure was already recorded inside the DeepSeek
 * handler, so this function records nothing itself.
 *
 * @param error - The value thrown by the DeepSeek handler.
 * @param model - The model id of the failed request.
 * @returns `true` when the request should fall back to Fireworks.
 */
function canFailoverDeepSeekToFireworks(
  error: unknown,
  model: string,
): boolean {
  const hasFireworksFallback = isFireworksModel(model)
  if (!hasFireworksFallback) {
    return false
  }

  // Provider errors carry a status code; everything else is judged as-is.
  return error instanceof DeepSeekError
    ? isLikelyDeepSeekOutage(undefined, error.statusCode)
    : isLikelyDeepSeekOutage(error)
}
141+
121142
export const formatQuotaResetCountdown = (
122143
nextQuotaReset: string | null | undefined,
123144
): string => {
@@ -814,10 +835,18 @@ export async function postChatCompletions(params: {
814835
const useMoonshot = !useOpenCodeZen && isMoonshotModel(typedBody.model)
815836
const useCanopyWave =
816837
!useMoonshot && !useOpenCodeZen && isCanopyWaveModel(typedBody.model)
838+
const deepseekBypassed = shouldBypassDeepSeek(typedBody.model)
839+
if (deepseekBypassed) {
840+
providerLogger.info(
841+
{ model: typedBody.model },
842+
'DeepSeek circuit open — routing to Fireworks fallback',
843+
)
844+
}
817845
const useDeepSeek =
818846
!useMoonshot &&
819847
!useOpenCodeZen &&
820848
!useCanopyWave &&
849+
!deepseekBypassed &&
821850
isDeepSeekModel(typedBody.model)
822851
const useFireworks =
823852
!useMoonshot &&
@@ -841,6 +870,23 @@ export async function postChatCompletions(params: {
841870
logger: providerLogger,
842871
insertMessageBigquery,
843872
}
873+
// Streams from DeepSeek. If the handler throws an error that
// canFailoverDeepSeekToFireworks accepts, a warning is logged and the same
// arguments are sent to Fireworks instead; any other error is rethrown.
const callDeepSeekStream = async () => {
  try {
    return await handleDeepSeekStream(baseArgs)
  } catch (error) {
    const eligibleForFailover = canFailoverDeepSeekToFireworks(
      error,
      typedBody.model,
    )
    if (!eligibleForFailover) {
      throw error
    }

    const logContext = {
      model: typedBody.model,
      error: getErrorObject(error),
    }
    providerLogger.warn(
      logContext,
      'DeepSeek failed pre-stream — falling back to Fireworks',
    )
    return await handleFireworksStream(baseArgs)
  }
}
844890
const stream = useSiliconFlow
845891
? await handleSiliconFlowStream(baseArgs)
846892
: useMoonshot
@@ -850,7 +896,7 @@ export async function postChatCompletions(params: {
850896
: useCanopyWave
851897
? await handleCanopyWaveStream(baseArgs)
852898
: useDeepSeek
853-
? await handleDeepSeekStream(baseArgs)
899+
? await callDeepSeekStream()
854900
: useFireworks
855901
? await handleFireworksStream(baseArgs)
856902
: useOpenAIDirect
@@ -886,10 +932,18 @@ export async function postChatCompletions(params: {
886932
const useMoonshot = !useOpenCodeZen && isMoonshotModel(model)
887933
const useCanopyWave =
888934
!useMoonshot && !useOpenCodeZen && isCanopyWaveModel(model)
935+
const deepseekBypassed = shouldBypassDeepSeek(model)
936+
if (deepseekBypassed) {
937+
providerLogger.info(
938+
{ model },
939+
'DeepSeek circuit open — routing to Fireworks fallback',
940+
)
941+
}
889942
const useDeepSeek =
890943
!useMoonshot &&
891944
!useOpenCodeZen &&
892945
!useCanopyWave &&
946+
!deepseekBypassed &&
893947
isDeepSeekModel(model)
894948
const useFireworks =
895949
!useMoonshot &&
@@ -914,6 +968,20 @@ export async function postChatCompletions(params: {
914968
logger: providerLogger,
915969
insertMessageBigquery,
916970
}
971+
// Issues the non-streaming DeepSeek request. If the handler throws an error
// that canFailoverDeepSeekToFireworks accepts, a warning is logged and the
// same arguments are sent to Fireworks instead; any other error is rethrown.
const callDeepSeekNonStream = async () => {
  try {
    return await handleDeepSeekNonStream(baseArgs)
  } catch (error) {
    const eligibleForFailover = canFailoverDeepSeekToFireworks(error, model)
    if (!eligibleForFailover) {
      throw error
    }

    const logContext = { model, error: getErrorObject(error) }
    providerLogger.warn(
      logContext,
      'DeepSeek failed — falling back to Fireworks',
    )
    return await handleFireworksNonStream(baseArgs)
  }
}
917985
const nonStreamRequest = useSiliconFlow
918986
? handleSiliconFlowNonStream(baseArgs)
919987
: useMoonshot
@@ -923,7 +991,7 @@ export async function postChatCompletions(params: {
923991
: useCanopyWave
924992
? handleCanopyWaveNonStream(baseArgs)
925993
: useDeepSeek
926-
? handleDeepSeekNonStream(baseArgs)
994+
? callDeepSeekNonStream()
927995
: useFireworks
928996
? handleFireworksNonStream(baseArgs)
929997
: shouldUseOpenAIEndpoint
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
2+
3+
import { deepseekModels } from '@codebuff/common/constants/model-config'
4+
5+
import {
6+
DEEPSEEK_CIRCUIT_CONFIG,
7+
__resetDeepSeekCircuitForTests,
8+
isDeepSeekCircuitOpen,
9+
isLikelyDeepSeekOutage,
10+
recordDeepSeekFailure,
11+
recordDeepSeekSuccess,
12+
shouldBypassDeepSeek,
13+
} from '../deepseek-health'
14+
15+
describe('DeepSeek circuit breaker', () => {
  // Records `count` failures in a row.
  const recordFailures = (count: number): void => {
    let remaining = count
    while (remaining > 0) {
      recordDeepSeekFailure()
      remaining -= 1
    }
  }

  // Every test starts from, and leaves behind, a freshly reset breaker.
  beforeEach(() => {
    __resetDeepSeekCircuitForTests()
  })
  afterEach(() => {
    __resetDeepSeekCircuitForTests()
  })

  it('starts closed', () => {
    expect(isDeepSeekCircuitOpen()).toBe(false)
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4Flash)).toBe(false)
  })

  it('stays closed after fewer failures than threshold', () => {
    recordFailures(DEEPSEEK_CIRCUIT_CONFIG.FAILURE_THRESHOLD - 1)
    expect(isDeepSeekCircuitOpen()).toBe(false)
  })

  it('opens after threshold failures in the window', () => {
    recordFailures(DEEPSEEK_CIRCUIT_CONFIG.FAILURE_THRESHOLD)
    expect(isDeepSeekCircuitOpen()).toBe(true)
  })

  it('only bypasses v4-flash variants, not v4-pro', () => {
    recordFailures(DEEPSEEK_CIRCUIT_CONFIG.FAILURE_THRESHOLD)

    // [model id, whether the open circuit should divert it]
    const expectations: Array<[string, boolean]> = [
      [deepseekModels.deepseekV4Flash, true],
      [deepseekModels.deepseekV4FlashDirect, true],
      [deepseekModels.deepseekV4Pro, false],
      [deepseekModels.deepseekV4ProDirect, false],
      ['anthropic/claude-sonnet-4.5', false],
    ]
    for (const [model, bypassed] of expectations) {
      expect(shouldBypassDeepSeek(model)).toBe(bypassed)
    }
  })

  it('resets on success', () => {
    recordFailures(DEEPSEEK_CIRCUIT_CONFIG.FAILURE_THRESHOLD)
    expect(isDeepSeekCircuitOpen()).toBe(true)

    recordDeepSeekSuccess()

    expect(isDeepSeekCircuitOpen()).toBe(false)
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4Flash)).toBe(false)
  })
})
65+
66+
describe('isLikelyDeepSeekOutage', () => {
  it('treats 5xx, 408, 429 as outages', () => {
    const outageStatuses = [500, 502, 503, 504, 408, 429]
    for (const status of outageStatuses) {
      expect(isLikelyDeepSeekOutage(undefined, status)).toBe(true)
    }
  })

  it('does not treat 4xx (other than 408/429) as outages', () => {
    const clientErrorStatuses = [400, 401, 403, 404]
    for (const status of clientErrorStatuses) {
      expect(isLikelyDeepSeekOutage(undefined, status)).toBe(false)
    }
  })

  it('classifies undici header-timeout errors as outages', () => {
    const timeoutError: Error & { code?: string } = new Error(
      'Headers Timeout Error',
    )
    timeoutError.code = 'UND_ERR_HEADERS_TIMEOUT'
    expect(isLikelyDeepSeekOutage(timeoutError)).toBe(true)
  })

  it('classifies common network errors as outages', () => {
    const networkCodes = ['ECONNRESET', 'ECONNREFUSED', 'ENOTFOUND', 'ETIMEDOUT']
    networkCodes.forEach((code) => {
      const networkError = Object.assign(new Error('boom'), { code })
      expect(isLikelyDeepSeekOutage(networkError)).toBe(true)
    })
  })

  it('classifies AbortError as outage', () => {
    const abortError = Object.assign(new Error('aborted'), {
      name: 'AbortError',
    })
    expect(isLikelyDeepSeekOutage(abortError)).toBe(true)
  })

  it('treats generic non-network errors as non-outage', () => {
    const nonOutageInputs: unknown[] = [
      new Error('bad json'),
      undefined,
      'string',
    ]
    for (const input of nonOutageInputs) {
      expect(isLikelyDeepSeekOutage(input)).toBe(false)
    }
  })
})

0 commit comments

Comments
 (0)