diff --git a/apps/web/src/lib/ai-gateway/providers/anthropic.constants.ts b/apps/web/src/lib/ai-gateway/providers/anthropic.constants.ts index 5a591b871..8272fafb0 100644 --- a/apps/web/src/lib/ai-gateway/providers/anthropic.constants.ts +++ b/apps/web/src/lib/ai-gateway/providers/anthropic.constants.ts @@ -20,6 +20,7 @@ export const claude_sonnet_clawsetup_model: KiloExclusiveModel = { flags: ['reasoning', 'vision'], pricing: null, exclusive_to: [], + inference_provider_restriction: [], }; export function isClaudeModel(requestedModel: string) { diff --git a/apps/web/src/lib/ai-gateway/providers/apply-provider-specific-logic.ts b/apps/web/src/lib/ai-gateway/providers/apply-provider-specific-logic.ts index e347a8038..883c38e91 100644 --- a/apps/web/src/lib/ai-gateway/providers/apply-provider-specific-logic.ts +++ b/apps/web/src/lib/ai-gateway/providers/apply-provider-specific-logic.ts @@ -7,7 +7,7 @@ import type { import { applyMistralModelSettings, isMistralModel } from '@/lib/ai-gateway/providers/mistral'; import { applyXaiModelSettings, isGrokModel } from '@/lib/ai-gateway/providers/xai'; import { kiloExclusiveModels } from '@/lib/ai-gateway/models'; -import { getInferenceProvider } from '@/lib/ai-gateway/providers/kilo-exclusive-model'; +import { applyKiloExclusiveModelSettings } from '@/lib/ai-gateway/providers/kilo-exclusive-model'; import { applyAnthropicModelSettings } from '@/lib/ai-gateway/providers/anthropic'; import { isClaudeModel, isHaikuModel } from '@/lib/ai-gateway/providers/anthropic.constants'; import { OpenRouterInferenceProviderIdSchema } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id'; @@ -112,15 +112,7 @@ export function applyProviderSpecificLogic( ) { const kiloExclusiveModel = kiloExclusiveModels.find(m => m.public_id === requestedModel); if (kiloExclusiveModel) { - requestToMutate.body.model = kiloExclusiveModel.internal_id; - const inferenceProvider = getInferenceProvider(kiloExclusiveModel); - if (inferenceProvider) { - 
if (requestToMutate.body.provider) { - requestToMutate.body.provider.only = [inferenceProvider]; - } else { - requestToMutate.body.provider = { only: [inferenceProvider] }; - } - } + applyKiloExclusiveModelSettings(requestToMutate, kiloExclusiveModel); } if (isClaudeModel(requestedModel)) { diff --git a/apps/web/src/lib/ai-gateway/providers/google.ts b/apps/web/src/lib/ai-gateway/providers/google.ts index 753fefda1..5ce5bd8b2 100644 --- a/apps/web/src/lib/ai-gateway/providers/google.ts +++ b/apps/web/src/lib/ai-gateway/providers/google.ts @@ -25,6 +25,7 @@ export const gemma_4_26b_a4b_it_free_model: KiloExclusiveModel = { internal_id: 'google/gemma-4-26b-a4b-it', pricing: null, exclusive_to: [], + inference_provider_restriction: [], }; export function isGemini3Model(model: string) { diff --git a/apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.test.ts b/apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.test.ts new file mode 100644 index 000000000..4e1098aee --- /dev/null +++ b/apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.test.ts @@ -0,0 +1,118 @@ +import { describe, it, expect } from '@jest/globals'; +import { + applyKiloExclusiveModelSettings, + type KiloExclusiveModel, +} from '@/lib/ai-gateway/providers/kilo-exclusive-model'; +import type { + GatewayRequest, + OpenRouterChatCompletionRequest, + OpenRouterProviderConfig, +} from '@/lib/ai-gateway/providers/openrouter/types'; +import type { OpenRouterInferenceProviderId } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id'; + +function makeModel( + overrides: Partial & Pick +): KiloExclusiveModel { + return { + public_id: 'kilo/test-model', + display_name: 'Test', + description: '', + context_length: 0, + max_completion_tokens: 0, + status: 'public', + flags: [], + gateway: 'openrouter', + pricing: null, + exclusive_to: [], + inference_provider_restriction: [], + ...overrides, + }; +} + +function makeRequest( + provider?: OpenRouterProviderConfig, + model = 
'public/id' +): GatewayRequest & { kind: 'chat_completions' } { + const body: OpenRouterChatCompletionRequest = { + model, + messages: [], + ...(provider ? { provider } : {}), + } as OpenRouterChatCompletionRequest; + return { kind: 'chat_completions', body }; +} + +describe('applyKiloExclusiveModelSettings', () => { + it('rewrites the public model id to the internal id', () => { + const req = makeRequest(undefined, 'kilo/test-model'); + applyKiloExclusiveModelSettings(req, makeModel({ internal_id: 'vendor/real-model' })); + expect(req.body.model).toBe('vendor/real-model'); + }); + + it('leaves provider untouched when there is no restriction', () => { + const req = makeRequest({ only: ['anthropic'], zdr: true }); + applyKiloExclusiveModelSettings(req, makeModel({ internal_id: 'vendor/x' })); + expect(req.body.provider).toEqual({ only: ['anthropic'], zdr: true }); + }); + + it('creates provider.only when no provider block is present', () => { + const req = makeRequest(undefined); + applyKiloExclusiveModelSettings( + req, + makeModel({ + internal_id: 'vendor/x', + inference_provider_restriction: [ + 'anthropic', + 'amazon-bedrock', + ] as OpenRouterInferenceProviderId[], + }) + ); + expect(req.body.provider).toEqual({ only: ['anthropic', 'amazon-bedrock'] }); + }); + + it('adds only to an existing provider block that has no only set', () => { + const req = makeRequest({ zdr: true }); + applyKiloExclusiveModelSettings( + req, + makeModel({ + internal_id: 'vendor/x', + inference_provider_restriction: ['anthropic'] as OpenRouterInferenceProviderId[], + }) + ); + expect(req.body.provider).toEqual({ zdr: true, only: ['anthropic'] }); + }); + + it('intersects caller-supplied only with the restriction', () => { + const req = makeRequest({ only: ['anthropic', 'openai', 'amazon-bedrock'] }); + applyKiloExclusiveModelSettings( + req, + makeModel({ + internal_id: 'vendor/x', + inference_provider_restriction: [ + 'anthropic', + 'amazon-bedrock', + ] as 
OpenRouterInferenceProviderId[],
+      })
+    );
+    expect(req.body.provider?.only?.sort()).toEqual(['amazon-bedrock', 'anthropic']);
+  });
+
+  it('produces an empty only list when caller only and restriction are disjoint', () => {
+    const req = makeRequest({ only: ['openai'] });
+    applyKiloExclusiveModelSettings(
+      req,
+      makeModel({
+        internal_id: 'vendor/x',
+        inference_provider_restriction: ['anthropic'] as OpenRouterInferenceProviderId[],
+      })
+    );
+    expect(req.body.provider?.only).toEqual([]);
+  });
+
+  it('does not clone shared configuration when there is no restriction', () => {
+    const sharedProvider: OpenRouterProviderConfig = { only: ['openai'] };
+    const req = makeRequest(sharedProvider);
+    applyKiloExclusiveModelSettings(req, makeModel({ internal_id: 'vendor/x' }));
+    expect(req.body.provider).toBe(sharedProvider);
+    expect(sharedProvider.only).toEqual(['openai']);
+  });
+});
diff --git a/apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.ts b/apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.ts
index 87f1244f5..288f65c1f 100644
--- a/apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.ts
+++ b/apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.ts
@@ -4,6 +4,7 @@ import {
   type OpenRouterInferenceProviderId,
 } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id';
 import type { ProviderId } from '@/lib/ai-gateway/providers/types';
+import type { GatewayRequest } from '@/lib/ai-gateway/providers/openrouter/types';
 
 export type KiloExclusiveModelFlag = 'reasoning' | 'vision' | 'stealth' | 'vercel-routing';
 
@@ -35,8 +36,52 @@ export type KiloExclusiveModel = {
   pricing: Pricing | null;
   /** Features allowed to use this model. Empty array means no restriction. */
   exclusive_to: ReadonlyArray;
+  /**
+   * Upstream inference providers this model may be routed to; empty means no
+   * restriction. Only honored by the OpenRouter and Vercel AI Gateway upstreams.
+   */
+  inference_provider_restriction: ReadonlyArray<OpenRouterInferenceProviderId>;
 };
 
+/**
+ * Rewrites a gateway request so it targets a Kilo-exclusive model.
+ *
+ * Always replaces `body.model` with the model's `internal_id`. When the model
+ * declares a non-empty `inference_provider_restriction`, `body.provider.only`
+ * is additionally narrowed to it: a caller-supplied `only` list is intersected
+ * with the restriction (deduplicated, caller order kept); otherwise the
+ * restriction itself becomes the `only` list. All other provider fields are
+ * left as they are.
+ *
+ * @param requestToMutate - Request whose `body` is modified in place.
+ * @param kiloExclusiveModel - Model definition supplying the internal id and
+ *   the provider restriction.
+ */
+export function applyKiloExclusiveModelSettings(
+  requestToMutate: GatewayRequest,
+  kiloExclusiveModel: KiloExclusiveModel
+) {
+  requestToMutate.body.model = kiloExclusiveModel.internal_id;
+  const restriction = kiloExclusiveModel.inference_provider_restriction;
+  if (restriction.length === 0) {
+    // No restriction: leave the caller's provider block exactly as it is.
+    return;
+  }
+  const provider = requestToMutate.body.provider;
+  if (provider?.only) {
+    // Plain filter rather than `Set.prototype.intersection` (ES2025): works on
+    // older runtimes and yields a size-independent, caller-ordered result.
+    const allowed = new Set<string>(restriction);
+    // NOTE(review): a disjoint caller list yields `only: []` — confirm the
+    // upstream treats an empty list as "no provider allowed", not "no filter".
+    provider.only = [...new Set(provider.only)].filter(id => allowed.has(id));
+  } else if (provider) {
+    provider.only = [...restriction];
+  } else {
+    requestToMutate.body.provider = { only: [...restriction] };
+  }
+}
+
 export function getInferenceProvider(
   model: KiloExclusiveModel
 ): OpenRouterInferenceProviderId | null {
diff --git a/apps/web/src/lib/ai-gateway/providers/minimax.ts b/apps/web/src/lib/ai-gateway/providers/minimax.ts
index 47d541f8c..645dbf363 100644
--- a/apps/web/src/lib/ai-gateway/providers/minimax.ts
+++ b/apps/web/src/lib/ai-gateway/providers/minimax.ts
@@ -13,6 +13,7 @@ export const minimax_m25_free_model: KiloExclusiveModel = {
   internal_id: 'minimax/minimax-m2.5',
   pricing: null,
   exclusive_to: [],
+  inference_provider_restriction: [],
 };
 
 export function isMinimaxModel(model: string) {
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
index 702d5bcf8..95ebb2cae 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -13,4 +13,5 @@ export const morph_warp_grep_free_model: KiloExclusiveModel = {
   internal_id: 'morph-warp-grep-v2',
   pricing: null,
   exclusive_to: [],
+  inference_provider_restriction: [],
 };
diff --git a/apps/web/src/lib/ai-gateway/providers/qwen.ts b/apps/web/src/lib/ai-gateway/providers/qwen.ts
index 795c6bda5..8ef16937f 100644
--- a/apps/web/src/lib/ai-gateway/providers/qwen.ts
+++ 
b/apps/web/src/lib/ai-gateway/providers/qwen.ts @@ -102,6 +102,7 @@ export const qwen36_plus_model: KiloExclusiveModel = { }, ]), exclusive_to: [], + inference_provider_restriction: [], }; export const qwen36_flash_model: KiloExclusiveModel = { @@ -136,6 +137,7 @@ export const qwen36_flash_model: KiloExclusiveModel = { }, ]), exclusive_to: [], + inference_provider_restriction: [], }; export const qwen36_max_preview_model: KiloExclusiveModel = { @@ -170,6 +172,7 @@ export const qwen36_max_preview_model: KiloExclusiveModel = { }, ]), exclusive_to: [], + inference_provider_restriction: [], }; export const qwen36_27b_model: KiloExclusiveModel = { @@ -190,6 +193,7 @@ export const qwen36_27b_model: KiloExclusiveModel = { input_cache_write_per_million: null, }), exclusive_to: [], + inference_provider_restriction: [], }; export const alibabaDirectModels: ReadonlyArray = [ diff --git a/apps/web/src/lib/ai-gateway/providers/seed.ts b/apps/web/src/lib/ai-gateway/providers/seed.ts index a636d0eeb..6084f98e6 100644 --- a/apps/web/src/lib/ai-gateway/providers/seed.ts +++ b/apps/web/src/lib/ai-gateway/providers/seed.ts @@ -13,4 +13,5 @@ export const seed_20_code_free_model: KiloExclusiveModel = { internal_id: 'seed-2-0-code-preview-260328', pricing: null, exclusive_to: [], + inference_provider_restriction: [], }; diff --git a/apps/web/src/lib/ai-gateway/providers/stepfun.ts b/apps/web/src/lib/ai-gateway/providers/stepfun.ts index 7311d95d1..c35936dcd 100644 --- a/apps/web/src/lib/ai-gateway/providers/stepfun.ts +++ b/apps/web/src/lib/ai-gateway/providers/stepfun.ts @@ -17,4 +17,5 @@ export const stepfun_35_flash_free_model: KiloExclusiveModel = { internal_id: 'stepfun/step-3.5-flash', pricing: null, exclusive_to: [], + inference_provider_restriction: ['stepfun'], }; diff --git a/apps/web/src/lib/ai-gateway/providers/xai.ts b/apps/web/src/lib/ai-gateway/providers/xai.ts index 64f5fd5ff..9c966ee41 100644 --- a/apps/web/src/lib/ai-gateway/providers/xai.ts +++ 
b/apps/web/src/lib/ai-gateway/providers/xai.ts @@ -14,6 +14,7 @@ export const grok_code_fast_1_optimized_free_model: KiloExclusiveModel = { internal_id: 'x-ai/grok-code-fast-1:optimized', pricing: null, exclusive_to: [], + inference_provider_restriction: [], }; export function isGrokModel(requestedModel: string) {