Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# ==============================================================================

# Primary model provider to use
# Options: databricks, azure-anthropic, azure-openai, openrouter, openai, ollama, llamacpp, lmstudio, bedrock, zai, vertex
# Options: databricks, azure-anthropic, azure-openai, openrouter, openai, ollama, llamacpp, lmstudio, bedrock, zai, moonshot, vertex
# Default: databricks
MODEL_PROVIDER=ollama

Expand Down Expand Up @@ -158,6 +158,14 @@ OLLAMA_MAX_TOOLS_FOR_ROUTING=3
# Options: GLM-4.7, GLM-4.5-Air, GLM-4-Plus
# ZAI_MODEL=GLM-4.7

# ==============================================================================
# Moonshot (Kimi) Configuration (Anthropic-compatible endpoint)
# ==============================================================================

# MOONSHOT_API_KEY=your-moonshot-api-key
# MOONSHOT_ENDPOINT=https://api.moonshot.ai/anthropic/v1/messages
# MOONSHOT_MODEL=kimi-k2.5

# ==============================================================================
# Google Vertex AI Configuration (Gemini Models)
# ==============================================================================
Expand Down Expand Up @@ -361,6 +369,12 @@ HOT_RELOAD_DEBOUNCE_MS=1000
# ZAI_MODEL=GLM-4.7
# npm start

# Moonshot (Kimi):
# MODEL_PROVIDER=moonshot
# MOONSHOT_API_KEY=your-moonshot-api-key
# MOONSHOT_MODEL=kimi-k2.5
# npm start

# Google Gemini (via Vertex AI):
# MODEL_PROVIDER=vertex
# VERTEX_API_KEY=your-google-api-key
Expand Down
12 changes: 12 additions & 0 deletions src/api/openai-router.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
const {
convertOpenAIToAnthropic,
convertAnthropicToOpenAI,
convertAnthropicStreamChunkToOpenAI

Check failure on line 25 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (23.x)

'convertAnthropicStreamChunkToOpenAI' is assigned a value but never used

Check failure on line 25 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (22.x)

'convertAnthropicStreamChunkToOpenAI' is assigned a value but never used

Check failure on line 25 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (20.x)

'convertAnthropicStreamChunkToOpenAI' is assigned a value but never used

Check failure on line 25 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (24.x)

'convertAnthropicStreamChunkToOpenAI' is assigned a value but never used
} = require("../clients/openai-format");

const router = express.Router();
Expand Down Expand Up @@ -326,7 +326,7 @@
* @param {Object} headers - Request headers
* @returns {boolean}
*/
function isKnownClient(headers) {

Check failure on line 329 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (23.x)

'isKnownClient' is defined but never used

Check failure on line 329 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (22.x)

'isKnownClient' is defined but never used

Check failure on line 329 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (20.x)

'isKnownClient' is defined but never used

Check failure on line 329 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (24.x)

'isKnownClient' is defined but never used
return detectClient(headers) !== "unknown";
}

Expand Down Expand Up @@ -559,8 +559,8 @@
logger.error({
error: streamError.message,
stack: streamError.stack,
resultWasNull: !result,

Check failure on line 562 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (23.x)

'result' is not defined

Check failure on line 562 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (22.x)

'result' is not defined

Check failure on line 562 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (20.x)

'result' is not defined

Check failure on line 562 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (24.x)

'result' is not defined
resultBodyWasNull: result && !result.body,

Check failure on line 563 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (23.x)

'result' is not defined

Check failure on line 563 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (22.x)

'result' is not defined

Check failure on line 563 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (20.x)

'result' is not defined

Check failure on line 563 in src/api/openai-router.js

View workflow job for this annotation

GitHub Actions / Run Tests (24.x)

'result' is not defined
resultKeys: result ? Object.keys(result) : null
}, "=== STREAMING ERROR ===");

Expand Down Expand Up @@ -767,6 +767,18 @@
});
}

// Check Moonshot (Kimi)
if (config.moonshot?.apiKey) {
providers.push({
name: "moonshot",
type: "moonshot-ai",
models: [
config.moonshot.model || "kimi-k2.5",
"kimi-k2.5"
]
});
}

// Check Vertex AI (Google Cloud)
if (config.vertex?.projectId) {
providers.push({
Expand Down
14 changes: 14 additions & 0 deletions src/api/providers-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,20 @@ function getConfiguredProviders() {
});
}

// Check Moonshot (Kimi)
if (config.moonshot?.apiKey) {
providers.push({
name: "moonshot",
type: "moonshot-ai",
baseUrl: config.moonshot.endpoint || "https://api.moonshot.ai/api/anthropic",
enabled: true,
models: [
{ id: config.moonshot.model || "kimi-k2.5", name: "Configured Model" },
{ id: "kimi-k2.5", name: "Kimi K2.5" },
]
});
}

// Check Vertex AI (Google Cloud)
if (config.vertex?.projectId) {
const region = config.vertex.region || "us-east5";
Expand Down
82 changes: 64 additions & 18 deletions src/clients/databricks.js
Original file line number Diff line number Diff line change
Expand Up @@ -1261,16 +1261,21 @@ async function invokeBedrock(body) {
* Z.AI offers GLM models through an Anthropic-compatible API at ~1/7 the cost.
* Minimal transformation needed - mostly passthrough with model mapping.
*/
async function invokeZai(body) {
if (!config.zai?.apiKey) {
throw new Error("Z.AI API key is not configured. Set ZAI_API_KEY in your .env file.");
async function invokeZai(body, providerOptions = {}) {
const providerConfig = providerOptions.config || config.zai || {};
const providerName = providerOptions.providerName || "Z.AI";
const defaultEndpoint = providerOptions.defaultEndpoint || "https://api.z.ai/api/anthropic/v1/messages";
const defaultModel = providerOptions.defaultModel || "glm-4.7";

if (!providerConfig.apiKey) {
throw new Error(`${providerName} API key is not configured.`);
}

const endpoint = config.zai.endpoint || "https://api.z.ai/api/anthropic/v1/messages";
const endpoint = providerConfig.endpoint || defaultEndpoint;
const isOpenAIFormat = endpoint.includes("/chat/completions");

// Model mapping: Anthropic names → Z.AI names (lowercase)
const modelMap = {
const modelMap = providerOptions.modelMap || {
"claude-sonnet-4-5-20250929": "glm-4.7",
"claude-sonnet-4-5": "glm-4.7",
"claude-sonnet-4.5": "glm-4.7",
Expand All @@ -1280,8 +1285,9 @@ async function invokeZai(body) {
"claude-3-haiku": "glm-4.5-air",
};

const requestedModel = body.model || config.zai.model;
let mappedModel = modelMap[requestedModel] || config.zai.model || "glm-4.7";
const requestedModel = body.model || providerConfig.model;
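// If the operator explicitly sets a provider model, honor it over the Claude-name mapping.
// NOTE(review): the config module appears to always default ZAI_MODEL / MOONSHOT_MODEL, so in
// practice the mapping below is only consulted when providerConfig.model is empty — confirm this is intended.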
let mappedModel = providerConfig.model || modelMap[requestedModel] || defaultModel;
mappedModel = mappedModel.toLowerCase();

let zaiBody;
Expand Down Expand Up @@ -1362,26 +1368,42 @@ async function invokeZai(body) {

headers = {
"Content-Type": "application/json",
"Authorization": `Bearer ${config.zai.apiKey}`,
"Authorization": `Bearer ${providerConfig.apiKey}`,
};
} else {
// Anthropic format endpoint
zaiBody = { ...body };
zaiBody.model = mappedModel;

const hasToolHistory = Array.isArray(zaiBody.messages)
&& zaiBody.messages.some((message) => {
if (!message || !Array.isArray(message.content)) return false;
return message.content.some((block) => (
block?.type === "tool_use"
|| block?.type === "tool_result"
|| block?.type === "tool_reference"
));
});

// Inject standard tools if client didn't send any (passthrough mode)
if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) {
// IMPORTANT: do not inject on tool-followup turns, because the model
// must continue against the exact previously-declared tool schema.
if ((!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) && !hasToolHistory) {
zaiBody.tools = STANDARD_TOOLS;
logger.info({
injectedToolCount: STANDARD_TOOLS.length,
injectedToolNames: STANDARD_TOOLS.map(t => t.name),
reason: "Client did not send tools (passthrough mode)"
}, "=== INJECTING STANDARD TOOLS (Z.AI Anthropic) ===");
}, `=== INJECTING STANDARD TOOLS (${providerName} Anthropic) ===`);
} else if ((!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) && hasToolHistory) {
logger.info({
reason: "Skipped tool injection on tool-followup turn",
}, `=== TOOL INJECTION SKIPPED (${providerName} Anthropic) ===`);
}

headers = {
"Content-Type": "application/json",
"x-api-key": config.zai.apiKey,
"x-api-key": providerConfig.apiKey,
"anthropic-version": "2023-06-01",
};
}
Expand All @@ -1401,20 +1423,20 @@ async function invokeZai(body) {
toolNames: zaiBody.tools?.map(t => t.function?.name || t.name),
toolChoice: zaiBody.tool_choice,
fullRequest: JSON.stringify(zaiBody).substring(0, 500),
}, "=== Z.AI REQUEST ===");
}, `=== ${providerName} REQUEST ===`);

logger.debug({
zaiBody: JSON.stringify(zaiBody).substring(0, 1000),
}, "Z.AI request body (truncated)");
}, `${providerName} request body (truncated)`);

// Use semaphore to limit concurrent Z.AI requests (prevents rate limiting)
// Use semaphore to limit concurrent requests (prevents rate limiting)
return zaiSemaphore.run(async () => {
logger.debug({
queueLength: zaiSemaphore.queue.length,
currentConcurrent: zaiSemaphore.current,
}, "Z.AI semaphore status");
}, `${providerName} semaphore status`);

const response = await performJsonRequest(endpoint, { headers, body: zaiBody }, "Z.AI");
const response = await performJsonRequest(endpoint, { headers, body: zaiBody }, providerName);

logger.info({
responseOk: response?.ok,
Expand All @@ -1423,14 +1445,14 @@ async function invokeZai(body) {
rawContent: response?.json?.choices?.[0]?.message?.content,
hasReasoning: !!response?.json?.choices?.[0]?.message?.reasoning_content,
isOpenAIFormat,
}, "=== Z.AI RAW RESPONSE ===");
}, `=== ${providerName} RAW RESPONSE ===`);

// Convert OpenAI response back to Anthropic format if needed
if (isOpenAIFormat && response?.ok && response?.json) {
const anthropicJson = convertOpenAIToAnthropic(response.json);
logger.info({
convertedContent: JSON.stringify(anthropicJson.content).substring(0, 200),
}, "=== Z.AI CONVERTED RESPONSE ===");
}, `=== ${providerName} CONVERTED RESPONSE ===`);
// Return in the same format as other providers (with ok, status, json)
return {
ok: response.ok,
Expand All @@ -1446,6 +1468,26 @@ async function invokeZai(body) {
});
}

/**
 * Claude model names mapped to their Moonshot (Kimi) counterpart.
 * Every alias listed here currently resolves to kimi-k2.5. The table is
 * constant, so it is built once instead of on every request.
 */
const MOONSHOT_MODEL_MAP = Object.freeze({
  "claude-sonnet-4-5-20250929": "kimi-k2.5",
  "claude-sonnet-4-5": "kimi-k2.5",
  "claude-sonnet-4.5": "kimi-k2.5",
  "claude-3-5-sonnet": "kimi-k2.5",
  "claude-haiku-4-5-20251001": "kimi-k2.5",
  "claude-haiku-4-5": "kimi-k2.5",
  "claude-3-haiku": "kimi-k2.5",
});

/**
 * Invoke Moonshot (Kimi) by delegating to invokeZai with Moonshot-specific
 * provider options (name, config, default endpoint/model, model map).
 *
 * @param {Object} body - Request body, forwarded to invokeZai unchanged
 * @returns {Promise<*>} Whatever invokeZai resolves to
 */
async function invokeMoonshot(body) {
  return invokeZai(body, {
    providerName: "Moonshot",
    // Always hand over an object: invokeZai falls back to config.zai when the
    // `config` option is falsy, which would route Moonshot traffic with Z.AI
    // settings. An empty object makes the API-key check fail loudly instead.
    config: config.moonshot ?? {},
    defaultEndpoint: "https://api.moonshot.ai/anthropic/v1/messages",
    defaultModel: "kimi-k2.5",
    modelMap: MOONSHOT_MODEL_MAP,
  });
}



/**
Expand Down Expand Up @@ -1883,6 +1925,8 @@ async function invokeModel(body, options = {}) {
return await invokeBedrock(body);
} else if (initialProvider === "zai") {
return await invokeZai(body);
} else if (initialProvider === "moonshot") {
return await invokeMoonshot(body);
} else if (initialProvider === "vertex") {
return await invokeVertex(body);
}
Expand Down Expand Up @@ -1972,6 +2016,8 @@ async function invokeModel(body, options = {}) {
return await invokeLlamaCpp(body);
} else if (fallbackProvider === "zai") {
return await invokeZai(body);
} else if (fallbackProvider === "moonshot") {
return await invokeMoonshot(body);
} else if (fallbackProvider === "vertex") {
return await invokeVertex(body);
}
Expand Down
1 change: 1 addition & 0 deletions src/clients/ollama-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const TOOL_CAPABLE_MODELS = new Set([
"llama3.1",
"llama3.2",
"qwen2.5",
"qwen3",
"mistral",
"mistral-nemo",
"firefunction-v2",
Expand Down
27 changes: 25 additions & 2 deletions src/config/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ function resolveConfigPath(targetPath) {
return path.resolve(normalised);
}

const SUPPORTED_MODEL_PROVIDERS = new Set(["databricks", "azure-anthropic", "ollama", "openrouter", "azure-openai", "openai", "llamacpp", "lmstudio", "bedrock", "zai", "vertex"]);
const SUPPORTED_MODEL_PROVIDERS = new Set(["databricks", "azure-anthropic", "ollama", "openrouter", "azure-openai", "openai", "llamacpp", "lmstudio", "bedrock", "zai", "moonshot", "vertex"]);
const rawModelProvider = (process.env.MODEL_PROVIDER ?? "databricks").toLowerCase();

// Validate MODEL_PROVIDER early with a clear error message
Expand Down Expand Up @@ -132,6 +132,11 @@ const zaiApiKey = process.env.ZAI_API_KEY?.trim() || null;
const zaiEndpoint = process.env.ZAI_ENDPOINT?.trim() || "https://api.z.ai/api/anthropic/v1/messages";
const zaiModel = process.env.ZAI_MODEL?.trim() || "GLM-4.7";

// Moonshot configuration - Anthropic-compatible API for Kimi models
const moonshotApiKey = process.env.MOONSHOT_API_KEY?.trim() || null;
const moonshotEndpoint = process.env.MOONSHOT_ENDPOINT?.trim() || "https://api.moonshot.ai/anthropic/v1/messages";
const moonshotModel = process.env.MOONSHOT_MODEL?.trim() || "kimi-k2.5";

// Vertex AI (Google Gemini) configuration
const vertexApiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null;
const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash";
Expand Down Expand Up @@ -305,6 +310,12 @@ if (modelProvider === "bedrock" && !bedrockApiKey) {
);
}

if (modelProvider === "moonshot" && !moonshotApiKey) {
throw new Error(
"Set MOONSHOT_API_KEY before starting the proxy.",
);
}

// Validate hybrid routing configuration
if (preferOllama) {
if (!ollamaEndpoint) {
Expand All @@ -319,7 +330,7 @@ if (preferOllama) {
// Prevent local providers from being used as fallback (they can fail just like Ollama)
const localProviders = ["ollama", "llamacpp", "lmstudio"];
if (fallbackEnabled && localProviders.includes(fallbackProvider)) {
throw new Error(`FALLBACK_PROVIDER cannot be '${fallbackProvider}' (local providers should not be fallbacks). Use cloud providers: databricks, azure-anthropic, azure-openai, openrouter, openai, bedrock`);
throw new Error(`FALLBACK_PROVIDER cannot be '${fallbackProvider}' (local providers should not be fallbacks). Use cloud providers: databricks, azure-anthropic, azure-openai, openrouter, openai, bedrock, zai, moonshot, vertex`);
}

// Ensure fallback provider is properly configured (only if fallback is enabled)
Expand All @@ -336,6 +347,9 @@ if (preferOllama) {
if (fallbackProvider === "bedrock" && !bedrockApiKey) {
throw new Error("FALLBACK_PROVIDER is set to 'bedrock' but AWS_BEDROCK_API_KEY is not configured. Please set this environment variable or choose a different fallback provider.");
}
if (fallbackProvider === "moonshot" && !moonshotApiKey) {
throw new Error("FALLBACK_PROVIDER is set to 'moonshot' but MOONSHOT_API_KEY is not configured. Please set this environment variable or choose a different fallback provider.");
}
}
}

Expand Down Expand Up @@ -589,6 +603,11 @@ var config = {
endpoint: zaiEndpoint,
model: zaiModel,
},
moonshot: {
apiKey: moonshotApiKey,
endpoint: moonshotEndpoint,
model: moonshotModel,
},
vertex: {
apiKey: vertexApiKey,
model: vertexModel,
Expand Down Expand Up @@ -878,7 +897,11 @@ function reloadConfig() {
config.openai.apiKey = process.env.OPENAI_API_KEY?.trim() || null;
config.bedrock.apiKey = process.env.AWS_BEDROCK_API_KEY?.trim() || null;
config.zai.apiKey = process.env.ZAI_API_KEY?.trim() || null;
config.zai.endpoint = process.env.ZAI_ENDPOINT?.trim() || "https://api.z.ai/api/anthropic/v1/messages";
config.zai.model = process.env.ZAI_MODEL?.trim() || "GLM-4.7";
config.moonshot.apiKey = process.env.MOONSHOT_API_KEY?.trim() || null;
config.moonshot.endpoint = process.env.MOONSHOT_ENDPOINT?.trim() || "https://api.moonshot.ai/anthropic/v1/messages";
config.moonshot.model = process.env.MOONSHOT_MODEL?.trim() || "kimi-k2.5";
config.vertex.apiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null;
config.vertex.model = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash";

Expand Down
Loading
Loading