From aea46652bc48c8b5f7df263da1a73e749691a6d2 Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Sat, 7 Feb 2026 16:54:18 +0530 Subject: [PATCH 1/5] feat: Add Ollama provider support - Add OllamaProvider class with OpenAI-compatible API support - Register Ollama in ProviderRegistry with model selection rules - Add Ollama configuration to AppConfig (baseUrl, apiKey, enabled) - Add Ollama to chat_completions_providers_v1.json catalog with 16 popular models - Add ollama.yaml pricing file (free/local models) - Update ProviderName type to include 'ollama' - Add OLLAMA_BASE_URL and OLLAMA_API_KEY to .env.example Ollama runs models locally and exposes an OpenAI-compatible API at http://localhost:11434/v1 by default. Users can configure a custom base URL via OLLAMA_BASE_URL environment variable. --- .env.example | 3 + gateway/src/costs/ollama.yaml | 59 ++++ .../src/domain/providers/ollama-provider.ts | 104 +++++++ .../src/domain/services/provider-registry.ts | 5 +- .../src/infrastructure/config/app-config.ts | 275 +++++++++--------- .../chat_completions_providers_v1.json | 38 +++ shared/types/types.ts | 2 +- 7 files changed, 349 insertions(+), 137 deletions(-) create mode 100644 gateway/src/costs/ollama.yaml create mode 100644 gateway/src/domain/providers/ollama-provider.ts diff --git a/.env.example b/.env.example index 1b31994..9cd322e 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,9 @@ XAI_API_KEY=your_key_here OPENROUTER_API_KEY=your_key_here ZAI_API_KEY=your_key_here GOOGLE_API_KEY=your_key_here +# Ollama (local models — no API key needed, just set the base URL) +# OLLAMA_BASE_URL=http://localhost:11434/v1 +# OLLAMA_API_KEY= # optional, only if behind an auth proxy PORT=3001 # Optional x402 passthrough for OpenRouter access X402_BASE_URL=x402_supported_provider_url diff --git a/gateway/src/costs/ollama.yaml b/gateway/src/costs/ollama.yaml new file mode 100644 index 0000000..2d7bb29 --- /dev/null +++ b/gateway/src/costs/ollama.yaml @@ -0,0 +1,59 @@ +provider: 
"ollama" +currency: "USD" +unit: "MTok" +models: + # Ollama runs models locally — all costs are zero. + # Users may add custom model entries here if needed. + llama3.3: + input: 0.00 + output: 0.00 + llama3.2: + input: 0.00 + output: 0.00 + llama3.1: + input: 0.00 + output: 0.00 + llama3: + input: 0.00 + output: 0.00 + gemma3: + input: 0.00 + output: 0.00 + gemma2: + input: 0.00 + output: 0.00 + qwen3: + input: 0.00 + output: 0.00 + qwen2.5-coder: + input: 0.00 + output: 0.00 + deepseek-r1: + input: 0.00 + output: 0.00 + deepseek-coder-v2: + input: 0.00 + output: 0.00 + phi4: + input: 0.00 + output: 0.00 + phi3: + input: 0.00 + output: 0.00 + mistral: + input: 0.00 + output: 0.00 + mixtral: + input: 0.00 + output: 0.00 + codellama: + input: 0.00 + output: 0.00 + starcoder2: + input: 0.00 + output: 0.00 +metadata: + last_updated: "2026-02-03" + source: "https://ollama.com" + notes: "Ollama runs models locally. All API costs are zero — hardware costs are borne by the user." + version: "1.0" diff --git a/gateway/src/domain/providers/ollama-provider.ts b/gateway/src/domain/providers/ollama-provider.ts new file mode 100644 index 0000000..23df2cb --- /dev/null +++ b/gateway/src/domain/providers/ollama-provider.ts @@ -0,0 +1,104 @@ +import { BaseProvider } from './base-provider.js'; +import { CanonicalRequest, CanonicalResponse } from 'shared/types/index.js'; +import { getConfig } from '../../infrastructure/config/app-config.js'; + +interface OllamaRequest { + model: string; + messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string; }>; + max_tokens?: number; + temperature?: number; + stream?: boolean; + stop?: string | string[]; +} + +interface OllamaResponse { + id: string; + object: string; + created: number; + model: string; + choices: Array<{ + index: number; + message: { role: string; content: string; }; + finish_reason: string; + }>; + usage: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; +} + +export class 
OllamaProvider extends BaseProvider { + readonly name = 'ollama'; + + // Ollama exposes an OpenAI-compatible API at /v1 + protected get baseUrl(): string { + return getConfig().providers.ollama.baseUrl; + } + + // Ollama doesn't require an API key by default, but we return a + // dummy value so BaseProvider.isConfigured() stays true when the + // base URL is set. Users can optionally supply a real key if they + // put Ollama behind an auth proxy. + protected get apiKey(): string | undefined { + return getConfig().providers.ollama.apiKey || 'ollama'; + } + + /** + * Ollama is considered configured when the user has explicitly + * enabled it by setting OLLAMA_BASE_URL (even without an API key). + */ + isConfigured(): boolean { + return getConfig().providers.ollama.enabled; + } + + protected transformRequest(request: CanonicalRequest): OllamaRequest { + const messages = request.messages.map(msg => ({ + role: msg.role, + content: msg.content + .filter(c => c.type === 'text') + .map(c => c.text) + .join('') + })); + + return { + model: request.model, + messages, + max_tokens: request.maxTokens, + temperature: request.temperature, + stream: request.stream || false, + stop: request.stopSequences + }; + } + + protected transformResponse(response: OllamaResponse): CanonicalResponse { + const choice = response.choices[0]; + + return { + id: response.id, + model: response.model, + created: response.created, + message: { + role: 'assistant', + content: [{ + type: 'text', + text: choice.message.content + }] + }, + finishReason: this.mapFinishReason(choice.finish_reason), + usage: { + inputTokens: response.usage?.prompt_tokens ?? 0, + outputTokens: response.usage?.completion_tokens ?? 0, + totalTokens: response.usage?.total_tokens ?? 
0 + } + }; + } + + private mapFinishReason(reason: string): 'stop' | 'length' | 'tool_calls' | 'error' { + switch (reason) { + case 'stop': return 'stop'; + case 'length': return 'length'; + default: return 'stop'; + } + } +} diff --git a/gateway/src/domain/services/provider-registry.ts b/gateway/src/domain/services/provider-registry.ts index 3a2486f..e2a227e 100644 --- a/gateway/src/domain/services/provider-registry.ts +++ b/gateway/src/domain/services/provider-registry.ts @@ -5,6 +5,7 @@ import { OpenRouterProvider } from '../providers/openrouter-provider.js'; import { XAIProvider } from '../providers/xai-provider.js'; import { ZAIProvider } from '../providers/zai-provider.js'; import { GoogleProvider } from '../providers/google-provider.js'; +import { OllamaProvider } from '../providers/ollama-provider.js'; export enum Provider { ANTHROPIC = 'anthropic', @@ -12,7 +13,8 @@ export enum Provider { OPENROUTER = 'openrouter', XAI = 'xAI', ZAI = 'zai', - GOOGLE = 'google' + GOOGLE = 'google', + OLLAMA = 'ollama' } export interface ProviderSelectionRule { @@ -83,6 +85,7 @@ export function createDefaultProviderRegistry(): ProviderRegistry { { id: Provider.XAI, create: () => new XAIProvider(), selectionRules: [{ match: model => model.includes('grok-') || model.includes('grok_beta') }] }, { id: Provider.ZAI, create: () => new ZAIProvider() }, { id: Provider.GOOGLE, create: () => new GoogleProvider(), selectionRules: [{ match: model => model.toLowerCase().includes('gemini') }] }, + { id: Provider.OLLAMA, create: () => new OllamaProvider(), selectionRules: [{ match: model => model.startsWith('ollama/') }] }, ]; return new ProviderRegistry(plugins); diff --git a/gateway/src/infrastructure/config/app-config.ts b/gateway/src/infrastructure/config/app-config.ts index f52f4f9..c256a1c 100644 --- a/gateway/src/infrastructure/config/app-config.ts +++ b/gateway/src/infrastructure/config/app-config.ts @@ -1,43 +1,43 @@ -/** - * Centralized application configuration - * All 
environment variables are validated and accessed through this class - */ - -export class AppConfig { - // Server configuration - readonly server = { - port: this.getNumber('PORT', 3001), - environment: this.getString('NODE_ENV', 'development'), - isDevelopment: this.getString('NODE_ENV', 'development') === 'development', - isProduction: this.getString('NODE_ENV', 'development') === 'production', - version: this.getOptionalString('npm_package_version') || 'dev', - }; - - // x402 Payment configuration - readonly x402 = { - enabled: this.has('PRIVATE_KEY'), - privateKey: this.getOptionalString('PRIVATE_KEY'), - baseUrl: this.getString('X402_BASE_URL', 'https://x402.ekailabs.xyz'), - - // Helper methods - get chatCompletionsUrl() { - return `${this.baseUrl}/v1/chat/completions`; - }, - get messagesUrl() { - return `${this.baseUrl}/v1/messages`; - }, - }; - - // Provider API Keys - readonly providers = { - anthropic: { - apiKey: this.getOptionalString('ANTHROPIC_API_KEY'), - enabled: this.has('ANTHROPIC_API_KEY'), - }, - openai: { - apiKey: this.getOptionalString('OPENAI_API_KEY'), - enabled: this.has('OPENAI_API_KEY'), - }, +/** + * Centralized application configuration + * All environment variables are validated and accessed through this class + */ + +export class AppConfig { + // Server configuration + readonly server = { + port: this.getNumber('PORT', 3001), + environment: this.getString('NODE_ENV', 'development'), + isDevelopment: this.getString('NODE_ENV', 'development') === 'development', + isProduction: this.getString('NODE_ENV', 'development') === 'production', + version: this.getOptionalString('npm_package_version') || 'dev', + }; + + // x402 Payment configuration + readonly x402 = { + enabled: this.has('PRIVATE_KEY'), + privateKey: this.getOptionalString('PRIVATE_KEY'), + baseUrl: this.getString('X402_BASE_URL', 'https://x402.ekailabs.xyz'), + + // Helper methods + get chatCompletionsUrl() { + return `${this.baseUrl}/v1/chat/completions`; + }, + get 
messagesUrl() { + return `${this.baseUrl}/v1/messages`; + }, + }; + + // Provider API Keys + readonly providers = { + anthropic: { + apiKey: this.getOptionalString('ANTHROPIC_API_KEY'), + enabled: this.has('ANTHROPIC_API_KEY'), + }, + openai: { + apiKey: this.getOptionalString('OPENAI_API_KEY'), + enabled: this.has('OPENAI_API_KEY'), + }, openrouter: { apiKey: this.getOptionalString('OPENROUTER_API_KEY'), enabled: this.has('OPENROUTER_API_KEY'), @@ -54,21 +54,26 @@ export class AppConfig { apiKey: this.getOptionalString('GOOGLE_API_KEY'), enabled: this.has('GOOGLE_API_KEY'), }, + ollama: { + baseUrl: this.getString('OLLAMA_BASE_URL', 'http://localhost:11434/v1'), + apiKey: this.getOptionalString('OLLAMA_API_KEY'), + enabled: this.has('OLLAMA_BASE_URL'), + }, + }; + + // Telemetry configuration + readonly telemetry = { + enabled: this.getBoolean('ENABLE_TELEMETRY', true), + endpoint: this.getOptionalString('TELEMETRY_ENDPOINT'), + }; + + // OpenRouter-specific configuration + readonly openrouter = { + skipPricingRefresh: this.getBoolean('SKIP_OPENROUTER_PRICING_REFRESH', false), + pricingTimeoutMs: this.getNumber('OPENROUTER_PRICING_TIMEOUT_MS', 4000), + pricingRetries: this.getNumber('OPENROUTER_PRICING_RETRIES', 2), }; - - // Telemetry configuration - readonly telemetry = { - enabled: this.getBoolean('ENABLE_TELEMETRY', true), - endpoint: this.getOptionalString('TELEMETRY_ENDPOINT'), - }; - - // OpenRouter-specific configuration - readonly openrouter = { - skipPricingRefresh: this.getBoolean('SKIP_OPENROUTER_PRICING_REFRESH', false), - pricingTimeoutMs: this.getNumber('OPENROUTER_PRICING_TIMEOUT_MS', 4000), - pricingRetries: this.getNumber('OPENROUTER_PRICING_RETRIES', 2), - }; - + // Feature flags readonly features = { usageTracking: this.getBoolean('ENABLE_USAGE_TRACKING', true), @@ -79,84 +84,84 @@ export class AppConfig { backend: this.getString('MEMORY_BACKEND', 'file'), maxItems: this.getNumber('MEMORY_MAX_ITEMS', 20), } as const; - - // Helper methods - 
private has(key: string): boolean { - return !!process.env[key]; - } - - private getString(key: string, defaultValue: string): string; - private getString(key: string): string; - private getString(key: string, defaultValue?: string): string { - const value = process.env[key] || defaultValue; - if (value === undefined) { - throw new Error(`Missing required environment variable: ${key}`); - } - return value; - } - - private getOptionalString(key: string): string | undefined { - return process.env[key]; - } - - private getNumber(key: string, defaultValue: number): number { - const value = process.env[key]; - if (!value) return defaultValue; - const num = parseInt(value, 10); - if (isNaN(num)) { - throw new Error(`Invalid number for environment variable ${key}: ${value}`); - } - return num; - } - - private getBoolean(key: string, defaultValue: boolean): boolean { - const value = process.env[key]; - if (!value) return defaultValue; - return value.toLowerCase() === 'true' || value === '1'; - } - - /** - * Validate that at least one authentication method is configured - */ - validate(): void { - const hasApiKeys = Object.values(this.providers).some(p => p.enabled); - const hasX402 = this.x402.enabled; - - if (!hasApiKeys && !hasX402) { - throw new Error( - 'No authentication configured. Set either:\n' + - ' 1. At least one provider API key (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc.)\n' + - ' 2. 
PRIVATE_KEY for x402 payment mode' - ); - } - } - - /** - * Get human-readable mode description - */ - getMode(): 'x402-only' | 'hybrid' | 'byok' { - const hasApiKeys = Object.values(this.providers).some(p => p.enabled); - const hasX402 = this.x402.enabled; - - if (!hasApiKeys && hasX402) return 'x402-only'; - if (hasApiKeys && hasX402) return 'hybrid'; - return 'byok'; - } -} - -// Singleton instance -let configInstance: AppConfig | null = null; - -export function getConfig(): AppConfig { - if (!configInstance) { - configInstance = new AppConfig(); - configInstance.validate(); - } - return configInstance; -} - -// For testing - reset config -export function resetConfig(): void { - configInstance = null; -} - + + // Helper methods + private has(key: string): boolean { + return !!process.env[key]; + } + + private getString(key: string, defaultValue: string): string; + private getString(key: string): string; + private getString(key: string, defaultValue?: string): string { + const value = process.env[key] || defaultValue; + if (value === undefined) { + throw new Error(`Missing required environment variable: ${key}`); + } + return value; + } + + private getOptionalString(key: string): string | undefined { + return process.env[key]; + } + + private getNumber(key: string, defaultValue: number): number { + const value = process.env[key]; + if (!value) return defaultValue; + const num = parseInt(value, 10); + if (isNaN(num)) { + throw new Error(`Invalid number for environment variable ${key}: ${value}`); + } + return num; + } + + private getBoolean(key: string, defaultValue: boolean): boolean { + const value = process.env[key]; + if (!value) return defaultValue; + return value.toLowerCase() === 'true' || value === '1'; + } + + /** + * Validate that at least one authentication method is configured + */ + validate(): void { + const hasApiKeys = Object.values(this.providers).some(p => p.enabled); + const hasX402 = this.x402.enabled; + + if (!hasApiKeys && !hasX402) { + throw 
new Error( + 'No authentication configured. Set either:\n' + + ' 1. At least one provider API key (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc.)\n' + + ' 2. PRIVATE_KEY for x402 payment mode' + ); + } + } + + /** + * Get human-readable mode description + */ + getMode(): 'x402-only' | 'hybrid' | 'byok' { + const hasApiKeys = Object.values(this.providers).some(p => p.enabled); + const hasX402 = this.x402.enabled; + + if (!hasApiKeys && hasX402) return 'x402-only'; + if (hasApiKeys && hasX402) return 'hybrid'; + return 'byok'; + } +} + +// Singleton instance +let configInstance: AppConfig | null = null; + +export function getConfig(): AppConfig { + if (!configInstance) { + configInstance = new AppConfig(); + configInstance.validate(); + } + return configInstance; +} + +// For testing - reset config +export function resetConfig(): void { + configInstance = null; +} + diff --git a/model_catalog/chat_completions_providers_v1.json b/model_catalog/chat_completions_providers_v1.json index 8b260cf..b31f2fb 100644 --- a/model_catalog/chat_completions_providers_v1.json +++ b/model_catalog/chat_completions_providers_v1.json @@ -156,6 +156,44 @@ "format": "openai_chat" } } + }, + { + "provider": "ollama", + "models": [ + "llama3.3", + "llama3.2", + "llama3.1", + "llama3", + "gemma3", + "gemma2", + "qwen3", + "qwen2.5-coder", + "deepseek-r1", + "deepseek-coder-v2", + "phi4", + "phi3", + "mistral", + "mixtral", + "codellama", + "starcoder2" + ], + "chat_completions": { + "base_url": "http://localhost:11434/v1/chat/completions", + "auth": { + "env_var": "OLLAMA_API_KEY", + "header": "Authorization", + "scheme": "Bearer" + }, + "supported_client_formats": ["openai"], + "payload_defaults": { + "stream_options": { + "include_usage": true + } + }, + "usage": { + "format": "openai_chat" + } + } } ] } diff --git a/shared/types/types.ts b/shared/types/types.ts index 94985ff..c98df8f 100644 --- a/shared/types/types.ts +++ b/shared/types/types.ts @@ -31,7 +31,7 @@ export interface 
ChatCompletionResponse { } -export type ProviderName = 'openai' | 'openrouter' | 'anthropic'; +export type ProviderName = 'openai' | 'openrouter' | 'anthropic' | 'ollama'; // Removed conversation types - no conversation storage From 90be7c12ea651d3521e3d3d36ad8f72827ff05d3 Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Mon, 9 Feb 2026 22:11:12 +0530 Subject: [PATCH 2/5] feat: Add Responses API support for Ollama - Added Ollama to responses_providers_v1.json catalog - Created OllamaResponsesPassthrough class implementing Responses API - Registered Ollama in responses-passthrough-registry.ts Ollama supports the OpenResponses API specification at /v1/responses endpoint, providing future-proof support as /chat/completions may be deprecated. --- .../ollama-responses-passthrough.ts | 284 ++++++++++++++++++ .../responses-passthrough-registry.ts | 2 + model_catalog/responses_providers_v1.json | 30 ++ 3 files changed, 316 insertions(+) create mode 100644 gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts diff --git a/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts new file mode 100644 index 0000000..6acf733 --- /dev/null +++ b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts @@ -0,0 +1,284 @@ +import { Response as ExpressResponse } from 'express'; +import { logger } from '../utils/logger.js'; +import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; +import { CONTENT_TYPES } from '../../domain/types/provider.js'; +import { getConfig } from '../config/app-config.js'; +import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; +import { injectMemoryContext, persistMemory } from '../memory/memory-helper.js'; + +export class OllamaResponsesPassthrough implements ResponsesPassthrough { + constructor(private readonly config: ResponsesPassthroughConfig) {} + + private get baseUrl(): 
string { + return this.config.baseUrl; + } + + private get apiKey(): string | undefined { + const envVar = this.config.auth?.envVar; + if (envVar) { + const token = process.env[envVar]; + if (token) return token; + } + + // Ollama doesn't require an API key by default (runs locally) + // Return undefined if no key is configured + return getConfig().providers.ollama.apiKey; + } + + private buildAuthHeader(): string | undefined { + const token = this.apiKey; + if (!token) return undefined; // Ollama doesn't require auth by default + + const { auth } = this.config; + if (!auth) { + return `Bearer ${token}`; + } + + if (auth.template) { + return auth.template.replace('{{token}}', token); + } + + if (auth.scheme) { + return `${auth.scheme} ${token}`.trim(); + } + + return token; + } + + private buildHeaders(): Record { + const headers: Record = { + 'Content-Type': 'application/json', + ...this.config.staticHeaders, + }; + + const authHeader = this.buildAuthHeader(); + if (authHeader) { + const headerName = this.config.auth?.header ?? 
'Authorization'; + headers[headerName] = authHeader; + } + + return headers; + } + + // Store usage data for tracking + private usage: { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + } | null = null; + + // Buffer to handle multi-chunk SSE events + private eventBuffer: string = ''; + private assistantResponseBuffer: string = ''; + + private async makeRequest(body: any, stream: boolean): Promise { + const response = await fetch(this.baseUrl, { + method: 'POST', + headers: this.buildHeaders(), + body: JSON.stringify({ ...body, stream, store: false }) // Not storing responses + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new ProviderError('ollama', errorText || `HTTP ${response.status}`, response.status, { endpoint: this.baseUrl }); + } + + return response; + } + + private trackUsage(text: string, model: string, clientIp?: string): void { + try { + // Add to buffer to handle multi-chunk events + this.eventBuffer += text; + + // Extract assistant response content from text.delta events + const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; + let match; + while ((match = textDeltaMatch.exec(text)) !== null) { + this.assistantResponseBuffer += match[1]; + } + + // Look for the exact response.completed event + if (this.eventBuffer.includes('"type":"response.completed"')) { + + // Find the start of the JSON object + const startIndex = this.eventBuffer.indexOf('{"type":"response.completed"'); + if (startIndex === -1) return; + + // Find the end by counting braces + let braceCount = 0; + let endIndex = -1; + + for (let i = startIndex; i < this.eventBuffer.length; i++) { + if (this.eventBuffer[i] === '{') braceCount++; + if (this.eventBuffer[i] === '}') braceCount--; + + if (braceCount === 0) { + endIndex = i; + break; + } + } + + if (endIndex === -1) return; // Incomplete JSON, wait for more chunks + + // Extract the complete JSON + const jsonString = 
this.eventBuffer.substring(startIndex, endIndex + 1); + + logger.debug('JSON response found', { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); + + try { + const data = JSON.parse(jsonString); + logger.debug('Response parsed successfully', { provider: 'ollama', operation: 'usage_extraction', module: 'ollama-responses-passthrough' }); + + // Extract usage data from response.usage + if (data.response?.usage) { + const usage = data.response.usage; + const totalInputTokens = usage.input_tokens || 0; + const cachedTokens = usage.input_tokens_details?.cached_tokens || 0; + const nonCachedInputTokens = totalInputTokens - cachedTokens; + const outputTokens = usage.output_tokens || 0; + const totalTokens = usage.total_tokens || (totalInputTokens + outputTokens); + const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0; + + logger.debug('Usage tracking from response', { + provider: 'ollama', + model, + totalInputTokens, + nonCachedInputTokens, + cachedTokens, + outputTokens, + totalTokens, + reasoningTokens, + module: 'ollama-responses-passthrough' + }); + + import('../utils/usage-tracker.js').then(({ usageTracker }) => { + usageTracker.trackUsage( + model, + 'ollama', + nonCachedInputTokens, + outputTokens, + cachedTokens, + 0, // cache read tokens + clientIp + ); + }).catch((error) => { + logger.error('Usage tracking failed', error, { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' }); + }); + } else { + logger.warn('No usage data in response', { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' }); + } + } catch (parseError) { + logger.error('JSON parse error', parseError, { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); + } + + // Clear buffer after processing + this.eventBuffer = ''; + } + } catch (error) { + logger.error('Usage tracking failed', error, { provider: 'ollama', operation: 
'passthrough', module: 'ollama-responses-passthrough' }); + } + } + + async handleDirectRequest(request: any, res: ExpressResponse, clientIp?: string): Promise { + // Reset usage tracking for new request + this.usage = null; + this.eventBuffer = ''; + this.assistantResponseBuffer = ''; + + injectMemoryContext(request, { + provider: this.config.provider, + defaultUserId: 'default', + extractCurrentUserInputs: req => extractResponsesUserInputs(req), + applyMemoryContext: (req, context) => { + if (req.instructions) { + req.instructions = `${context}\n\n---\n\n${req.instructions}`; + } else { + req.instructions = context; + } + } + }); + + if (request.stream) { + const response = await this.makeRequest(request, true); + + res.writeHead(200, { + 'Content-Type': CONTENT_TYPES.EVENT_STREAM, + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }); + + // Manual stream processing for usage tracking + const reader = response.body!.getReader(); + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + const text = new TextDecoder().decode(value); + setImmediate(() => this.trackUsage(text, request.model, clientIp)); + + res.write(value); + } + res.end(); + + persistMemory(request, this.assistantResponseBuffer, { + provider: this.config.provider, + defaultUserId: 'default', + extractUserContent: req => req.input || '', + metadataBuilder: req => ({ + model: req.model, + provider: this.config.provider, + }), + }); + } else { + const response = await this.makeRequest(request, false); + const json = await response.json(); + + // Track usage for non-streaming requests + if (json.usage) { + const totalInputTokens = json.usage.input_tokens || 0; + const cachedTokens = json.usage.input_tokens_details?.cached_tokens || 0; + const nonCachedInputTokens = totalInputTokens - cachedTokens; + const outputTokens = json.usage.output_tokens || 0; + const totalTokens = json.usage.total_tokens || (totalInputTokens + 
outputTokens); + const reasoningTokens = json.usage.output_tokens_details?.reasoning_tokens || 0; + + logger.debug('Tracking non-streaming usage', { + provider: 'ollama', + model: request.model, + totalInputTokens, + nonCachedInputTokens, + cachedTokens, + outputTokens, + totalTokens, + reasoningTokens, + module: 'ollama-responses-passthrough' + }); + + import('../utils/usage-tracker.js').then(({ usageTracker }) => { + usageTracker.trackUsage(request.model, 'ollama', nonCachedInputTokens, outputTokens, cachedTokens, 0, clientIp); + }).catch(() => {}); + } + + const assistantResponse = json?.output?.[0]?.content?.[0]?.text || ''; + persistMemory(request, assistantResponse, { + provider: this.config.provider, + defaultUserId: 'default', + extractUserContent: req => req.input || '', + metadataBuilder: req => ({ + model: req.model, + provider: this.config.provider, + }), + }); + + res.json(json); + } + } +} + +function extractResponsesUserInputs(request: any): string[] { + const content = (request.input || '').trim(); + return content ? 
[content] : []; +} diff --git a/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts b/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts index 6d38f0e..d67bc09 100644 --- a/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts +++ b/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts @@ -1,5 +1,6 @@ import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; import { OpenAIResponsesPassthrough } from './openai-responses-passthrough.js'; +import { OllamaResponsesPassthrough } from './ollama-responses-passthrough.js'; import { loadResponsesProviderDefinitions, ResponsesProviderDefinition } from './responses-provider-config.js'; import { logger } from '../utils/logger.js'; @@ -10,6 +11,7 @@ interface ProviderEntry { const passthroughFactories: Record ResponsesPassthrough> = { openai: (config) => new OpenAIResponsesPassthrough(config), + ollama: (config) => new OllamaResponsesPassthrough(config), }; export class ResponsesPassthroughRegistry { diff --git a/model_catalog/responses_providers_v1.json b/model_catalog/responses_providers_v1.json index 71086a7..472f6b8 100644 --- a/model_catalog/responses_providers_v1.json +++ b/model_catalog/responses_providers_v1.json @@ -15,6 +15,36 @@ }, "supported_client_formats": ["openai_responses"] } + }, + { + "provider": "ollama", + "models": [ + "llama3.3", + "llama3.2", + "llama3.1", + "llama3", + "gemma3", + "gemma2", + "qwen3", + "qwen2.5-coder", + "deepseek-r1", + "deepseek-coder-v2", + "phi4", + "phi3", + "mistral", + "mixtral", + "codellama", + "starcoder2" + ], + "responses": { + "base_url": "http://localhost:11434/v1/responses", + "auth": { + "env_var": "OLLAMA_API_KEY", + "header": "Authorization", + "scheme": "Bearer" + }, + "supported_client_formats": ["openai_responses"] + } } ] } From b22d91d267ac12a3f09aa5f204f14e45019f6ece Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Mon, 9 Feb 2026 22:32:29 +0530 
Subject: [PATCH 3/5] refactor: Remove comments from Ollama provider files --- .../src/domain/providers/ollama-provider.ts | 9 -- .../ollama-responses-passthrough.ts | 97 ++++++------------- 2 files changed, 31 insertions(+), 75 deletions(-) diff --git a/gateway/src/domain/providers/ollama-provider.ts b/gateway/src/domain/providers/ollama-provider.ts index 23df2cb..ee43e6d 100644 --- a/gateway/src/domain/providers/ollama-provider.ts +++ b/gateway/src/domain/providers/ollama-provider.ts @@ -31,23 +31,14 @@ interface OllamaResponse { export class OllamaProvider extends BaseProvider { readonly name = 'ollama'; - // Ollama exposes an OpenAI-compatible API at /v1 protected get baseUrl(): string { return getConfig().providers.ollama.baseUrl; } - // Ollama doesn't require an API key by default, but we return a - // dummy value so BaseProvider.isConfigured() stays true when the - // base URL is set. Users can optionally supply a real key if they - // put Ollama behind an auth proxy. protected get apiKey(): string | undefined { return getConfig().providers.ollama.apiKey || 'ollama'; } - /** - * Ollama is considered configured when the user has explicitly - * enabled it by setting OLLAMA_BASE_URL (even without an API key). 
- */ isConfigured(): boolean { return getConfig().providers.ollama.enabled; } diff --git a/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts index 6acf733..a692b5d 100644 --- a/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts @@ -1,6 +1,6 @@ import { Response as ExpressResponse } from 'express'; import { logger } from '../utils/logger.js'; -import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; +import { ProviderError } from '../../shared/errors/index.js'; import { CONTENT_TYPES } from '../../domain/types/provider.js'; import { getConfig } from '../config/app-config.js'; import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; @@ -10,39 +10,31 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { constructor(private readonly config: ResponsesPassthroughConfig) {} private get baseUrl(): string { - return this.config.baseUrl; - } - - private get apiKey(): string | undefined { - const envVar = this.config.auth?.envVar; - if (envVar) { - const token = process.env[envVar]; - if (token) return token; + if (this.config.baseUrl) { + return this.config.baseUrl; } - - // Ollama doesn't require an API key by default (runs locally) - // Return undefined if no key is configured - return getConfig().providers.ollama.apiKey; + const configBaseUrl = getConfig().providers.ollama.baseUrl; + return configBaseUrl.replace(/\/v1\/?$/, '/v1/responses'); } - private buildAuthHeader(): string | undefined { - const token = this.apiKey; - if (!token) return undefined; // Ollama doesn't require auth by default - + private buildAuthHeader(): string { const { auth } = this.config; if (!auth) { - return `Bearer ${token}`; - } - - if (auth.template) { - return auth.template.replace('{{token}}', token); + return ''; } - 
if (auth.scheme) { - return `${auth.scheme} ${token}`.trim(); + const envVar = auth.envVar; + if (envVar) { + const token = process.env[envVar]; + if (token) { + if (auth.scheme) { + return `${auth.scheme} ${token}`.trim(); + } + return token; + } } - return token; + return ''; } private buildHeaders(): Record { @@ -51,23 +43,20 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { ...this.config.staticHeaders, }; + const headerName = this.config.auth?.header ?? 'Authorization'; const authHeader = this.buildAuthHeader(); if (authHeader) { - const headerName = this.config.auth?.header ?? 'Authorization'; headers[headerName] = authHeader; } - return headers; } - // Store usage data for tracking private usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number; } | null = null; - // Buffer to handle multi-chunk SSE events private eventBuffer: string = ''; private assistantResponseBuffer: string = ''; @@ -75,7 +64,7 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { const response = await fetch(this.baseUrl, { method: 'POST', headers: this.buildHeaders(), - body: JSON.stringify({ ...body, stream, store: false }) // Not storing responses + body: JSON.stringify({ ...body, stream, store: false }) }); if (!response.ok) { @@ -88,24 +77,18 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { private trackUsage(text: string, model: string, clientIp?: string): void { try { - // Add to buffer to handle multi-chunk events this.eventBuffer += text; - // Extract assistant response content from text.delta events const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; let match; while ((match = textDeltaMatch.exec(text)) !== null) { this.assistantResponseBuffer += match[1]; } - // Look for the exact response.completed event if (this.eventBuffer.includes('"type":"response.completed"')) { - - // Find the start of the JSON object const startIndex = 
this.eventBuffer.indexOf('{"type":"response.completed"'); if (startIndex === -1) return; - // Find the end by counting braces let braceCount = 0; let endIndex = -1; @@ -119,9 +102,8 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { } } - if (endIndex === -1) return; // Incomplete JSON, wait for more chunks + if (endIndex === -1) return; - // Extract the complete JSON const jsonString = this.eventBuffer.substring(startIndex, endIndex + 1); logger.debug('JSON response found', { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); @@ -130,25 +112,18 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { const data = JSON.parse(jsonString); logger.debug('Response parsed successfully', { provider: 'ollama', operation: 'usage_extraction', module: 'ollama-responses-passthrough' }); - // Extract usage data from response.usage if (data.response?.usage) { const usage = data.response.usage; - const totalInputTokens = usage.input_tokens || 0; - const cachedTokens = usage.input_tokens_details?.cached_tokens || 0; - const nonCachedInputTokens = totalInputTokens - cachedTokens; + const inputTokens = usage.input_tokens || 0; const outputTokens = usage.output_tokens || 0; - const totalTokens = usage.total_tokens || (totalInputTokens + outputTokens); - const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0; + const totalTokens = usage.total_tokens || (inputTokens + outputTokens); logger.debug('Usage tracking from response', { provider: 'ollama', model, - totalInputTokens, - nonCachedInputTokens, - cachedTokens, + inputTokens, outputTokens, totalTokens, - reasoningTokens, module: 'ollama-responses-passthrough' }); @@ -156,10 +131,10 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { usageTracker.trackUsage( model, 'ollama', - nonCachedInputTokens, + inputTokens, outputTokens, - cachedTokens, - 0, // cache read tokens + 0, + 0, clientIp ); 
}).catch((error) => { @@ -172,7 +147,6 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { logger.error('JSON parse error', parseError, { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); } - // Clear buffer after processing this.eventBuffer = ''; } } catch (error) { @@ -181,7 +155,6 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { } async handleDirectRequest(request: any, res: ExpressResponse, clientIp?: string): Promise { - // Reset usage tracking for new request this.usage = null; this.eventBuffer = ''; this.assistantResponseBuffer = ''; @@ -209,7 +182,6 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { 'Access-Control-Allow-Origin': '*', }); - // Manual stream processing for usage tracking const reader = response.body!.getReader(); while (true) { @@ -236,33 +208,26 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { const response = await this.makeRequest(request, false); const json = await response.json(); - // Track usage for non-streaming requests if (json.usage) { - const totalInputTokens = json.usage.input_tokens || 0; - const cachedTokens = json.usage.input_tokens_details?.cached_tokens || 0; - const nonCachedInputTokens = totalInputTokens - cachedTokens; + const inputTokens = json.usage.input_tokens || 0; const outputTokens = json.usage.output_tokens || 0; - const totalTokens = json.usage.total_tokens || (totalInputTokens + outputTokens); - const reasoningTokens = json.usage.output_tokens_details?.reasoning_tokens || 0; + const totalTokens = json.usage.total_tokens || (inputTokens + outputTokens); logger.debug('Tracking non-streaming usage', { provider: 'ollama', model: request.model, - totalInputTokens, - nonCachedInputTokens, - cachedTokens, + inputTokens, outputTokens, totalTokens, - reasoningTokens, module: 'ollama-responses-passthrough' }); import('../utils/usage-tracker.js').then(({ usageTracker }) => 
{ - usageTracker.trackUsage(request.model, 'ollama', nonCachedInputTokens, outputTokens, cachedTokens, 0, clientIp); + usageTracker.trackUsage(request.model, 'ollama', inputTokens, outputTokens, 0, 0, clientIp); }).catch(() => {}); } - const assistantResponse = json?.output?.[0]?.content?.[0]?.text || ''; + const assistantResponse = json?.output?.[0]?.content?.[0]?.text || json?.output_text || ''; persistMemory(request, assistantResponse, { provider: this.config.provider, defaultUserId: 'default', From 6eaefb0f4e2ef762a0d667be678e007c719f9bee Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Wed, 11 Feb 2026 01:52:50 +0530 Subject: [PATCH 4/5] feat: add multi-key management with priority-based selection Fixes #11. Users can store multiple API keys per provider with priority ordering. OAuth/subscription tokens are used first, then keys by priority, with automatic fallback. Keys are managed via REST API (GET/POST/DELETE /keys, PUT /keys/:id/priority). --- .gitignore | 3 + gateway/src/app/handlers/key-handler.ts | 95 +++++++ gateway/src/index.ts | 6 + .../src/infrastructure/auth/key-manager.ts | 83 ++++++ gateway/src/infrastructure/auth/key-store.ts | 126 +++++++++ .../chat-completions-passthrough.ts | 26 +- .../passthrough/messages-passthrough.ts | 27 +- .../openai-responses-passthrough.ts | 248 +++++++++--------- 8 files changed, 477 insertions(+), 137 deletions(-) create mode 100644 gateway/src/app/handlers/key-handler.ts create mode 100644 gateway/src/infrastructure/auth/key-manager.ts create mode 100644 gateway/src/infrastructure/auth/key-store.ts diff --git a/.gitignore b/.gitignore index 7ce81ac..ca84aea 100644 --- a/.gitignore +++ b/.gitignore @@ -63,6 +63,9 @@ Thumbs.db *.tmp *.temp +# Key and token storage +.ekai/ + # AI configurations CLAUDE.md AGENTS.md diff --git a/gateway/src/app/handlers/key-handler.ts b/gateway/src/app/handlers/key-handler.ts new file mode 100644 index 0000000..582e582 --- /dev/null +++ b/gateway/src/app/handlers/key-handler.ts @@ -0,0 
+1,95 @@ +import { Request, Response } from 'express'; +import { addKey, removeKey, getAllKeys, updateKeyPriority, maskKey } from '../../infrastructure/auth/key-store.js'; +import { logger } from '../../infrastructure/utils/logger.js'; + +const VALID_PROVIDERS = ['anthropic', 'openai', 'openrouter', 'xai', 'zai', 'google', 'ollama']; + +export async function handleListKeys(req: Request, res: Response): Promise { + try { + const keys = getAllKeys().map(k => ({ + id: k.id, + provider: k.provider, + label: k.label, + maskedKey: maskKey(k.key), + priority: k.priority, + source: k.source, + addedAt: k.addedAt, + })); + res.json({ keys }); + } catch (error) { + logger.error('Failed to list keys', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to list keys' }); + } +} + +export async function handleAddKey(req: Request, res: Response): Promise { + try { + const { provider, key, label, priority } = req.body; + + if (!provider || !key) { + res.status(400).json({ error: 'provider and key are required' }); + return; + } + + if (!VALID_PROVIDERS.includes(provider)) { + res.status(400).json({ error: `Invalid provider. 
Use one of: ${VALID_PROVIDERS.join(', ')}` }); + return; + } + + if (typeof key !== 'string' || key.trim().length === 0) { + res.status(400).json({ error: 'key must be a non-empty string' }); + return; + } + + const stored = addKey(provider, key.trim(), label, priority); + res.status(201).json({ + id: stored.id, + provider: stored.provider, + label: stored.label, + maskedKey: maskKey(stored.key), + priority: stored.priority, + source: stored.source, + addedAt: stored.addedAt, + }); + } catch (error) { + logger.error('Failed to add key', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to add key' }); + } +} + +export async function handleRemoveKey(req: Request, res: Response): Promise { + try { + const { id } = req.params; + const removed = removeKey(id); + if (!removed) { + res.status(404).json({ error: 'Key not found' }); + return; + } + res.json({ status: 'removed', id }); + } catch (error) { + logger.error('Failed to remove key', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to remove key' }); + } +} + +export async function handleUpdateKeyPriority(req: Request, res: Response): Promise { + try { + const { id } = req.params; + const { priority } = req.body; + + if (typeof priority !== 'number' || priority < 0) { + res.status(400).json({ error: 'priority must be a non-negative number' }); + return; + } + + const updated = updateKeyPriority(id, priority); + if (!updated) { + res.status(404).json({ error: 'Key not found' }); + return; + } + res.json({ status: 'updated', id, priority }); + } catch (error) { + logger.error('Failed to update key priority', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to update key priority' }); + } +} diff --git a/gateway/src/index.ts b/gateway/src/index.ts index 863a5f9..6042097 100644 --- a/gateway/src/index.ts +++ b/gateway/src/index.ts @@ -32,6 +32,7 @@ import { handleUsageRequest } from './app/handlers/usage-handler.js'; import { 
handleConfigStatus } from './app/handlers/config-handler.js'; import { handleModelsRequest } from './app/handlers/models-handler.js'; import { handleGetBudget, handleUpdateBudget } from './app/handlers/budget-handler.js'; +import { handleListKeys, handleAddKey, handleRemoveKey, handleUpdateKeyPriority } from './app/handlers/key-handler.js'; import { logger } from './infrastructure/utils/logger.js'; import { requestContext } from './infrastructure/middleware/request-context.js'; import { requestLogging } from './infrastructure/middleware/request-logging.js'; @@ -92,6 +93,11 @@ async function bootstrap(): Promise { app.get('/budget', handleGetBudget); app.put('/budget', handleUpdateBudget); + app.get('/keys', handleListKeys); + app.post('/keys', handleAddKey); + app.delete('/keys/:id', handleRemoveKey); + app.put('/keys/:id/priority', handleUpdateKeyPriority); + // Error handler MUST be last middleware app.use(errorHandler); diff --git a/gateway/src/infrastructure/auth/key-manager.ts b/gateway/src/infrastructure/auth/key-manager.ts new file mode 100644 index 0000000..03ccbff --- /dev/null +++ b/gateway/src/infrastructure/auth/key-manager.ts @@ -0,0 +1,83 @@ +import { getKeysForProvider, seedFromEnv, StoredKey } from './key-store.js'; +import { logger } from '../utils/logger.js'; + +const ENV_MAP: Record = { + anthropic: 'ANTHROPIC_API_KEY', + openai: 'OPENAI_API_KEY', + openrouter: 'OPENROUTER_API_KEY', + xai: 'XAI_API_KEY', + zai: 'ZAI_API_KEY', + google: 'GOOGLE_API_KEY', + ollama: 'OLLAMA_API_KEY', +}; + +const exhaustedKeys = new Map(); +const EXHAUSTED_COOLDOWN_MS = 300000; + +let seeded = false; + +function seedAllFromEnv(): void { + if (seeded) return; + seeded = true; + for (const [provider, envVar] of Object.entries(ENV_MAP)) { + seedFromEnv(provider, envVar); + } +} + +export function markKeyExhausted(provider: string, keyId: string): void { + exhaustedKeys.set(`${provider}:${keyId}`, Date.now()); + logger.info('Key marked exhausted', { provider, keyId, 
cooldownMs: EXHAUSTED_COOLDOWN_MS, module: 'key-manager' }); +} + +function isKeyExhausted(provider: string, keyId: string): boolean { + const ts = exhaustedKeys.get(`${provider}:${keyId}`); + if (!ts) return false; + if (Date.now() - ts > EXHAUSTED_COOLDOWN_MS) { + exhaustedKeys.delete(`${provider}:${keyId}`); + return false; + } + return true; +} + +export async function resolveKeyForProvider(provider: string): Promise { + seedAllFromEnv(); + + if (provider === 'openai' || provider === 'anthropic') { + try { + const oauthPath = new URL('./oauth-service.js', import.meta.url).href; + const mod: any = await import(/* @vite-ignore */ oauthPath).catch(() => null); + if (mod?.getValidAccessToken) { + const oauthToken = await mod.getValidAccessToken(provider); + if (oauthToken) { + logger.debug('Using OAuth token', { provider, module: 'key-manager' }); + return oauthToken; + } + } + } catch {} + } + + const keys = getKeysForProvider(provider); + for (const key of keys) { + if (!isKeyExhausted(provider, key.id)) { + logger.debug('Using key', { provider, label: key.label, priority: key.priority, module: 'key-manager' }); + return key.key; + } + } + + if (keys.length > 0) { + logger.warn('All keys exhausted, using first key as fallback', { provider, module: 'key-manager' }); + return keys[0].key; + } + + return undefined; +} + +export function getKeyCountForProvider(provider: string): number { + seedAllFromEnv(); + return getKeysForProvider(provider).length; +} + +export function hasAnyKeyForProvider(provider: string): boolean { + seedAllFromEnv(); + return getKeysForProvider(provider).length > 0; +} diff --git a/gateway/src/infrastructure/auth/key-store.ts b/gateway/src/infrastructure/auth/key-store.ts new file mode 100644 index 0000000..93f7164 --- /dev/null +++ b/gateway/src/infrastructure/auth/key-store.ts @@ -0,0 +1,126 @@ +import fs from 'fs'; +import path from 'path'; +import crypto from 'crypto'; +import { logger } from '../utils/logger.js'; + +export interface 
StoredKey { + id: string; + provider: string; + label: string; + key: string; + priority: number; + source: 'manual' | 'env'; + addedAt: string; +} + +interface KeyStoreData { + keys: StoredKey[]; +} + +const STORE_DIR = path.join(process.cwd(), '.ekai'); +const STORE_PATH = path.join(STORE_DIR, 'keys.json'); + +function ensureStoreDir(): void { + if (!fs.existsSync(STORE_DIR)) { + fs.mkdirSync(STORE_DIR, { recursive: true }); + } +} + +function readStore(): KeyStoreData { + try { + if (fs.existsSync(STORE_PATH)) { + return JSON.parse(fs.readFileSync(STORE_PATH, 'utf-8')); + } + } catch (error) { + logger.error('Failed to read key store', error, { module: 'key-store' }); + } + return { keys: [] }; +} + +function writeStore(data: KeyStoreData): void { + ensureStoreDir(); + fs.writeFileSync(STORE_PATH, JSON.stringify(data, null, 2), { mode: 0o600 }); +} + +export function addKey(provider: string, key: string, label?: string, priority?: number): StoredKey { + const store = readStore(); + const existing = store.keys.find(k => k.provider === provider && k.key === key); + if (existing) return existing; + + const maxPriority = store.keys + .filter(k => k.provider === provider) + .reduce((max, k) => Math.max(max, k.priority), 0); + + const entry: StoredKey = { + id: crypto.randomUUID(), + provider, + label: label || `${provider}-key-${store.keys.filter(k => k.provider === provider).length + 1}`, + key, + priority: priority ?? 
maxPriority + 1, + source: 'manual', + addedAt: new Date().toISOString(), + }; + + store.keys.push(entry); + writeStore(store); + logger.info('Key added', { provider, label: entry.label, module: 'key-store' }); + return entry; +} + +export function removeKey(id: string): boolean { + const store = readStore(); + const idx = store.keys.findIndex(k => k.id === id); + if (idx === -1) return false; + const removed = store.keys.splice(idx, 1)[0]; + writeStore(store); + logger.info('Key removed', { provider: removed.provider, label: removed.label, module: 'key-store' }); + return true; +} + +export function getKeysForProvider(provider: string): StoredKey[] { + const store = readStore(); + return store.keys + .filter(k => k.provider === provider) + .sort((a, b) => a.priority - b.priority); +} + +export function getAllKeys(): StoredKey[] { + return readStore().keys.sort((a, b) => { + if (a.provider !== b.provider) return a.provider.localeCompare(b.provider); + return a.priority - b.priority; + }); +} + +export function updateKeyPriority(id: string, priority: number): boolean { + const store = readStore(); + const key = store.keys.find(k => k.id === id); + if (!key) return false; + key.priority = priority; + writeStore(store); + return true; +} + +export function maskKey(key: string): string { + if (key.length <= 8) return '****'; + return key.slice(0, 4) + '...' 
+ key.slice(-4); +} + +export function seedFromEnv(provider: string, envVar: string): void { + const value = process.env[envVar]; + if (!value) return; + const store = readStore(); + const exists = store.keys.some(k => k.provider === provider && k.key === value); + if (exists) return; + + const entry: StoredKey = { + id: crypto.randomUUID(), + provider, + label: `${provider}-env`, + key: value, + priority: 999, + source: 'env', + addedAt: new Date().toISOString(), + }; + store.keys.push(entry); + writeStore(store); +} diff --git a/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts b/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts index a7e06b7..6e48edd 100644 --- a/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts @@ -118,7 +118,10 @@ export class ChatCompletionsPassthrough { } } + private resolvedKey: string | undefined; + private get apiKey(): string | undefined { + if (this.resolvedKey) return this.resolvedKey; const auth = this.config.auth; if (!auth) return undefined; const token = process.env[auth.envVar]; @@ -130,10 +133,13 @@ export class ChatCompletionsPassthrough { private buildAuthHeader(): string | undefined { const auth = this.config.auth; - if (!auth) return undefined; + const token = this.resolvedKey || this.apiKey; + if (!token) return undefined; + if (this.resolvedKey) return `Bearer ${this.resolvedKey}`; + + if (!auth) return undefined; const { scheme, template } = auth; - const token = this.apiKey!; if (template) { return template.replace('{{token}}', token); @@ -146,7 +152,7 @@ export class ChatCompletionsPassthrough { return token; } - private buildHeaders(): Record { + private async buildHeaders(): Promise> { const headers: Record = { 'Content-Type': 'application/json', }; @@ -155,9 +161,17 @@ export class ChatCompletionsPassthrough { Object.assign(headers, this.config.staticHeaders); } + try { + const { 
resolveKeyForProvider } = await import('../auth/key-manager.js'); + this.resolvedKey = await resolveKeyForProvider(this.config.provider); + } catch { + this.resolvedKey = undefined; + } + const authHeader = this.buildAuthHeader(); - if (authHeader && this.config.auth) { - headers[this.config.auth.header] = authHeader; + if (authHeader) { + const headerName = this.config.auth?.header ?? 'Authorization'; + headers[headerName] = authHeader; } return headers; @@ -214,7 +228,7 @@ export class ChatCompletionsPassthrough { try { response = await fetchFunction(this.config.baseUrl, { method: 'POST', - headers: this.buildHeaders(), + headers: await this.buildHeaders(), body: JSON.stringify(this.buildPayload(body, stream)), }); } catch (error) { diff --git a/gateway/src/infrastructure/passthrough/messages-passthrough.ts b/gateway/src/infrastructure/passthrough/messages-passthrough.ts index 8d21764..3b0f3f9 100644 --- a/gateway/src/infrastructure/passthrough/messages-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/messages-passthrough.ts @@ -100,7 +100,10 @@ export class MessagesPassthrough { return this.config.baseUrl; } + private resolvedKey: string | undefined; + private get apiKey(): string | undefined { + if (this.resolvedKey) return this.resolvedKey; if (!this.config.auth) return undefined; const envVar = this.config.auth.envVar; const token = process.env[envVar]; @@ -111,11 +114,13 @@ export class MessagesPassthrough { } private buildAuthHeader(): string | undefined { + const token = this.resolvedKey || this.apiKey; + if (!token) return undefined; + + if (this.resolvedKey) return `Bearer ${this.resolvedKey}`; + if (!this.config.auth) return undefined; const { scheme, template } = this.config.auth; - const token = this.apiKey; - - if (!token) return undefined; if (template) { return template.replace('{{token}}', token); @@ -128,14 +133,22 @@ export class MessagesPassthrough { return token; } - private buildHeaders(): Record { + private async buildHeaders(): 
Promise> { const headers: Record = { 'Content-Type': 'application/json', ...this.config.staticHeaders, }; - // Only add auth header if auth is configured (not x402 mode) - if (this.config.auth) { + try { + const { resolveKeyForProvider } = await import('../auth/key-manager.js'); + this.resolvedKey = await resolveKeyForProvider(this.config.provider); + } catch { + this.resolvedKey = undefined; + } + + if (this.resolvedKey && this.config.provider === 'anthropic') { + headers['x-api-key'] = this.resolvedKey; + } else if (this.config.auth) { const authHeader = this.buildAuthHeader(); if (authHeader) { headers[this.config.auth.header] = authHeader; @@ -284,7 +297,7 @@ export class MessagesPassthrough { response = await fetchFunction(this.resolveBaseUrl(), { method: 'POST', - headers: this.buildHeaders(), + headers: await this.buildHeaders(), body: payloadJson, }); } catch (error) { diff --git a/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts b/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts index aa6d3e1..522e8af 100644 --- a/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts @@ -1,59 +1,59 @@ -import { Response as ExpressResponse } from 'express'; -import { logger } from '../utils/logger.js'; -import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; -import { CONTENT_TYPES } from '../../domain/types/provider.js'; -import { getConfig } from '../config/app-config.js'; -import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; -import { injectMemoryContext, persistMemory } from '../memory/memory-helper.js'; - -export class OpenAIResponsesPassthrough implements ResponsesPassthrough { - constructor(private readonly config: ResponsesPassthroughConfig) {} - - private get baseUrl(): string { - return this.config.baseUrl; - } - - private get apiKey(): string { - const envVar 
= this.config.auth?.envVar; - if (envVar) { - const token = process.env[envVar]; - if (token) return token; - } - - const fallback = getConfig().providers.openai.apiKey; - if (fallback) return fallback; - - throw new AuthenticationError('OpenAI API key not configured', { provider: this.config.provider }); - } - - private buildAuthHeader(): string { - const token = this.apiKey; - const { auth } = this.config; - if (!auth) { - return `Bearer ${token}`; - } - - if (auth.template) { - return auth.template.replace('{{token}}', token); - } - - if (auth.scheme) { - return `${auth.scheme} ${token}`.trim(); - } - - return token; - } - - private buildHeaders(): Record { - const headers: Record = { - 'Content-Type': 'application/json', - ...this.config.staticHeaders, - }; - - const headerName = this.config.auth?.header ?? 'Authorization'; - headers[headerName] = this.buildAuthHeader(); - return headers; - } +import { Response as ExpressResponse } from 'express'; +import { logger } from '../utils/logger.js'; +import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; +import { CONTENT_TYPES } from '../../domain/types/provider.js'; +import { getConfig } from '../config/app-config.js'; +import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; +import { injectMemoryContext, persistMemory } from '../memory/memory-helper.js'; + +export class OpenAIResponsesPassthrough implements ResponsesPassthrough { + constructor(private readonly config: ResponsesPassthroughConfig) {} + + private get baseUrl(): string { + return this.config.baseUrl; + } + + private resolvedKey: string | undefined; + + private get apiKey(): string { + if (this.resolvedKey) return this.resolvedKey; + const envVar = this.config.auth?.envVar; + if (envVar) { + const token = process.env[envVar]; + if (token) return token; + } + const fallback = getConfig().providers.openai.apiKey; + if (fallback) return fallback; + throw new AuthenticationError('OpenAI API 
key not configured', { provider: this.config.provider }); + } + + private buildAuthHeader(): string { + if (this.resolvedKey) return `Bearer ${this.resolvedKey}`; + const token = this.apiKey; + const { auth } = this.config; + if (!auth) return `Bearer ${token}`; + if (auth.template) return auth.template.replace('{{token}}', token); + if (auth.scheme) return `${auth.scheme} ${token}`.trim(); + return token; + } + + private async buildHeaders(): Promise> { + const headers: Record = { + 'Content-Type': 'application/json', + ...this.config.staticHeaders, + }; + + try { + const { resolveKeyForProvider } = await import('../auth/key-manager.js'); + this.resolvedKey = await resolveKeyForProvider(this.config.provider); + } catch { + this.resolvedKey = undefined; + } + + const headerName = this.config.auth?.header ?? 'Authorization'; + headers[headerName] = this.buildAuthHeader(); + return headers; + } // Store usage data for tracking private usage: { @@ -63,15 +63,15 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { } | null = null; // Buffer to handle multi-chunk SSE events - private eventBuffer: string = ''; - private assistantResponseBuffer: string = ''; + private eventBuffer: string = ''; + private assistantResponseBuffer: string = ''; private async makeRequest(body: any, stream: boolean): Promise { - const response = await fetch(this.baseUrl, { - method: 'POST', - headers: this.buildHeaders(), - body: JSON.stringify({ ...body, stream, store: false }) // Not storing responses - }); + const response = await fetch(this.baseUrl, { + method: 'POST', + headers: await this.buildHeaders(), + body: JSON.stringify({ ...body, stream, store: false }), + }); if (!response.ok) { const errorText = await response.text(); @@ -81,17 +81,17 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { return response; } - private trackUsage(text: string, model: string, clientIp?: string): void { - try { - // Add to buffer to handle multi-chunk 
events - this.eventBuffer += text; - - // Extract assistant response content from text.delta events - const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; - let match; - while ((match = textDeltaMatch.exec(text)) !== null) { - this.assistantResponseBuffer += match[1]; - } + private trackUsage(text: string, model: string, clientIp?: string): void { + try { + // Add to buffer to handle multi-chunk events + this.eventBuffer += text; + + // Extract assistant response content from text.delta events + const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; + let match; + while ((match = textDeltaMatch.exec(text)) !== null) { + this.assistantResponseBuffer += match[1]; + } // Look for the exact response.completed event if (this.eventBuffer.includes('"type":"response.completed"')) { @@ -178,22 +178,22 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { async handleDirectRequest(request: any, res: ExpressResponse, clientIp?: string): Promise { // Reset usage tracking for new request - this.usage = null; - this.eventBuffer = ''; - this.assistantResponseBuffer = ''; - - injectMemoryContext(request, { - provider: this.config.provider, - defaultUserId: 'default', - extractCurrentUserInputs: req => extractResponsesUserInputs(req), - applyMemoryContext: (req, context) => { - if (req.instructions) { - req.instructions = `${context}\n\n---\n\n${req.instructions}`; - } else { - req.instructions = context; - } - } - }); + this.usage = null; + this.eventBuffer = ''; + this.assistantResponseBuffer = ''; + + injectMemoryContext(request, { + provider: this.config.provider, + defaultUserId: 'default', + extractCurrentUserInputs: req => extractResponsesUserInputs(req), + applyMemoryContext: (req, context) => { + if (req.instructions) { + req.instructions = `${context}\n\n---\n\n${req.instructions}`; + } else { + req.instructions = context; + } + } + }); if (request.stream) { const response = await 
this.makeRequest(request, true); @@ -217,20 +217,20 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { res.write(value); } - res.end(); - - persistMemory(request, this.assistantResponseBuffer, { - provider: this.config.provider, - defaultUserId: 'default', - extractUserContent: req => req.input || '', - metadataBuilder: req => ({ - model: req.model, - provider: this.config.provider, - }), - }); - } else { - const response = await this.makeRequest(request, false); - const json = await response.json(); + res.end(); + + persistMemory(request, this.assistantResponseBuffer, { + provider: this.config.provider, + defaultUserId: 'default', + extractUserContent: req => req.input || '', + metadataBuilder: req => ({ + model: req.model, + provider: this.config.provider, + }), + }); + } else { + const response = await this.makeRequest(request, false); + const json = await response.json(); // Track usage for non-streaming requests if (json.usage) { @@ -258,23 +258,23 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { }).catch(() => {}); } - const assistantResponse = json?.output?.[0]?.content?.[0]?.text || ''; - persistMemory(request, assistantResponse, { - provider: this.config.provider, - defaultUserId: 'default', - extractUserContent: req => req.input || '', - metadataBuilder: req => ({ - model: req.model, - provider: this.config.provider, - }), - }); - - res.json(json); - } - } -} - -function extractResponsesUserInputs(request: any): string[] { - const content = (request.input || '').trim(); - return content ? 
[content] : [];
-}
+
+      const assistantResponse = json?.output?.[0]?.content?.[0]?.text || '';
+      persistMemory(request, assistantResponse, {
+        provider: this.config.provider,
+        defaultUserId: 'default',
+        extractUserContent: req => req.input || '',
+        metadataBuilder: req => ({
+          model: req.model,
+          provider: this.config.provider,
+        }),
+      });
+
+      res.json(json);
+    }
+  }
+}
+
+function extractResponsesUserInputs(request: any): string[] {
+  const content = (request.input || '').trim();
+  return content ? [content] : [];
+}

From 7b2d69b45f85d036eac6adb2adae23d95c93e576 Mon Sep 17 00:00:00 2001
From: Shashank <13179671+sm86@users.noreply.github.com>
Date: Fri, 6 Mar 2026 02:02:23 -0500
Subject: [PATCH 5/5] Update README.md link to new work

https://github.com/ekailabs/contexto
---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index a71413f..baf8f5b 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,10 @@
 [![GitHub stars](https://img.shields.io/github/stars/ekailabs/ekai-gateway.svg?style=social)](https://github.com/ekailabs/ekai-gateway)
 [![Discord](https://img.shields.io/badge/Discord-Join%20Server-7289da?logo=discord&logoColor=white)](https://discord.com/invite/5VsUUEfbJk)
 
+Archived Repo:
+For our latest work, check https://github.com/ekailabs/contexto
+
+
 Multi-provider AI proxy with usage dashboard supporting Anthropic, OpenAI, Google Gemini, xAI, and OpenRouter models through OpenAI-compatible and Anthropic-compatible APIs. **Designed for self-hosted personal use** - run your own instance to securely proxy AI requests using your API keys.