diff --git a/.env.example b/.env.example
index 863f578..9ed94d9 100644
--- a/.env.example
+++ b/.env.example
@@ -16,7 +16,6 @@ GOOGLE_API_KEY=your_key_here
 # Memory is embedded in the OpenRouter process (no separate service).
 # MEMORY_DB_PATH=./memory.db # SQLite path for memory store (used by OpenRouter)
 
-
 # Optional x402 passthrough for OpenRouter access
 X402_BASE_URL=x402_supported_provider_url
 PRIVATE_KEY=
diff --git a/.gitignore b/.gitignore
index 569c4a6..e168151 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,6 +64,9 @@ Thumbs.db
 *.tmp
 *.temp
 
+# Key and token storage
+.ekai/
+
 # AI configurations
 CLAUDE.md
 AGENTS.md
diff --git a/gateway/src/app/handlers/key-handler.ts b/gateway/src/app/handlers/key-handler.ts
new file mode 100644
index 0000000..582e582
--- /dev/null
+++ b/gateway/src/app/handlers/key-handler.ts
@@ -0,0 +1,114 @@
+import { Request, Response } from 'express';
+import { addKey, removeKey, getAllKeys, updateKeyPriority, maskKey, StoredKey } from '../../infrastructure/auth/key-store.js';
+import { logger } from '../../infrastructure/utils/logger.js';
+
+const VALID_PROVIDERS = ['anthropic', 'openai', 'openrouter', 'xai', 'zai', 'google', 'ollama'];
+
+/** True for finite, non-negative numbers; the key store sorts keys numerically by priority. */
+function isValidPriority(value: unknown): value is number {
+  return typeof value === 'number' && Number.isFinite(value) && value >= 0;
+}
+
+/** Shapes a stored key for API responses: the raw secret is replaced by its masked form. */
+function toPublicKey(k: StoredKey) {
+  return {
+    id: k.id,
+    provider: k.provider,
+    label: k.label,
+    maskedKey: maskKey(k.key),
+    priority: k.priority,
+    source: k.source,
+    addedAt: k.addedAt,
+  };
+}
+
+/** Responds with `{ keys }`, every stored key in masked form; 500 if listing fails. */
+export async function handleListKeys(_req: Request, res: Response): Promise<void> {
+  try {
+    res.json({ keys: getAllKeys().map(toPublicKey) });
+  } catch (error) {
+    logger.error('Failed to list keys', error, { module: 'key-handler' });
+    res.status(500).json({ error: 'Failed to list keys' });
+  }
+}
+
+/**
+ * Stores a key taken from the request body (`provider`, `key`, optional `label` and `priority`).
+ * Responds 201 with the masked entry, 400 on invalid input, 500 on unexpected failure.
+ */
+export async function handleAddKey(req: Request, res: Response): Promise<void> {
+  try {
+    const { provider, key, label, priority } = req.body;
+
+    if (!provider || !key) {
+      res.status(400).json({ error: 'provider and key are required' });
+      return;
+    }
+
+    if (!VALID_PROVIDERS.includes(provider)) {
+      res.status(400).json({ error: `Invalid provider. Use one of: ${VALID_PROVIDERS.join(', ')}` });
+      return;
+    }
+
+    if (typeof key !== 'string' || key.trim().length === 0) {
+      res.status(400).json({ error: 'key must be a non-empty string' });
+      return;
+    }
+
+    // label and priority are optional, but a wrongly-typed value would be persisted as-is.
+    if (label != null && typeof label !== 'string') {
+      res.status(400).json({ error: 'label must be a string' });
+      return;
+    }
+
+    if (priority != null && !isValidPriority(priority)) {
+      res.status(400).json({ error: 'priority must be a non-negative number' });
+      return;
+    }
+
+    const stored = addKey(provider, key.trim(), label, priority);
+    res.status(201).json(toPublicKey(stored));
+  } catch (error) {
+    logger.error('Failed to add key', error, { module: 'key-handler' });
+    res.status(500).json({ error: 'Failed to add key' });
+  }
+}
+
+/** Deletes the key named by the `id` route parameter; 404 when no such key exists. */
+export async function handleRemoveKey(req: Request, res: Response): Promise<void> {
+  try {
+    const { id } = req.params;
+    const removed = removeKey(id);
+    if (!removed) {
+      res.status(404).json({ error: 'Key not found' });
+      return;
+    }
+    res.json({ status: 'removed', id });
+  } catch (error) {
+    logger.error('Failed to remove key', error, { module: 'key-handler' });
+    res.status(500).json({ error: 'Failed to remove key' });
+  }
+}
+
+/** Sets the priority of key `id` from the body's `priority`; 400 if invalid, 404 if unknown. */
+export async function handleUpdateKeyPriority(req: Request, res: Response): Promise<void> {
+  try {
+    const { id } = req.params;
+    const { priority } = req.body;
+
+    if (!isValidPriority(priority)) {
+      res.status(400).json({ error: 'priority must be a non-negative number' });
+      return;
+    }
+
+    const updated = updateKeyPriority(id, priority);
+    if (!updated) {
+      res.status(404).json({ error: 'Key not found' });
+      return;
+    }
+    res.json({ status: 'updated', id, priority });
+  } catch (error) {
+    logger.error('Failed to update key priority', error, { module: 'key-handler' });
+    res.status(500).json({ error: 'Failed to update key priority' });
+  }
+}
diff --git a/gateway/src/costs/ollama.yaml b/gateway/src/costs/ollama.yaml
new file mode 100644
index 0000000..2d7bb29
--- /dev/null
+++ b/gateway/src/costs/ollama.yaml
@@ -0,0 +1,59 @@
+provider: "ollama"
+currency: "USD"
+unit: "MTok"
+models:
+  # Ollama runs models locally — all costs are zero.
+  # Users may add custom model entries here if needed.
+  llama3.3:
+    input: 0.00
+    output: 0.00
+  llama3.2:
+    input: 0.00
+    output: 0.00
+  llama3.1:
+    input: 0.00
+    output: 0.00
+  llama3:
+    input: 0.00
+    output: 0.00
+  gemma3:
+    input: 0.00
+    output: 0.00
+  gemma2:
+    input: 0.00
+    output: 0.00
+  qwen3:
+    input: 0.00
+    output: 0.00
+  qwen2.5-coder:
+    input: 0.00
+    output: 0.00
+  deepseek-r1:
+    input: 0.00
+    output: 0.00
+  deepseek-coder-v2:
+    input: 0.00
+    output: 0.00
+  phi4:
+    input: 0.00
+    output: 0.00
+  phi3:
+    input: 0.00
+    output: 0.00
+  mistral:
+    input: 0.00
+    output: 0.00
+  mixtral:
+    input: 0.00
+    output: 0.00
+  codellama:
+    input: 0.00
+    output: 0.00
+  starcoder2:
+    input: 0.00
+    output: 0.00
+metadata:
+  last_updated: "2026-02-03"
+  source: "https://ollama.com"
+  notes: "Ollama runs models locally. All API costs are zero — hardware costs are borne by the user."
+  version: "1.0"
diff --git a/gateway/src/domain/providers/ollama-provider.ts b/gateway/src/domain/providers/ollama-provider.ts
new file mode 100644
index 0000000..ee43e6d
--- /dev/null
+++ b/gateway/src/domain/providers/ollama-provider.ts
@@ -0,0 +1,109 @@
+import { BaseProvider } from './base-provider.js';
+import { CanonicalRequest, CanonicalResponse } from 'shared/types/index.js';
+import { getConfig } from '../../infrastructure/config/app-config.js';
+
+/** Request body produced by `transformRequest`. */
+interface OllamaRequest {
+  model: string;
+  messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string; }>;
+  max_tokens?: number;
+  temperature?: number;
+  stream?: boolean;
+  stop?: string | string[];
+}
+
+/** Response body consumed by `transformResponse`. */
+interface OllamaResponse {
+  id: string;
+  object: string;
+  created: number;
+  model: string;
+  choices: Array<{
+    index: number;
+    message: { role: string; content: string | null; };
+    finish_reason: string;
+  }>;
+  // Optional: transformResponse already falls back to 0 when usage is absent.
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
+/** Provider that maps canonical requests and responses to and from the configured Ollama endpoint. */
+export class OllamaProvider extends BaseProvider {
+  readonly name = 'ollama';
+
+  protected get baseUrl(): string {
+    return getConfig().providers.ollama.baseUrl;
+  }
+
+  /** Configured API key, or the placeholder 'ollama' when none is set. */
+  protected get apiKey(): string | undefined {
+    return getConfig().providers.ollama.apiKey || 'ollama';
+  }
+
+  isConfigured(): boolean {
+    return getConfig().providers.ollama.enabled;
+  }
+
+  /** Flattens each message's text parts into one string; non-text parts are dropped. */
+  protected transformRequest(request: CanonicalRequest): OllamaRequest {
+    const messages = request.messages.map(msg => ({
+      role: msg.role,
+      content: msg.content
+        .filter(c => c.type === 'text')
+        .map(c => c.text)
+        .join('')
+    }));
+
+    return {
+      model: request.model,
+      messages,
+      max_tokens: request.maxTokens,
+      temperature: request.temperature,
+      stream: request.stream || false,
+      stop: request.stopSequences
+    };
+  }
+
+  /**
+   * Converts the first choice of an Ollama response into the canonical shape.
+   * @throws Error when the response carries no choices, instead of failing on an undefined access.
+   */
+  protected transformResponse(response: OllamaResponse): CanonicalResponse {
+    const choice = response.choices?.[0];
+    if (!choice) {
+      throw new Error('Ollama response contained no choices');
+    }
+
+    return {
+      id: response.id,
+      model: response.model,
+      created: response.created,
+      message: {
+        role: 'assistant',
+        content: [{
+          type: 'text',
+          text: choice.message?.content ?? ''
+        }]
+      },
+      finishReason: this.mapFinishReason(choice.finish_reason),
+      usage: {
+        inputTokens: response.usage?.prompt_tokens ?? 0,
+        outputTokens: response.usage?.completion_tokens ?? 0,
+        totalTokens: response.usage?.total_tokens ?? 0
+      }
+    };
+  }
+
+  /** Only 'length' is distinguished; every other reason is reported as 'stop'. */
+  private mapFinishReason(reason: string): 'stop' | 'length' | 'tool_calls' | 'error' {
+    switch (reason) {
+      case 'stop': return 'stop';
+      case 'length': return 'length';
+      default: return 'stop';
+    }
+  }
+}
diff --git a/gateway/src/infrastructure/auth/key-manager.ts b/gateway/src/infrastructure/auth/key-manager.ts
new file mode 100644
index 0000000..03ccbff
--- /dev/null
+++ b/gateway/src/infrastructure/auth/key-manager.ts
@@ -0,0 +1,104 @@
+import { getKeysForProvider, seedFromEnv, StoredKey } from './key-store.js';
+import { logger } from '../utils/logger.js';
+
+const ENV_MAP: Record<string, string> = {
+  anthropic: 'ANTHROPIC_API_KEY',
+  openai: 'OPENAI_API_KEY',
+  openrouter: 'OPENROUTER_API_KEY',
+  xai: 'XAI_API_KEY',
+  zai: 'ZAI_API_KEY',
+  google: 'GOOGLE_API_KEY',
+  ollama: 'OLLAMA_API_KEY',
+};
+
+// `${provider}:${keyId}` -> epoch ms at which the key was marked exhausted.
+const exhaustedKeys = new Map<string, number>();
+const EXHAUSTED_COOLDOWN_MS = 5 * 60 * 1000;
+
+let seeded = false;
+
+/** Copies provider keys found in the environment into the key store, once per process. */
+function seedAllFromEnv(): void {
+  if (seeded) return;
+  seeded = true;
+  for (const [provider, envVar] of Object.entries(ENV_MAP)) {
+    seedFromEnv(provider, envVar);
+  }
+}
+
+/** Marks a key as exhausted so key resolution skips it until the cooldown has elapsed. */
+export function markKeyExhausted(provider: string, keyId: string): void {
+  exhaustedKeys.set(`${provider}:${keyId}`, Date.now());
+  logger.info('Key marked exhausted', { provider, keyId, cooldownMs: EXHAUSTED_COOLDOWN_MS, module: 'key-manager' });
+}
+
+/** True while the key is inside its cooldown window; expired marks are dropped on lookup. */
+function isKeyExhausted(provider: string, keyId: string): boolean {
+  const mapKey = `${provider}:${keyId}`;
+  const ts = exhaustedKeys.get(mapKey);
+  if (ts === undefined) return false;
+  if (Date.now() - ts > EXHAUSTED_COOLDOWN_MS) {
+    exhaustedKeys.delete(mapKey);
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Picks the credential to use for a provider.
+ * Order: OAuth access token (openai/anthropic only, when the oauth-service module can be
+ * imported), then the first non-exhausted stored key by priority, then the first stored key
+ * even if exhausted. Returns undefined when the provider has no credential at all.
+ */
+export async function resolveKeyForProvider(provider: string): Promise<string | undefined> {
+  seedAllFromEnv();
+
+  if (provider === 'openai' || provider === 'anthropic') {
+    try {
+      const oauthPath = new URL('./oauth-service.js', import.meta.url).href;
+      // A failed import resolves to null rather than rejecting, so the module may be absent.
+      const mod: any = await import(/* @vite-ignore */ oauthPath).catch(() => null);
+      if (mod?.getValidAccessToken) {
+        const oauthToken = await mod.getValidAccessToken(provider);
+        if (oauthToken) {
+          logger.debug('Using OAuth token', { provider, module: 'key-manager' });
+          return oauthToken;
+        }
+      }
+    } catch (error) {
+      // Still best-effort (falls through to stored keys), but no longer silent.
+      logger.debug('OAuth token lookup failed, falling back to stored keys', {
+        provider,
+        reason: error instanceof Error ? error.message : String(error),
+        module: 'key-manager',
+      });
+    }
+  }
+
+  const keys = getKeysForProvider(provider);
+  for (const key of keys) {
+    if (!isKeyExhausted(provider, key.id)) {
+      logger.debug('Using key', { provider, label: key.label, priority: key.priority, module: 'key-manager' });
+      return key.key;
+    }
+  }
+
+  if (keys.length > 0) {
+    logger.warn('All keys exhausted, using first key as fallback', { provider, module: 'key-manager' });
+    return keys[0].key;
+  }
+
+  return undefined;
+}
+
+/** Number of stored keys (including env-seeded ones) for the provider. */
+export function getKeyCountForProvider(provider: string): number {
+  seedAllFromEnv();
+  return getKeysForProvider(provider).length;
+}
+
+/** Whether at least one key is stored for the provider. */
+export function hasAnyKeyForProvider(provider: string): boolean {
+  seedAllFromEnv();
+  return getKeysForProvider(provider).length > 0;
+}
diff --git a/gateway/src/infrastructure/auth/key-store.ts b/gateway/src/infrastructure/auth/key-store.ts
new file mode 100644
index 0000000..93f7164
--- /dev/null
+++ b/gateway/src/infrastructure/auth/key-store.ts
@@ -0,0 +1,147 @@
+import fs from 'fs';
+import path from 'path';
+import crypto from 'crypto';
+import { logger } from '../utils/logger.js';
+
+export interface StoredKey {
+  id: string;
+  provider: string;
+  label: string;
+  key: string;
+  priority: number;
+  source: 'manual' | 'env';
+  addedAt: string;
+}
+
+interface KeyStoreData {
+  keys: StoredKey[];
+}
+
+const STORE_DIR = path.join(process.cwd(), '.ekai');
+const STORE_PATH = path.join(STORE_DIR, 'keys.json');
+
+/** Creates the store directory if missing, with mode 0700 because it holds raw keys. */
+function ensureStoreDir(): void {
+  if (!fs.existsSync(STORE_DIR)) {
+    fs.mkdirSync(STORE_DIR, { recursive: true, mode: 0o700 });
+  }
+}
+
+/**
+ * Loads the store from disk. A missing, unreadable or malformed file yields an empty store,
+ * so callers can always rely on `keys` being an array.
+ */
+function readStore(): KeyStoreData {
+  try {
+    if (fs.existsSync(STORE_PATH)) {
+      const parsed: unknown = JSON.parse(fs.readFileSync(STORE_PATH, 'utf-8'));
+      const keys = (parsed as Partial<KeyStoreData> | null)?.keys;
+      if (Array.isArray(keys)) {
+        return { keys };
+      }
+      logger.warn('Key store has unexpected shape, ignoring its contents', { module: 'key-store' });
+    }
+  } catch (error) {
+    logger.error('Failed to read key store', error, { module: 'key-store' });
+  }
+  return { keys: [] };
+}
+
+/** Persists the store as pretty-printed JSON; a newly created file gets mode 0600. */
+function writeStore(data: KeyStoreData): void {
+  ensureStoreDir();
+  fs.writeFileSync(STORE_PATH, JSON.stringify(data, null, 2), { mode: 0o600 });
+}
+
+/**
+ * Adds a manually supplied key. If the same provider/key pair is already stored, that entry is
+ * returned unchanged. `label` defaults to `<provider>-key-<n>` and `priority` to one more than
+ * the provider's current maximum.
+ */
+export function addKey(provider: string, key: string, label?: string, priority?: number): StoredKey {
+  const store = readStore();
+  const providerKeys = store.keys.filter(k => k.provider === provider);
+  const existing = providerKeys.find(k => k.key === key);
+  if (existing) return existing;
+
+  const maxPriority = providerKeys.reduce((max, k) => Math.max(max, k.priority), 0);
+
+  const entry: StoredKey = {
+    id: crypto.randomUUID(),
+    provider,
+    label: label || `${provider}-key-${providerKeys.length + 1}`,
+    key,
+    priority: priority ?? maxPriority + 1,
+    source: 'manual',
+    addedAt: new Date().toISOString(),
+  };
+
+  store.keys.push(entry);
+  writeStore(store);
+  logger.info('Key added', { provider, label: entry.label, module: 'key-store' });
+  return entry;
+}
+
+/** Removes the key with the given id; returns false when no such key exists. */
+export function removeKey(id: string): boolean {
+  const store = readStore();
+  const idx = store.keys.findIndex(k => k.id === id);
+  if (idx === -1) return false;
+  const removed = store.keys.splice(idx, 1)[0];
+  writeStore(store);
+  logger.info('Key removed', { provider: removed.provider, label: removed.label, module: 'key-store' });
+  return true;
+}
+
+/** Keys of one provider, lowest priority number first. */
+export function getKeysForProvider(provider: string): StoredKey[] {
+  const store = readStore();
+  return store.keys
+    .filter(k => k.provider === provider)
+    .sort((a, b) => a.priority - b.priority);
+}
+
+/** All keys, ordered by provider name and then by priority number. */
+export function getAllKeys(): StoredKey[] {
+  return readStore().keys.sort((a, b) => {
+    if (a.provider !== b.provider) return a.provider.localeCompare(b.provider);
+    return a.priority - b.priority;
+  });
+}
+
+/** Updates one key's priority; returns false when the id is unknown. */
+export function updateKeyPriority(id: string, priority: number): boolean {
+  const store = readStore();
+  const key = store.keys.find(k => k.id === id);
+  if (!key) return false;
+  key.priority = priority;
+  writeStore(store);
+  return true;
+}
+
+/** Shows only the first and last four characters; keys of eight characters or fewer are fully hidden. */
+export function maskKey(key: string): string {
+  if (key.length <= 8) return '****';
+  return key.slice(0, 4) + '...' + key.slice(-4);
+}
+
+/** Stores the value of `envVar` (if set and not already stored) as an 'env' key with priority 999. */
+export function seedFromEnv(provider: string, envVar: string): void {
+  const value = process.env[envVar];
+  if (!value) return;
+  const store = readStore();
+  const exists = store.keys.some(k => k.provider === provider && k.key === value);
+  if (exists) return;
+
+  const entry: StoredKey = {
+    id: crypto.randomUUID(),
+    provider,
+    label: `${provider}-env`,
+    key: value,
+    priority: 999,
+    source: 'env',
+    addedAt: new Date().toISOString(),
+  };
+  store.keys.push(entry);
+  writeStore(store);
+}
diff --git a/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts
new file mode 100644
index 0000000..a692b5d
--- /dev/null
+++ b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts
@@ -0,0 +1,288 @@
+import { Response as ExpressResponse } from 'express';
+import { logger } from '../utils/logger.js';
+import { ProviderError } from '../../shared/errors/index.js';
+import { CONTENT_TYPES } from '../../domain/types/provider.js';
+import { getConfig } from '../config/app-config.js';
+import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js';
+import { injectMemoryContext, persistMemory } from '../memory/memory-helper.js';
+
+/**
+ * Forwards Responses-API requests to Ollama, relaying the upstream reply to the client while
+ * recording token usage and handing the exchange to the memory helper.
+ *
+ * NOTE(review): per-request state (eventBuffer, assistantResponseBuffer) lives on the instance;
+ * concurrent requests on one instance would share it — confirm instances are not shared.
+ */
+export class OllamaResponsesPassthrough implements ResponsesPassthrough {
+  constructor(private readonly config: ResponsesPassthroughConfig) {}
+
+  /** Explicit config URL if set; otherwise the Ollama base URL with a trailing `/v1` turned into `/v1/responses`. */
+  private get baseUrl(): string {
+    if (this.config.baseUrl) {
+      return this.config.baseUrl;
+    }
+    const configBaseUrl = getConfig().providers.ollama.baseUrl;
+    return configBaseUrl.replace(/\/v1\/?$/, '/v1/responses');
+  }
+
+  /** Auth header value built from the env var named in config, or '' when auth is unset or the variable is empty. */
+  private buildAuthHeader(): string {
+    const { auth } = this.config;
+    if (!auth) {
+      return '';
+    }
+
+    const envVar = auth.envVar;
+    if (envVar) {
+      const token = process.env[envVar];
+      if (token) {
+        if (auth.scheme) {
+          return `${auth.scheme} ${token}`.trim();
+        }
+        return token;
+      }
+    }
+
+    return '';
+  }
+
+  private buildHeaders(): Record<string, string> {
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+      ...this.config.staticHeaders,
+    };
+
+    const headerName = this.config.auth?.header ?? 'Authorization';
+    const authHeader = this.buildAuthHeader();
+    if (authHeader) {
+      headers[headerName] = authHeader;
+    }
+    return headers;
+  }
+
+  private usage: {
+    inputTokens?: number;
+    outputTokens?: number;
+    totalTokens?: number;
+  } | null = null;
+
+  private eventBuffer: string = '';
+  private assistantResponseBuffer: string = '';
+
+  /** POSTs the body upstream with `stream` set and `store: false`; a non-OK reply becomes a ProviderError. */
+  private async makeRequest(body: any, stream: boolean): Promise<Response> {
+    const response = await fetch(this.baseUrl, {
+      method: 'POST',
+      headers: this.buildHeaders(),
+      body: JSON.stringify({ ...body, stream, store: false })
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new ProviderError('ollama', errorText || `HTTP ${response.status}`, response.status, { endpoint: this.baseUrl });
+    }
+
+    return response;
+  }
+
+  /**
+   * Feeds one decoded stream chunk into the parser: text deltas found in the chunk are appended to
+   * the assistant buffer, and once a `response.completed` event is buffered its usage is recorded.
+   * Never throws; failures are logged.
+   */
+  private trackUsage(text: string, model: string, clientIp?: string): void {
+    try {
+      this.eventBuffer += text;
+
+      // NOTE(review): only matches deltas contained in a single chunk, and captures the raw JSON-escaped text.
+      const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g;
+      let match;
+      while ((match = textDeltaMatch.exec(text)) !== null) {
+        this.assistantResponseBuffer += match[1];
+      }
+
+      if (this.eventBuffer.includes('"type":"response.completed"')) {
+        const startIndex = this.eventBuffer.indexOf('{"type":"response.completed"');
+        if (startIndex === -1) return;
+
+        // Find the matching closing brace. Braces inside JSON strings are counted too, so a
+        // payload with unbalanced braces in its text is not extracted correctly.
+        let braceCount = 0;
+        let endIndex = -1;
+
+        for (let i = startIndex; i < this.eventBuffer.length; i++) {
+          if (this.eventBuffer[i] === '{') braceCount++;
+          if (this.eventBuffer[i] === '}') braceCount--;
+
+          if (braceCount === 0) {
+            endIndex = i;
+            break;
+          }
+        }
+
+        if (endIndex === -1) return;
+
+        const jsonString = this.eventBuffer.substring(startIndex, endIndex + 1);
+
+        logger.debug('JSON response found', { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' });
+
+        try {
+          const data = JSON.parse(jsonString);
+          logger.debug('Response parsed successfully', { provider: 'ollama', operation: 'usage_extraction', module: 'ollama-responses-passthrough' });
+
+          if (data.response?.usage) {
+            const usage = data.response.usage;
+            const inputTokens = usage.input_tokens || 0;
+            const outputTokens = usage.output_tokens || 0;
+            const totalTokens = usage.total_tokens || (inputTokens + outputTokens);
+
+            logger.debug('Usage tracking from response', {
+              provider: 'ollama',
+              model,
+              inputTokens,
+              outputTokens,
+              totalTokens,
+              module: 'ollama-responses-passthrough'
+            });
+
+            import('../utils/usage-tracker.js').then(({ usageTracker }) => {
+              usageTracker.trackUsage(
+                model,
+                'ollama',
+                inputTokens,
+                outputTokens,
+                0,
+                0,
+                clientIp
+              );
+            }).catch((error) => {
+              logger.error('Usage tracking failed', error, { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' });
+            });
+          } else {
+            logger.warn('No usage data in response', { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' });
+          }
+        } catch (parseError) {
+          logger.error('JSON parse error', parseError, { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' });
+        }
+
+        this.eventBuffer = '';
+      }
+    } catch (error) {
+      logger.error('Usage tracking failed', error, { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' });
+    }
+  }
+
+  /**
+   * Handles one Responses-API request: injects memory context into `instructions`, forwards the
+   * request upstream, relays the reply (event stream when `request.stream` is set, JSON otherwise),
+   * then tracks usage and passes the exchange to `persistMemory`.
+   */
+  async handleDirectRequest(request: any, res: ExpressResponse, clientIp?: string): Promise<void> {
+    this.usage = null;
+    this.eventBuffer = '';
+    this.assistantResponseBuffer = '';
+
+    injectMemoryContext(request, {
+      provider: this.config.provider,
+      defaultUserId: 'default',
+      extractCurrentUserInputs: req => extractResponsesUserInputs(req),
+      applyMemoryContext: (req, context) => {
+        if (req.instructions) {
+          req.instructions = `${context}\n\n---\n\n${req.instructions}`;
+        } else {
+          req.instructions = context;
+        }
+      }
+    });
+
+    if (request.stream) {
+      const response = await this.makeRequest(request, true);
+      if (!response.body) {
+        // Checked before the response headers are written.
+        throw new ProviderError('ollama', 'Upstream response has no body', 502, { endpoint: this.baseUrl });
+      }
+
+      res.writeHead(200, {
+        'Content-Type': CONTENT_TYPES.EVENT_STREAM,
+        'Cache-Control': 'no-cache',
+        'Connection': 'keep-alive',
+        'Access-Control-Allow-Origin': '*',
+      });
+
+      const reader = response.body.getReader();
+      // One streaming decoder for the whole response: a multi-byte character split across two
+      // chunks is decoded correctly instead of turning into replacement characters.
+      const decoder = new TextDecoder();
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        res.write(value);
+        // Parsed synchronously (trackUsage never throws) so the assistant buffer is complete
+        // by the time persistMemory reads it below.
+        this.trackUsage(decoder.decode(value, { stream: true }), request.model, clientIp);
+      }
+      res.end();
+
+      persistMemory(request, this.assistantResponseBuffer, {
+        provider: this.config.provider,
+        defaultUserId: 'default',
+        extractUserContent: req => req.input || '',
+        metadataBuilder: req => ({
+          model: req.model,
+          provider: this.config.provider,
+        }),
+      });
+    } else {
+      const response = await this.makeRequest(request, false);
+      const json = await response.json();
+
+      if (json.usage) {
+        const inputTokens = json.usage.input_tokens || 0;
+        const outputTokens = json.usage.output_tokens || 0;
+        const totalTokens = json.usage.total_tokens || (inputTokens + outputTokens);
+
+        logger.debug('Tracking non-streaming usage', {
+          provider: 'ollama',
+          model: request.model,
+          inputTokens,
+          outputTokens,
+          totalTokens,
+          module: 'ollama-responses-passthrough'
+        });
+
+        import('../utils/usage-tracker.js').then(({ usageTracker }) => {
+          usageTracker.trackUsage(request.model, 'ollama', inputTokens, outputTokens, 0, 0, clientIp);
+        }).catch((error) => {
+          logger.error('Usage tracking failed', error, { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' });
+        });
+      }
+
+      const assistantResponse = json?.output?.[0]?.content?.[0]?.text || json?.output_text || '';
+      persistMemory(request, assistantResponse, {
+        provider: this.config.provider,
+        defaultUserId: 'default',
+        extractUserContent: req => req.input || '',
+        metadataBuilder: req => ({
+          model: req.model,
+          provider: this.config.provider,
+        }),
+      });
+
+      res.json(json);
+    }
+  }
+}
+
+/**
+ * Returns the request's `input` as a one-element list when it is a non-blank string.
+ * Any other input (missing, or a non-string value such as an array of items) yields an empty
+ * list instead of throwing on `.trim()`.
+ */
+function extractResponsesUserInputs(request: any): string[] {
+  const input = request?.input;
+  if (typeof input !== 'string') return [];
+  const content = input.trim();
+  return content ? [content] : [];
+}