From aea46652bc48c8b5f7df263da1a73e749691a6d2 Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Sat, 7 Feb 2026 16:54:18 +0530 Subject: [PATCH 1/5] feat: Add Ollama provider support - Add OllamaProvider class with OpenAI-compatible API support - Register Ollama in ProviderRegistry with model selection rules - Add Ollama configuration to AppConfig (baseUrl, apiKey, enabled) - Add Ollama to chat_completions_providers_v1.json catalog with 16 popular models - Add ollama.yaml pricing file (free/local models) - Update ProviderName type to include 'ollama' - Add OLLAMA_BASE_URL and OLLAMA_API_KEY to .env.example Ollama runs models locally and exposes an OpenAI-compatible API at http://localhost:11434/v1 by default. Users can configure a custom base URL via OLLAMA_BASE_URL environment variable. --- .env.example | 3 + gateway/src/costs/ollama.yaml | 59 ++++ .../src/domain/providers/ollama-provider.ts | 104 +++++++ .../src/domain/services/provider-registry.ts | 5 +- .../src/infrastructure/config/app-config.ts | 275 +++++++++--------- .../chat_completions_providers_v1.json | 38 +++ shared/types/types.ts | 2 +- 7 files changed, 349 insertions(+), 137 deletions(-) create mode 100644 gateway/src/costs/ollama.yaml create mode 100644 gateway/src/domain/providers/ollama-provider.ts diff --git a/.env.example b/.env.example index 1b31994..9cd322e 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,9 @@ XAI_API_KEY=your_key_here OPENROUTER_API_KEY=your_key_here ZAI_API_KEY=your_key_here GOOGLE_API_KEY=your_key_here +# Ollama (local models — no API key needed, just set the base URL) +# OLLAMA_BASE_URL=http://localhost:11434/v1 +# OLLAMA_API_KEY= # optional, only if behind an auth proxy PORT=3001 # Optional x402 passthrough for OpenRouter access X402_BASE_URL=x402_supported_provider_url diff --git a/gateway/src/costs/ollama.yaml b/gateway/src/costs/ollama.yaml new file mode 100644 index 0000000..2d7bb29 --- /dev/null +++ b/gateway/src/costs/ollama.yaml @@ -0,0 +1,59 @@ +provider: 
"ollama" +currency: "USD" +unit: "MTok" +models: + # Ollama runs models locally — all costs are zero. + # Users may add custom model entries here if needed. + llama3.3: + input: 0.00 + output: 0.00 + llama3.2: + input: 0.00 + output: 0.00 + llama3.1: + input: 0.00 + output: 0.00 + llama3: + input: 0.00 + output: 0.00 + gemma3: + input: 0.00 + output: 0.00 + gemma2: + input: 0.00 + output: 0.00 + qwen3: + input: 0.00 + output: 0.00 + qwen2.5-coder: + input: 0.00 + output: 0.00 + deepseek-r1: + input: 0.00 + output: 0.00 + deepseek-coder-v2: + input: 0.00 + output: 0.00 + phi4: + input: 0.00 + output: 0.00 + phi3: + input: 0.00 + output: 0.00 + mistral: + input: 0.00 + output: 0.00 + mixtral: + input: 0.00 + output: 0.00 + codellama: + input: 0.00 + output: 0.00 + starcoder2: + input: 0.00 + output: 0.00 +metadata: + last_updated: "2026-02-03" + source: "https://ollama.com" + notes: "Ollama runs models locally. All API costs are zero — hardware costs are borne by the user." + version: "1.0" diff --git a/gateway/src/domain/providers/ollama-provider.ts b/gateway/src/domain/providers/ollama-provider.ts new file mode 100644 index 0000000..23df2cb --- /dev/null +++ b/gateway/src/domain/providers/ollama-provider.ts @@ -0,0 +1,104 @@ +import { BaseProvider } from './base-provider.js'; +import { CanonicalRequest, CanonicalResponse } from 'shared/types/index.js'; +import { getConfig } from '../../infrastructure/config/app-config.js'; + +interface OllamaRequest { + model: string; + messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string; }>; + max_tokens?: number; + temperature?: number; + stream?: boolean; + stop?: string | string[]; +} + +interface OllamaResponse { + id: string; + object: string; + created: number; + model: string; + choices: Array<{ + index: number; + message: { role: string; content: string; }; + finish_reason: string; + }>; + usage: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; +} + +export class 
OllamaProvider extends BaseProvider { + readonly name = 'ollama'; + + // Ollama exposes an OpenAI-compatible API at /v1 + protected get baseUrl(): string { + return getConfig().providers.ollama.baseUrl; + } + + // Ollama doesn't require an API key by default, but we return a + // dummy value so BaseProvider.isConfigured() stays true when the + // base URL is set. Users can optionally supply a real key if they + // put Ollama behind an auth proxy. + protected get apiKey(): string | undefined { + return getConfig().providers.ollama.apiKey || 'ollama'; + } + + /** + * Ollama is considered configured when the user has explicitly + * enabled it by setting OLLAMA_BASE_URL (even without an API key). + */ + isConfigured(): boolean { + return getConfig().providers.ollama.enabled; + } + + protected transformRequest(request: CanonicalRequest): OllamaRequest { + const messages = request.messages.map(msg => ({ + role: msg.role, + content: msg.content + .filter(c => c.type === 'text') + .map(c => c.text) + .join('') + })); + + return { + model: request.model, + messages, + max_tokens: request.maxTokens, + temperature: request.temperature, + stream: request.stream || false, + stop: request.stopSequences + }; + } + + protected transformResponse(response: OllamaResponse): CanonicalResponse { + const choice = response.choices[0]; + + return { + id: response.id, + model: response.model, + created: response.created, + message: { + role: 'assistant', + content: [{ + type: 'text', + text: choice.message.content + }] + }, + finishReason: this.mapFinishReason(choice.finish_reason), + usage: { + inputTokens: response.usage?.prompt_tokens ?? 0, + outputTokens: response.usage?.completion_tokens ?? 0, + totalTokens: response.usage?.total_tokens ?? 
0 + } + }; + } + + private mapFinishReason(reason: string): 'stop' | 'length' | 'tool_calls' | 'error' { + switch (reason) { + case 'stop': return 'stop'; + case 'length': return 'length'; + default: return 'stop'; + } + } +} diff --git a/gateway/src/domain/services/provider-registry.ts b/gateway/src/domain/services/provider-registry.ts index 3a2486f..e2a227e 100644 --- a/gateway/src/domain/services/provider-registry.ts +++ b/gateway/src/domain/services/provider-registry.ts @@ -5,6 +5,7 @@ import { OpenRouterProvider } from '../providers/openrouter-provider.js'; import { XAIProvider } from '../providers/xai-provider.js'; import { ZAIProvider } from '../providers/zai-provider.js'; import { GoogleProvider } from '../providers/google-provider.js'; +import { OllamaProvider } from '../providers/ollama-provider.js'; export enum Provider { ANTHROPIC = 'anthropic', @@ -12,7 +13,8 @@ export enum Provider { OPENROUTER = 'openrouter', XAI = 'xAI', ZAI = 'zai', - GOOGLE = 'google' + GOOGLE = 'google', + OLLAMA = 'ollama' } export interface ProviderSelectionRule { @@ -83,6 +85,7 @@ export function createDefaultProviderRegistry(): ProviderRegistry { { id: Provider.XAI, create: () => new XAIProvider(), selectionRules: [{ match: model => model.includes('grok-') || model.includes('grok_beta') }] }, { id: Provider.ZAI, create: () => new ZAIProvider() }, { id: Provider.GOOGLE, create: () => new GoogleProvider(), selectionRules: [{ match: model => model.toLowerCase().includes('gemini') }] }, + { id: Provider.OLLAMA, create: () => new OllamaProvider(), selectionRules: [{ match: model => model.startsWith('ollama/') }] }, ]; return new ProviderRegistry(plugins); diff --git a/gateway/src/infrastructure/config/app-config.ts b/gateway/src/infrastructure/config/app-config.ts index f52f4f9..c256a1c 100644 --- a/gateway/src/infrastructure/config/app-config.ts +++ b/gateway/src/infrastructure/config/app-config.ts @@ -1,43 +1,43 @@ -/** - * Centralized application configuration - * All 
environment variables are validated and accessed through this class - */ - -export class AppConfig { - // Server configuration - readonly server = { - port: this.getNumber('PORT', 3001), - environment: this.getString('NODE_ENV', 'development'), - isDevelopment: this.getString('NODE_ENV', 'development') === 'development', - isProduction: this.getString('NODE_ENV', 'development') === 'production', - version: this.getOptionalString('npm_package_version') || 'dev', - }; - - // x402 Payment configuration - readonly x402 = { - enabled: this.has('PRIVATE_KEY'), - privateKey: this.getOptionalString('PRIVATE_KEY'), - baseUrl: this.getString('X402_BASE_URL', 'https://x402.ekailabs.xyz'), - - // Helper methods - get chatCompletionsUrl() { - return `${this.baseUrl}/v1/chat/completions`; - }, - get messagesUrl() { - return `${this.baseUrl}/v1/messages`; - }, - }; - - // Provider API Keys - readonly providers = { - anthropic: { - apiKey: this.getOptionalString('ANTHROPIC_API_KEY'), - enabled: this.has('ANTHROPIC_API_KEY'), - }, - openai: { - apiKey: this.getOptionalString('OPENAI_API_KEY'), - enabled: this.has('OPENAI_API_KEY'), - }, +/** + * Centralized application configuration + * All environment variables are validated and accessed through this class + */ + +export class AppConfig { + // Server configuration + readonly server = { + port: this.getNumber('PORT', 3001), + environment: this.getString('NODE_ENV', 'development'), + isDevelopment: this.getString('NODE_ENV', 'development') === 'development', + isProduction: this.getString('NODE_ENV', 'development') === 'production', + version: this.getOptionalString('npm_package_version') || 'dev', + }; + + // x402 Payment configuration + readonly x402 = { + enabled: this.has('PRIVATE_KEY'), + privateKey: this.getOptionalString('PRIVATE_KEY'), + baseUrl: this.getString('X402_BASE_URL', 'https://x402.ekailabs.xyz'), + + // Helper methods + get chatCompletionsUrl() { + return `${this.baseUrl}/v1/chat/completions`; + }, + get 
messagesUrl() { + return `${this.baseUrl}/v1/messages`; + }, + }; + + // Provider API Keys + readonly providers = { + anthropic: { + apiKey: this.getOptionalString('ANTHROPIC_API_KEY'), + enabled: this.has('ANTHROPIC_API_KEY'), + }, + openai: { + apiKey: this.getOptionalString('OPENAI_API_KEY'), + enabled: this.has('OPENAI_API_KEY'), + }, openrouter: { apiKey: this.getOptionalString('OPENROUTER_API_KEY'), enabled: this.has('OPENROUTER_API_KEY'), @@ -54,21 +54,26 @@ export class AppConfig { apiKey: this.getOptionalString('GOOGLE_API_KEY'), enabled: this.has('GOOGLE_API_KEY'), }, + ollama: { + baseUrl: this.getString('OLLAMA_BASE_URL', 'http://localhost:11434/v1'), + apiKey: this.getOptionalString('OLLAMA_API_KEY'), + enabled: this.has('OLLAMA_BASE_URL'), + }, + }; + + // Telemetry configuration + readonly telemetry = { + enabled: this.getBoolean('ENABLE_TELEMETRY', true), + endpoint: this.getOptionalString('TELEMETRY_ENDPOINT'), + }; + + // OpenRouter-specific configuration + readonly openrouter = { + skipPricingRefresh: this.getBoolean('SKIP_OPENROUTER_PRICING_REFRESH', false), + pricingTimeoutMs: this.getNumber('OPENROUTER_PRICING_TIMEOUT_MS', 4000), + pricingRetries: this.getNumber('OPENROUTER_PRICING_RETRIES', 2), }; - - // Telemetry configuration - readonly telemetry = { - enabled: this.getBoolean('ENABLE_TELEMETRY', true), - endpoint: this.getOptionalString('TELEMETRY_ENDPOINT'), - }; - - // OpenRouter-specific configuration - readonly openrouter = { - skipPricingRefresh: this.getBoolean('SKIP_OPENROUTER_PRICING_REFRESH', false), - pricingTimeoutMs: this.getNumber('OPENROUTER_PRICING_TIMEOUT_MS', 4000), - pricingRetries: this.getNumber('OPENROUTER_PRICING_RETRIES', 2), - }; - + // Feature flags readonly features = { usageTracking: this.getBoolean('ENABLE_USAGE_TRACKING', true), @@ -79,84 +84,84 @@ export class AppConfig { backend: this.getString('MEMORY_BACKEND', 'file'), maxItems: this.getNumber('MEMORY_MAX_ITEMS', 20), } as const; - - // Helper methods - 
private has(key: string): boolean { - return !!process.env[key]; - } - - private getString(key: string, defaultValue: string): string; - private getString(key: string): string; - private getString(key: string, defaultValue?: string): string { - const value = process.env[key] || defaultValue; - if (value === undefined) { - throw new Error(`Missing required environment variable: ${key}`); - } - return value; - } - - private getOptionalString(key: string): string | undefined { - return process.env[key]; - } - - private getNumber(key: string, defaultValue: number): number { - const value = process.env[key]; - if (!value) return defaultValue; - const num = parseInt(value, 10); - if (isNaN(num)) { - throw new Error(`Invalid number for environment variable ${key}: ${value}`); - } - return num; - } - - private getBoolean(key: string, defaultValue: boolean): boolean { - const value = process.env[key]; - if (!value) return defaultValue; - return value.toLowerCase() === 'true' || value === '1'; - } - - /** - * Validate that at least one authentication method is configured - */ - validate(): void { - const hasApiKeys = Object.values(this.providers).some(p => p.enabled); - const hasX402 = this.x402.enabled; - - if (!hasApiKeys && !hasX402) { - throw new Error( - 'No authentication configured. Set either:\n' + - ' 1. At least one provider API key (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc.)\n' + - ' 2. 
PRIVATE_KEY for x402 payment mode' - ); - } - } - - /** - * Get human-readable mode description - */ - getMode(): 'x402-only' | 'hybrid' | 'byok' { - const hasApiKeys = Object.values(this.providers).some(p => p.enabled); - const hasX402 = this.x402.enabled; - - if (!hasApiKeys && hasX402) return 'x402-only'; - if (hasApiKeys && hasX402) return 'hybrid'; - return 'byok'; - } -} - -// Singleton instance -let configInstance: AppConfig | null = null; - -export function getConfig(): AppConfig { - if (!configInstance) { - configInstance = new AppConfig(); - configInstance.validate(); - } - return configInstance; -} - -// For testing - reset config -export function resetConfig(): void { - configInstance = null; -} - + + // Helper methods + private has(key: string): boolean { + return !!process.env[key]; + } + + private getString(key: string, defaultValue: string): string; + private getString(key: string): string; + private getString(key: string, defaultValue?: string): string { + const value = process.env[key] || defaultValue; + if (value === undefined) { + throw new Error(`Missing required environment variable: ${key}`); + } + return value; + } + + private getOptionalString(key: string): string | undefined { + return process.env[key]; + } + + private getNumber(key: string, defaultValue: number): number { + const value = process.env[key]; + if (!value) return defaultValue; + const num = parseInt(value, 10); + if (isNaN(num)) { + throw new Error(`Invalid number for environment variable ${key}: ${value}`); + } + return num; + } + + private getBoolean(key: string, defaultValue: boolean): boolean { + const value = process.env[key]; + if (!value) return defaultValue; + return value.toLowerCase() === 'true' || value === '1'; + } + + /** + * Validate that at least one authentication method is configured + */ + validate(): void { + const hasApiKeys = Object.values(this.providers).some(p => p.enabled); + const hasX402 = this.x402.enabled; + + if (!hasApiKeys && !hasX402) { + throw 
new Error( + 'No authentication configured. Set either:\n' + + ' 1. At least one provider API key (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc.)\n' + + ' 2. PRIVATE_KEY for x402 payment mode' + ); + } + } + + /** + * Get human-readable mode description + */ + getMode(): 'x402-only' | 'hybrid' | 'byok' { + const hasApiKeys = Object.values(this.providers).some(p => p.enabled); + const hasX402 = this.x402.enabled; + + if (!hasApiKeys && hasX402) return 'x402-only'; + if (hasApiKeys && hasX402) return 'hybrid'; + return 'byok'; + } +} + +// Singleton instance +let configInstance: AppConfig | null = null; + +export function getConfig(): AppConfig { + if (!configInstance) { + configInstance = new AppConfig(); + configInstance.validate(); + } + return configInstance; +} + +// For testing - reset config +export function resetConfig(): void { + configInstance = null; +} + diff --git a/model_catalog/chat_completions_providers_v1.json b/model_catalog/chat_completions_providers_v1.json index 8b260cf..b31f2fb 100644 --- a/model_catalog/chat_completions_providers_v1.json +++ b/model_catalog/chat_completions_providers_v1.json @@ -156,6 +156,44 @@ "format": "openai_chat" } } + }, + { + "provider": "ollama", + "models": [ + "llama3.3", + "llama3.2", + "llama3.1", + "llama3", + "gemma3", + "gemma2", + "qwen3", + "qwen2.5-coder", + "deepseek-r1", + "deepseek-coder-v2", + "phi4", + "phi3", + "mistral", + "mixtral", + "codellama", + "starcoder2" + ], + "chat_completions": { + "base_url": "http://localhost:11434/v1/chat/completions", + "auth": { + "env_var": "OLLAMA_API_KEY", + "header": "Authorization", + "scheme": "Bearer" + }, + "supported_client_formats": ["openai"], + "payload_defaults": { + "stream_options": { + "include_usage": true + } + }, + "usage": { + "format": "openai_chat" + } + } } ] } diff --git a/shared/types/types.ts b/shared/types/types.ts index 94985ff..c98df8f 100644 --- a/shared/types/types.ts +++ b/shared/types/types.ts @@ -31,7 +31,7 @@ export interface 
ChatCompletionResponse { } -export type ProviderName = 'openai' | 'openrouter' | 'anthropic'; +export type ProviderName = 'openai' | 'openrouter' | 'anthropic' | 'ollama'; // Removed conversation types - no conversation storage From 90be7c12ea651d3521e3d3d36ad8f72827ff05d3 Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Mon, 9 Feb 2026 22:11:12 +0530 Subject: [PATCH 2/5] feat: Add Responses API support for Ollama - Added Ollama to responses_providers_v1.json catalog - Created OllamaResponsesPassthrough class implementing Responses API - Registered Ollama in responses-passthrough-registry.ts Ollama supports the OpenResponses API specification at /v1/responses endpoint, providing future-proof support as /chat/completions may be deprecated. --- .../ollama-responses-passthrough.ts | 284 ++++++++++++++++++ .../responses-passthrough-registry.ts | 2 + model_catalog/responses_providers_v1.json | 30 ++ 3 files changed, 316 insertions(+) create mode 100644 gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts diff --git a/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts new file mode 100644 index 0000000..6acf733 --- /dev/null +++ b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts @@ -0,0 +1,284 @@ +import { Response as ExpressResponse } from 'express'; +import { logger } from '../utils/logger.js'; +import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; +import { CONTENT_TYPES } from '../../domain/types/provider.js'; +import { getConfig } from '../config/app-config.js'; +import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; +import { injectMemoryContext, persistMemory } from '../memory/memory-helper.js'; + +export class OllamaResponsesPassthrough implements ResponsesPassthrough { + constructor(private readonly config: ResponsesPassthroughConfig) {} + + private get baseUrl(): 
string { + return this.config.baseUrl; + } + + private get apiKey(): string | undefined { + const envVar = this.config.auth?.envVar; + if (envVar) { + const token = process.env[envVar]; + if (token) return token; + } + + // Ollama doesn't require an API key by default (runs locally) + // Return undefined if no key is configured + return getConfig().providers.ollama.apiKey; + } + + private buildAuthHeader(): string | undefined { + const token = this.apiKey; + if (!token) return undefined; // Ollama doesn't require auth by default + + const { auth } = this.config; + if (!auth) { + return `Bearer ${token}`; + } + + if (auth.template) { + return auth.template.replace('{{token}}', token); + } + + if (auth.scheme) { + return `${auth.scheme} ${token}`.trim(); + } + + return token; + } + + private buildHeaders(): Record { + const headers: Record = { + 'Content-Type': 'application/json', + ...this.config.staticHeaders, + }; + + const authHeader = this.buildAuthHeader(); + if (authHeader) { + const headerName = this.config.auth?.header ?? 
'Authorization'; + headers[headerName] = authHeader; + } + + return headers; + } + + // Store usage data for tracking + private usage: { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + } | null = null; + + // Buffer to handle multi-chunk SSE events + private eventBuffer: string = ''; + private assistantResponseBuffer: string = ''; + + private async makeRequest(body: any, stream: boolean): Promise { + const response = await fetch(this.baseUrl, { + method: 'POST', + headers: this.buildHeaders(), + body: JSON.stringify({ ...body, stream, store: false }) // Not storing responses + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new ProviderError('ollama', errorText || `HTTP ${response.status}`, response.status, { endpoint: this.baseUrl }); + } + + return response; + } + + private trackUsage(text: string, model: string, clientIp?: string): void { + try { + // Add to buffer to handle multi-chunk events + this.eventBuffer += text; + + // Extract assistant response content from text.delta events + const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; + let match; + while ((match = textDeltaMatch.exec(text)) !== null) { + this.assistantResponseBuffer += match[1]; + } + + // Look for the exact response.completed event + if (this.eventBuffer.includes('"type":"response.completed"')) { + + // Find the start of the JSON object + const startIndex = this.eventBuffer.indexOf('{"type":"response.completed"'); + if (startIndex === -1) return; + + // Find the end by counting braces + let braceCount = 0; + let endIndex = -1; + + for (let i = startIndex; i < this.eventBuffer.length; i++) { + if (this.eventBuffer[i] === '{') braceCount++; + if (this.eventBuffer[i] === '}') braceCount--; + + if (braceCount === 0) { + endIndex = i; + break; + } + } + + if (endIndex === -1) return; // Incomplete JSON, wait for more chunks + + // Extract the complete JSON + const jsonString = 
this.eventBuffer.substring(startIndex, endIndex + 1); + + logger.debug('JSON response found', { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); + + try { + const data = JSON.parse(jsonString); + logger.debug('Response parsed successfully', { provider: 'ollama', operation: 'usage_extraction', module: 'ollama-responses-passthrough' }); + + // Extract usage data from response.usage + if (data.response?.usage) { + const usage = data.response.usage; + const totalInputTokens = usage.input_tokens || 0; + const cachedTokens = usage.input_tokens_details?.cached_tokens || 0; + const nonCachedInputTokens = totalInputTokens - cachedTokens; + const outputTokens = usage.output_tokens || 0; + const totalTokens = usage.total_tokens || (totalInputTokens + outputTokens); + const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0; + + logger.debug('Usage tracking from response', { + provider: 'ollama', + model, + totalInputTokens, + nonCachedInputTokens, + cachedTokens, + outputTokens, + totalTokens, + reasoningTokens, + module: 'ollama-responses-passthrough' + }); + + import('../utils/usage-tracker.js').then(({ usageTracker }) => { + usageTracker.trackUsage( + model, + 'ollama', + nonCachedInputTokens, + outputTokens, + cachedTokens, + 0, // cache read tokens + clientIp + ); + }).catch((error) => { + logger.error('Usage tracking failed', error, { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' }); + }); + } else { + logger.warn('No usage data in response', { provider: 'ollama', operation: 'passthrough', module: 'ollama-responses-passthrough' }); + } + } catch (parseError) { + logger.error('JSON parse error', parseError, { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); + } + + // Clear buffer after processing + this.eventBuffer = ''; + } + } catch (error) { + logger.error('Usage tracking failed', error, { provider: 'ollama', operation: 
'passthrough', module: 'ollama-responses-passthrough' }); + } + } + + async handleDirectRequest(request: any, res: ExpressResponse, clientIp?: string): Promise { + // Reset usage tracking for new request + this.usage = null; + this.eventBuffer = ''; + this.assistantResponseBuffer = ''; + + injectMemoryContext(request, { + provider: this.config.provider, + defaultUserId: 'default', + extractCurrentUserInputs: req => extractResponsesUserInputs(req), + applyMemoryContext: (req, context) => { + if (req.instructions) { + req.instructions = `${context}\n\n---\n\n${req.instructions}`; + } else { + req.instructions = context; + } + } + }); + + if (request.stream) { + const response = await this.makeRequest(request, true); + + res.writeHead(200, { + 'Content-Type': CONTENT_TYPES.EVENT_STREAM, + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }); + + // Manual stream processing for usage tracking + const reader = response.body!.getReader(); + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + const text = new TextDecoder().decode(value); + setImmediate(() => this.trackUsage(text, request.model, clientIp)); + + res.write(value); + } + res.end(); + + persistMemory(request, this.assistantResponseBuffer, { + provider: this.config.provider, + defaultUserId: 'default', + extractUserContent: req => req.input || '', + metadataBuilder: req => ({ + model: req.model, + provider: this.config.provider, + }), + }); + } else { + const response = await this.makeRequest(request, false); + const json = await response.json(); + + // Track usage for non-streaming requests + if (json.usage) { + const totalInputTokens = json.usage.input_tokens || 0; + const cachedTokens = json.usage.input_tokens_details?.cached_tokens || 0; + const nonCachedInputTokens = totalInputTokens - cachedTokens; + const outputTokens = json.usage.output_tokens || 0; + const totalTokens = json.usage.total_tokens || (totalInputTokens + 
outputTokens); + const reasoningTokens = json.usage.output_tokens_details?.reasoning_tokens || 0; + + logger.debug('Tracking non-streaming usage', { + provider: 'ollama', + model: request.model, + totalInputTokens, + nonCachedInputTokens, + cachedTokens, + outputTokens, + totalTokens, + reasoningTokens, + module: 'ollama-responses-passthrough' + }); + + import('../utils/usage-tracker.js').then(({ usageTracker }) => { + usageTracker.trackUsage(request.model, 'ollama', nonCachedInputTokens, outputTokens, cachedTokens, 0, clientIp); + }).catch(() => {}); + } + + const assistantResponse = json?.output?.[0]?.content?.[0]?.text || ''; + persistMemory(request, assistantResponse, { + provider: this.config.provider, + defaultUserId: 'default', + extractUserContent: req => req.input || '', + metadataBuilder: req => ({ + model: req.model, + provider: this.config.provider, + }), + }); + + res.json(json); + } + } +} + +function extractResponsesUserInputs(request: any): string[] { + const content = (request.input || '').trim(); + return content ? 
[content] : []; +} diff --git a/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts b/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts index 6d38f0e..d67bc09 100644 --- a/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts +++ b/gateway/src/infrastructure/passthrough/responses-passthrough-registry.ts @@ -1,5 +1,6 @@ import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; import { OpenAIResponsesPassthrough } from './openai-responses-passthrough.js'; +import { OllamaResponsesPassthrough } from './ollama-responses-passthrough.js'; import { loadResponsesProviderDefinitions, ResponsesProviderDefinition } from './responses-provider-config.js'; import { logger } from '../utils/logger.js'; @@ -10,6 +11,7 @@ interface ProviderEntry { const passthroughFactories: Record ResponsesPassthrough> = { openai: (config) => new OpenAIResponsesPassthrough(config), + ollama: (config) => new OllamaResponsesPassthrough(config), }; export class ResponsesPassthroughRegistry { diff --git a/model_catalog/responses_providers_v1.json b/model_catalog/responses_providers_v1.json index 71086a7..472f6b8 100644 --- a/model_catalog/responses_providers_v1.json +++ b/model_catalog/responses_providers_v1.json @@ -15,6 +15,36 @@ }, "supported_client_formats": ["openai_responses"] } + }, + { + "provider": "ollama", + "models": [ + "llama3.3", + "llama3.2", + "llama3.1", + "llama3", + "gemma3", + "gemma2", + "qwen3", + "qwen2.5-coder", + "deepseek-r1", + "deepseek-coder-v2", + "phi4", + "phi3", + "mistral", + "mixtral", + "codellama", + "starcoder2" + ], + "responses": { + "base_url": "http://localhost:11434/v1/responses", + "auth": { + "env_var": "OLLAMA_API_KEY", + "header": "Authorization", + "scheme": "Bearer" + }, + "supported_client_formats": ["openai_responses"] + } } ] } From b22d91d267ac12a3f09aa5f204f14e45019f6ece Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Mon, 9 Feb 2026 22:32:29 +0530 
Subject: [PATCH 3/5] refactor: Remove comments from Ollama provider files --- .../src/domain/providers/ollama-provider.ts | 9 -- .../ollama-responses-passthrough.ts | 97 ++++++------------- 2 files changed, 31 insertions(+), 75 deletions(-) diff --git a/gateway/src/domain/providers/ollama-provider.ts b/gateway/src/domain/providers/ollama-provider.ts index 23df2cb..ee43e6d 100644 --- a/gateway/src/domain/providers/ollama-provider.ts +++ b/gateway/src/domain/providers/ollama-provider.ts @@ -31,23 +31,14 @@ interface OllamaResponse { export class OllamaProvider extends BaseProvider { readonly name = 'ollama'; - // Ollama exposes an OpenAI-compatible API at /v1 protected get baseUrl(): string { return getConfig().providers.ollama.baseUrl; } - // Ollama doesn't require an API key by default, but we return a - // dummy value so BaseProvider.isConfigured() stays true when the - // base URL is set. Users can optionally supply a real key if they - // put Ollama behind an auth proxy. protected get apiKey(): string | undefined { return getConfig().providers.ollama.apiKey || 'ollama'; } - /** - * Ollama is considered configured when the user has explicitly - * enabled it by setting OLLAMA_BASE_URL (even without an API key). 
- */ isConfigured(): boolean { return getConfig().providers.ollama.enabled; } diff --git a/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts index 6acf733..a692b5d 100644 --- a/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/ollama-responses-passthrough.ts @@ -1,6 +1,6 @@ import { Response as ExpressResponse } from 'express'; import { logger } from '../utils/logger.js'; -import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; +import { ProviderError } from '../../shared/errors/index.js'; import { CONTENT_TYPES } from '../../domain/types/provider.js'; import { getConfig } from '../config/app-config.js'; import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; @@ -10,39 +10,31 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { constructor(private readonly config: ResponsesPassthroughConfig) {} private get baseUrl(): string { - return this.config.baseUrl; - } - - private get apiKey(): string | undefined { - const envVar = this.config.auth?.envVar; - if (envVar) { - const token = process.env[envVar]; - if (token) return token; + if (this.config.baseUrl) { + return this.config.baseUrl; } - - // Ollama doesn't require an API key by default (runs locally) - // Return undefined if no key is configured - return getConfig().providers.ollama.apiKey; + const configBaseUrl = getConfig().providers.ollama.baseUrl; + return configBaseUrl.replace(/\/v1\/?$/, '/v1/responses'); } - private buildAuthHeader(): string | undefined { - const token = this.apiKey; - if (!token) return undefined; // Ollama doesn't require auth by default - + private buildAuthHeader(): string { const { auth } = this.config; if (!auth) { - return `Bearer ${token}`; - } - - if (auth.template) { - return auth.template.replace('{{token}}', token); + return ''; } - 
if (auth.scheme) { - return `${auth.scheme} ${token}`.trim(); + const envVar = auth.envVar; + if (envVar) { + const token = process.env[envVar]; + if (token) { + if (auth.scheme) { + return `${auth.scheme} ${token}`.trim(); + } + return token; + } } - return token; + return ''; } private buildHeaders(): Record { @@ -51,23 +43,20 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { ...this.config.staticHeaders, }; + const headerName = this.config.auth?.header ?? 'Authorization'; const authHeader = this.buildAuthHeader(); if (authHeader) { - const headerName = this.config.auth?.header ?? 'Authorization'; headers[headerName] = authHeader; } - return headers; } - // Store usage data for tracking private usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number; } | null = null; - // Buffer to handle multi-chunk SSE events private eventBuffer: string = ''; private assistantResponseBuffer: string = ''; @@ -75,7 +64,7 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { const response = await fetch(this.baseUrl, { method: 'POST', headers: this.buildHeaders(), - body: JSON.stringify({ ...body, stream, store: false }) // Not storing responses + body: JSON.stringify({ ...body, stream, store: false }) }); if (!response.ok) { @@ -88,24 +77,18 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { private trackUsage(text: string, model: string, clientIp?: string): void { try { - // Add to buffer to handle multi-chunk events this.eventBuffer += text; - // Extract assistant response content from text.delta events const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; let match; while ((match = textDeltaMatch.exec(text)) !== null) { this.assistantResponseBuffer += match[1]; } - // Look for the exact response.completed event if (this.eventBuffer.includes('"type":"response.completed"')) { - - // Find the start of the JSON object const startIndex = 
this.eventBuffer.indexOf('{"type":"response.completed"'); if (startIndex === -1) return; - // Find the end by counting braces let braceCount = 0; let endIndex = -1; @@ -119,9 +102,8 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { } } - if (endIndex === -1) return; // Incomplete JSON, wait for more chunks + if (endIndex === -1) return; - // Extract the complete JSON const jsonString = this.eventBuffer.substring(startIndex, endIndex + 1); logger.debug('JSON response found', { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); @@ -130,25 +112,18 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { const data = JSON.parse(jsonString); logger.debug('Response parsed successfully', { provider: 'ollama', operation: 'usage_extraction', module: 'ollama-responses-passthrough' }); - // Extract usage data from response.usage if (data.response?.usage) { const usage = data.response.usage; - const totalInputTokens = usage.input_tokens || 0; - const cachedTokens = usage.input_tokens_details?.cached_tokens || 0; - const nonCachedInputTokens = totalInputTokens - cachedTokens; + const inputTokens = usage.input_tokens || 0; const outputTokens = usage.output_tokens || 0; - const totalTokens = usage.total_tokens || (totalInputTokens + outputTokens); - const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0; + const totalTokens = usage.total_tokens || (inputTokens + outputTokens); logger.debug('Usage tracking from response', { provider: 'ollama', model, - totalInputTokens, - nonCachedInputTokens, - cachedTokens, + inputTokens, outputTokens, totalTokens, - reasoningTokens, module: 'ollama-responses-passthrough' }); @@ -156,10 +131,10 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { usageTracker.trackUsage( model, 'ollama', - nonCachedInputTokens, + inputTokens, outputTokens, - cachedTokens, - 0, // cache read tokens + 0, + 0, clientIp ); 
}).catch((error) => { @@ -172,7 +147,6 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { logger.error('JSON parse error', parseError, { provider: 'ollama', operation: 'response_parsing', module: 'ollama-responses-passthrough' }); } - // Clear buffer after processing this.eventBuffer = ''; } } catch (error) { @@ -181,7 +155,6 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { } async handleDirectRequest(request: any, res: ExpressResponse, clientIp?: string): Promise { - // Reset usage tracking for new request this.usage = null; this.eventBuffer = ''; this.assistantResponseBuffer = ''; @@ -209,7 +182,6 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { 'Access-Control-Allow-Origin': '*', }); - // Manual stream processing for usage tracking const reader = response.body!.getReader(); while (true) { @@ -236,33 +208,26 @@ export class OllamaResponsesPassthrough implements ResponsesPassthrough { const response = await this.makeRequest(request, false); const json = await response.json(); - // Track usage for non-streaming requests if (json.usage) { - const totalInputTokens = json.usage.input_tokens || 0; - const cachedTokens = json.usage.input_tokens_details?.cached_tokens || 0; - const nonCachedInputTokens = totalInputTokens - cachedTokens; + const inputTokens = json.usage.input_tokens || 0; const outputTokens = json.usage.output_tokens || 0; - const totalTokens = json.usage.total_tokens || (totalInputTokens + outputTokens); - const reasoningTokens = json.usage.output_tokens_details?.reasoning_tokens || 0; + const totalTokens = json.usage.total_tokens || (inputTokens + outputTokens); logger.debug('Tracking non-streaming usage', { provider: 'ollama', model: request.model, - totalInputTokens, - nonCachedInputTokens, - cachedTokens, + inputTokens, outputTokens, totalTokens, - reasoningTokens, module: 'ollama-responses-passthrough' }); import('../utils/usage-tracker.js').then(({ usageTracker }) => 
{ - usageTracker.trackUsage(request.model, 'ollama', nonCachedInputTokens, outputTokens, cachedTokens, 0, clientIp); + usageTracker.trackUsage(request.model, 'ollama', inputTokens, outputTokens, 0, 0, clientIp); }).catch(() => {}); } - const assistantResponse = json?.output?.[0]?.content?.[0]?.text || ''; + const assistantResponse = json?.output?.[0]?.content?.[0]?.text || json?.output_text || ''; persistMemory(request, assistantResponse, { provider: this.config.provider, defaultUserId: 'default', From 6eaefb0f4e2ef762a0d667be678e007c719f9bee Mon Sep 17 00:00:00 2001 From: shrijam12 Date: Wed, 11 Feb 2026 01:52:50 +0530 Subject: [PATCH 4/5] feat: add multi-key management with priority-based selection Fixes #11. Users can store multiple API keys per provider with priority ordering. OAuth/subscription tokens are used first, then keys by priority, with automatic fallback. Keys are managed via REST API (GET/POST/DELETE /keys, PUT /keys/:id/priority). --- .gitignore | 3 + gateway/src/app/handlers/key-handler.ts | 95 +++++++ gateway/src/index.ts | 6 + .../src/infrastructure/auth/key-manager.ts | 83 ++++++ gateway/src/infrastructure/auth/key-store.ts | 126 +++++++++ .../chat-completions-passthrough.ts | 26 +- .../passthrough/messages-passthrough.ts | 27 +- .../openai-responses-passthrough.ts | 248 +++++++++--------- 8 files changed, 477 insertions(+), 137 deletions(-) create mode 100644 gateway/src/app/handlers/key-handler.ts create mode 100644 gateway/src/infrastructure/auth/key-manager.ts create mode 100644 gateway/src/infrastructure/auth/key-store.ts diff --git a/.gitignore b/.gitignore index 7ce81ac..ca84aea 100644 --- a/.gitignore +++ b/.gitignore @@ -63,6 +63,9 @@ Thumbs.db *.tmp *.temp +# Key and token storage +.ekai/ + # AI configurations CLAUDE.md AGENTS.md diff --git a/gateway/src/app/handlers/key-handler.ts b/gateway/src/app/handlers/key-handler.ts new file mode 100644 index 0000000..582e582 --- /dev/null +++ b/gateway/src/app/handlers/key-handler.ts @@ -0,0 
+1,95 @@ +import { Request, Response } from 'express'; +import { addKey, removeKey, getAllKeys, updateKeyPriority, maskKey } from '../../infrastructure/auth/key-store.js'; +import { logger } from '../../infrastructure/utils/logger.js'; + +const VALID_PROVIDERS = ['anthropic', 'openai', 'openrouter', 'xai', 'zai', 'google', 'ollama']; + +export async function handleListKeys(req: Request, res: Response): Promise { + try { + const keys = getAllKeys().map(k => ({ + id: k.id, + provider: k.provider, + label: k.label, + maskedKey: maskKey(k.key), + priority: k.priority, + source: k.source, + addedAt: k.addedAt, + })); + res.json({ keys }); + } catch (error) { + logger.error('Failed to list keys', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to list keys' }); + } +} + +export async function handleAddKey(req: Request, res: Response): Promise { + try { + const { provider, key, label, priority } = req.body; + + if (!provider || !key) { + res.status(400).json({ error: 'provider and key are required' }); + return; + } + + if (!VALID_PROVIDERS.includes(provider)) { + res.status(400).json({ error: `Invalid provider. 
Use one of: ${VALID_PROVIDERS.join(', ')}` }); + return; + } + + if (typeof key !== 'string' || key.trim().length === 0) { + res.status(400).json({ error: 'key must be a non-empty string' }); + return; + } + + const stored = addKey(provider, key.trim(), label, priority); + res.status(201).json({ + id: stored.id, + provider: stored.provider, + label: stored.label, + maskedKey: maskKey(stored.key), + priority: stored.priority, + source: stored.source, + addedAt: stored.addedAt, + }); + } catch (error) { + logger.error('Failed to add key', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to add key' }); + } +} + +export async function handleRemoveKey(req: Request, res: Response): Promise { + try { + const { id } = req.params; + const removed = removeKey(id); + if (!removed) { + res.status(404).json({ error: 'Key not found' }); + return; + } + res.json({ status: 'removed', id }); + } catch (error) { + logger.error('Failed to remove key', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to remove key' }); + } +} + +export async function handleUpdateKeyPriority(req: Request, res: Response): Promise { + try { + const { id } = req.params; + const { priority } = req.body; + + if (typeof priority !== 'number' || priority < 0) { + res.status(400).json({ error: 'priority must be a non-negative number' }); + return; + } + + const updated = updateKeyPriority(id, priority); + if (!updated) { + res.status(404).json({ error: 'Key not found' }); + return; + } + res.json({ status: 'updated', id, priority }); + } catch (error) { + logger.error('Failed to update key priority', error, { module: 'key-handler' }); + res.status(500).json({ error: 'Failed to update key priority' }); + } +} diff --git a/gateway/src/index.ts b/gateway/src/index.ts index 863a5f9..6042097 100644 --- a/gateway/src/index.ts +++ b/gateway/src/index.ts @@ -32,6 +32,7 @@ import { handleUsageRequest } from './app/handlers/usage-handler.js'; import { 
handleConfigStatus } from './app/handlers/config-handler.js'; import { handleModelsRequest } from './app/handlers/models-handler.js'; import { handleGetBudget, handleUpdateBudget } from './app/handlers/budget-handler.js'; +import { handleListKeys, handleAddKey, handleRemoveKey, handleUpdateKeyPriority } from './app/handlers/key-handler.js'; import { logger } from './infrastructure/utils/logger.js'; import { requestContext } from './infrastructure/middleware/request-context.js'; import { requestLogging } from './infrastructure/middleware/request-logging.js'; @@ -92,6 +93,11 @@ async function bootstrap(): Promise { app.get('/budget', handleGetBudget); app.put('/budget', handleUpdateBudget); + app.get('/keys', handleListKeys); + app.post('/keys', handleAddKey); + app.delete('/keys/:id', handleRemoveKey); + app.put('/keys/:id/priority', handleUpdateKeyPriority); + // Error handler MUST be last middleware app.use(errorHandler); diff --git a/gateway/src/infrastructure/auth/key-manager.ts b/gateway/src/infrastructure/auth/key-manager.ts new file mode 100644 index 0000000..03ccbff --- /dev/null +++ b/gateway/src/infrastructure/auth/key-manager.ts @@ -0,0 +1,83 @@ +import { getKeysForProvider, seedFromEnv, StoredKey } from './key-store.js'; +import { logger } from '../utils/logger.js'; + +const ENV_MAP: Record = { + anthropic: 'ANTHROPIC_API_KEY', + openai: 'OPENAI_API_KEY', + openrouter: 'OPENROUTER_API_KEY', + xai: 'XAI_API_KEY', + zai: 'ZAI_API_KEY', + google: 'GOOGLE_API_KEY', + ollama: 'OLLAMA_API_KEY', +}; + +const exhaustedKeys = new Map(); +const EXHAUSTED_COOLDOWN_MS = 300000; + +let seeded = false; + +function seedAllFromEnv(): void { + if (seeded) return; + seeded = true; + for (const [provider, envVar] of Object.entries(ENV_MAP)) { + seedFromEnv(provider, envVar); + } +} + +export function markKeyExhausted(provider: string, keyId: string): void { + exhaustedKeys.set(`${provider}:${keyId}`, Date.now()); + logger.info('Key marked exhausted', { provider, keyId, 
cooldownMs: EXHAUSTED_COOLDOWN_MS, module: 'key-manager' }); +} + +function isKeyExhausted(provider: string, keyId: string): boolean { + const ts = exhaustedKeys.get(`${provider}:${keyId}`); + if (!ts) return false; + if (Date.now() - ts > EXHAUSTED_COOLDOWN_MS) { + exhaustedKeys.delete(`${provider}:${keyId}`); + return false; + } + return true; +} + +export async function resolveKeyForProvider(provider: string): Promise { + seedAllFromEnv(); + + if (provider === 'openai' || provider === 'anthropic') { + try { + const oauthPath = new URL('./oauth-service.js', import.meta.url).href; + const mod: any = await import(/* @vite-ignore */ oauthPath).catch(() => null); + if (mod?.getValidAccessToken) { + const oauthToken = await mod.getValidAccessToken(provider); + if (oauthToken) { + logger.debug('Using OAuth token', { provider, module: 'key-manager' }); + return oauthToken; + } + } + } catch {} + } + + const keys = getKeysForProvider(provider); + for (const key of keys) { + if (!isKeyExhausted(provider, key.id)) { + logger.debug('Using key', { provider, label: key.label, priority: key.priority, module: 'key-manager' }); + return key.key; + } + } + + if (keys.length > 0) { + logger.warn('All keys exhausted, using first key as fallback', { provider, module: 'key-manager' }); + return keys[0].key; + } + + return undefined; +} + +export function getKeyCountForProvider(provider: string): number { + seedAllFromEnv(); + return getKeysForProvider(provider).length; +} + +export function hasAnyKeyForProvider(provider: string): boolean { + seedAllFromEnv(); + return getKeysForProvider(provider).length > 0; +} diff --git a/gateway/src/infrastructure/auth/key-store.ts b/gateway/src/infrastructure/auth/key-store.ts new file mode 100644 index 0000000..93f7164 --- /dev/null +++ b/gateway/src/infrastructure/auth/key-store.ts @@ -0,0 +1,126 @@ +import fs from 'fs'; +import path from 'path'; +import crypto from 'crypto'; +import { logger } from '../utils/logger.js'; + +export interface 
StoredKey { + id: string; + provider: string; + label: string; + key: string; + priority: number; + source: 'manual' | 'env'; + addedAt: string; +} + +interface KeyStoreData { + keys: StoredKey[]; +} + +const STORE_DIR = path.join(process.cwd(), '.ekai'); +const STORE_PATH = path.join(STORE_DIR, 'keys.json'); + +function ensureStoreDir(): void { + if (!fs.existsSync(STORE_DIR)) { + fs.mkdirSync(STORE_DIR, { recursive: true }); + } +} + +function readStore(): KeyStoreData { + try { + if (fs.existsSync(STORE_PATH)) { + return JSON.parse(fs.readFileSync(STORE_PATH, 'utf-8')); + } + } catch (error) { + logger.error('Failed to read key store', error, { module: 'key-store' }); + } + return { keys: [] }; +} + +function writeStore(data: KeyStoreData): void { + ensureStoreDir(); + fs.writeFileSync(STORE_PATH, JSON.stringify(data, null, 2), { mode: 0o600 }); +} + +export function addKey(provider: string, key: string, label?: string, priority?: number): StoredKey { + const store = readStore(); + const existing = store.keys.find(k => k.provider === provider && k.key === key); + if (existing) return existing; + + const maxPriority = store.keys + .filter(k => k.provider === provider) + .reduce((max, k) => Math.max(max, k.priority), 0); + + const entry: StoredKey = { + id: crypto.randomUUID(), + provider, + label: label || `${provider}-key-${store.keys.filter(k => k.provider === provider).length + 1}`, + key, + priority: priority ?? 
maxPriority + 1, + source: 'manual', + addedAt: new Date().toISOString(), + }; + + store.keys.push(entry); + writeStore(store); + logger.info('Key added', { provider, label: entry.label, module: 'key-store' }); + return entry; +} + +export function removeKey(id: string): boolean { + const store = readStore(); + const idx = store.keys.findIndex(k => k.id === id); + if (idx === -1) return false; + const removed = store.keys.splice(idx, 1)[0]; + writeStore(store); + logger.info('Key removed', { provider: removed.provider, label: removed.label, module: 'key-store' }); + return true; +} + +export function getKeysForProvider(provider: string): StoredKey[] { + const store = readStore(); + return store.keys + .filter(k => k.provider === provider) + .sort((a, b) => a.priority - b.priority); +} + +export function getAllKeys(): StoredKey[] { + return readStore().keys.sort((a, b) => { + if (a.provider !== b.provider) return a.provider.localeCompare(b.provider); + return a.priority - b.priority; + }); +} + +export function updateKeyPriority(id: string, priority: number): boolean { + const store = readStore(); + const key = store.keys.find(k => k.id === id); + if (!key) return false; + key.priority = priority; + writeStore(store); + return true; +} + +export function maskKey(key: string): string { + if (key.length <= 8) return '****'; + return key.slice(0, 4) + '...' 
+ key.slice(-4); +} + +export function seedFromEnv(provider: string, envVar: string): void { + const value = process.env[envVar]; + if (!value) return; + const store = readStore(); + const exists = store.keys.some(k => k.provider === provider && k.key === value); + if (exists) return; + + const entry: StoredKey = { + id: crypto.randomUUID(), + provider, + label: `${provider}-env`, + key: value, + priority: 999, + source: 'env', + addedAt: new Date().toISOString(), + }; + store.keys.push(entry); + writeStore(store); +} diff --git a/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts b/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts index a7e06b7..6e48edd 100644 --- a/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/chat-completions-passthrough.ts @@ -118,7 +118,10 @@ export class ChatCompletionsPassthrough { } } + private resolvedKey: string | undefined; + private get apiKey(): string | undefined { + if (this.resolvedKey) return this.resolvedKey; const auth = this.config.auth; if (!auth) return undefined; const token = process.env[auth.envVar]; @@ -130,10 +133,13 @@ export class ChatCompletionsPassthrough { private buildAuthHeader(): string | undefined { const auth = this.config.auth; - if (!auth) return undefined; + const token = this.resolvedKey || this.apiKey; + if (!token) return undefined; + if (this.resolvedKey) return `Bearer ${this.resolvedKey}`; + + if (!auth) return undefined; const { scheme, template } = auth; - const token = this.apiKey!; if (template) { return template.replace('{{token}}', token); @@ -146,7 +152,7 @@ export class ChatCompletionsPassthrough { return token; } - private buildHeaders(): Record { + private async buildHeaders(): Promise> { const headers: Record = { 'Content-Type': 'application/json', }; @@ -155,9 +161,17 @@ export class ChatCompletionsPassthrough { Object.assign(headers, this.config.staticHeaders); } + try { + const { 
resolveKeyForProvider } = await import('../auth/key-manager.js'); + this.resolvedKey = await resolveKeyForProvider(this.config.provider); + } catch { + this.resolvedKey = undefined; + } + const authHeader = this.buildAuthHeader(); - if (authHeader && this.config.auth) { - headers[this.config.auth.header] = authHeader; + if (authHeader) { + const headerName = this.config.auth?.header ?? 'Authorization'; + headers[headerName] = authHeader; } return headers; @@ -214,7 +228,7 @@ export class ChatCompletionsPassthrough { try { response = await fetchFunction(this.config.baseUrl, { method: 'POST', - headers: this.buildHeaders(), + headers: await this.buildHeaders(), body: JSON.stringify(this.buildPayload(body, stream)), }); } catch (error) { diff --git a/gateway/src/infrastructure/passthrough/messages-passthrough.ts b/gateway/src/infrastructure/passthrough/messages-passthrough.ts index 8d21764..3b0f3f9 100644 --- a/gateway/src/infrastructure/passthrough/messages-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/messages-passthrough.ts @@ -100,7 +100,10 @@ export class MessagesPassthrough { return this.config.baseUrl; } + private resolvedKey: string | undefined; + private get apiKey(): string | undefined { + if (this.resolvedKey) return this.resolvedKey; if (!this.config.auth) return undefined; const envVar = this.config.auth.envVar; const token = process.env[envVar]; @@ -111,11 +114,13 @@ export class MessagesPassthrough { } private buildAuthHeader(): string | undefined { + const token = this.resolvedKey || this.apiKey; + if (!token) return undefined; + + if (this.resolvedKey) return `Bearer ${this.resolvedKey}`; + if (!this.config.auth) return undefined; const { scheme, template } = this.config.auth; - const token = this.apiKey; - - if (!token) return undefined; if (template) { return template.replace('{{token}}', token); @@ -128,14 +133,22 @@ export class MessagesPassthrough { return token; } - private buildHeaders(): Record { + private async buildHeaders(): 
Promise> { const headers: Record = { 'Content-Type': 'application/json', ...this.config.staticHeaders, }; - // Only add auth header if auth is configured (not x402 mode) - if (this.config.auth) { + try { + const { resolveKeyForProvider } = await import('../auth/key-manager.js'); + this.resolvedKey = await resolveKeyForProvider(this.config.provider); + } catch { + this.resolvedKey = undefined; + } + + if (this.resolvedKey && this.config.provider === 'anthropic') { + headers['x-api-key'] = this.resolvedKey; + } else if (this.config.auth) { const authHeader = this.buildAuthHeader(); if (authHeader) { headers[this.config.auth.header] = authHeader; @@ -284,7 +297,7 @@ export class MessagesPassthrough { response = await fetchFunction(this.resolveBaseUrl(), { method: 'POST', - headers: this.buildHeaders(), + headers: await this.buildHeaders(), body: payloadJson, }); } catch (error) { diff --git a/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts b/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts index aa6d3e1..522e8af 100644 --- a/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts +++ b/gateway/src/infrastructure/passthrough/openai-responses-passthrough.ts @@ -1,59 +1,59 @@ -import { Response as ExpressResponse } from 'express'; -import { logger } from '../utils/logger.js'; -import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; -import { CONTENT_TYPES } from '../../domain/types/provider.js'; -import { getConfig } from '../config/app-config.js'; -import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; -import { injectMemoryContext, persistMemory } from '../memory/memory-helper.js'; - -export class OpenAIResponsesPassthrough implements ResponsesPassthrough { - constructor(private readonly config: ResponsesPassthroughConfig) {} - - private get baseUrl(): string { - return this.config.baseUrl; - } - - private get apiKey(): string { - const envVar 
= this.config.auth?.envVar; - if (envVar) { - const token = process.env[envVar]; - if (token) return token; - } - - const fallback = getConfig().providers.openai.apiKey; - if (fallback) return fallback; - - throw new AuthenticationError('OpenAI API key not configured', { provider: this.config.provider }); - } - - private buildAuthHeader(): string { - const token = this.apiKey; - const { auth } = this.config; - if (!auth) { - return `Bearer ${token}`; - } - - if (auth.template) { - return auth.template.replace('{{token}}', token); - } - - if (auth.scheme) { - return `${auth.scheme} ${token}`.trim(); - } - - return token; - } - - private buildHeaders(): Record { - const headers: Record = { - 'Content-Type': 'application/json', - ...this.config.staticHeaders, - }; - - const headerName = this.config.auth?.header ?? 'Authorization'; - headers[headerName] = this.buildAuthHeader(); - return headers; - } +import { Response as ExpressResponse } from 'express'; +import { logger } from '../utils/logger.js'; +import { AuthenticationError, ProviderError } from '../../shared/errors/index.js'; +import { CONTENT_TYPES } from '../../domain/types/provider.js'; +import { getConfig } from '../config/app-config.js'; +import { ResponsesPassthrough, ResponsesPassthroughConfig } from './responses-passthrough.js'; +import { injectMemoryContext, persistMemory } from '../memory/memory-helper.js'; + +export class OpenAIResponsesPassthrough implements ResponsesPassthrough { + constructor(private readonly config: ResponsesPassthroughConfig) {} + + private get baseUrl(): string { + return this.config.baseUrl; + } + + private resolvedKey: string | undefined; + + private get apiKey(): string { + if (this.resolvedKey) return this.resolvedKey; + const envVar = this.config.auth?.envVar; + if (envVar) { + const token = process.env[envVar]; + if (token) return token; + } + const fallback = getConfig().providers.openai.apiKey; + if (fallback) return fallback; + throw new AuthenticationError('OpenAI API 
key not configured', { provider: this.config.provider }); + } + + private buildAuthHeader(): string { + if (this.resolvedKey) return `Bearer ${this.resolvedKey}`; + const token = this.apiKey; + const { auth } = this.config; + if (!auth) return `Bearer ${token}`; + if (auth.template) return auth.template.replace('{{token}}', token); + if (auth.scheme) return `${auth.scheme} ${token}`.trim(); + return token; + } + + private async buildHeaders(): Promise> { + const headers: Record = { + 'Content-Type': 'application/json', + ...this.config.staticHeaders, + }; + + try { + const { resolveKeyForProvider } = await import('../auth/key-manager.js'); + this.resolvedKey = await resolveKeyForProvider(this.config.provider); + } catch { + this.resolvedKey = undefined; + } + + const headerName = this.config.auth?.header ?? 'Authorization'; + headers[headerName] = this.buildAuthHeader(); + return headers; + } // Store usage data for tracking private usage: { @@ -63,15 +63,15 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { } | null = null; // Buffer to handle multi-chunk SSE events - private eventBuffer: string = ''; - private assistantResponseBuffer: string = ''; + private eventBuffer: string = ''; + private assistantResponseBuffer: string = ''; private async makeRequest(body: any, stream: boolean): Promise { - const response = await fetch(this.baseUrl, { - method: 'POST', - headers: this.buildHeaders(), - body: JSON.stringify({ ...body, stream, store: false }) // Not storing responses - }); + const response = await fetch(this.baseUrl, { + method: 'POST', + headers: await this.buildHeaders(), + body: JSON.stringify({ ...body, stream, store: false }), + }); if (!response.ok) { const errorText = await response.text(); @@ -81,17 +81,17 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { return response; } - private trackUsage(text: string, model: string, clientIp?: string): void { - try { - // Add to buffer to handle multi-chunk 
events - this.eventBuffer += text; - - // Extract assistant response content from text.delta events - const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; - let match; - while ((match = textDeltaMatch.exec(text)) !== null) { - this.assistantResponseBuffer += match[1]; - } + private trackUsage(text: string, model: string, clientIp?: string): void { + try { + // Add to buffer to handle multi-chunk events + this.eventBuffer += text; + + // Extract assistant response content from text.delta events + const textDeltaMatch = /"type":"response\.text\.delta"[^}]*"text":"([^"]+)"/g; + let match; + while ((match = textDeltaMatch.exec(text)) !== null) { + this.assistantResponseBuffer += match[1]; + } // Look for the exact response.completed event if (this.eventBuffer.includes('"type":"response.completed"')) { @@ -178,22 +178,22 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { async handleDirectRequest(request: any, res: ExpressResponse, clientIp?: string): Promise { // Reset usage tracking for new request - this.usage = null; - this.eventBuffer = ''; - this.assistantResponseBuffer = ''; - - injectMemoryContext(request, { - provider: this.config.provider, - defaultUserId: 'default', - extractCurrentUserInputs: req => extractResponsesUserInputs(req), - applyMemoryContext: (req, context) => { - if (req.instructions) { - req.instructions = `${context}\n\n---\n\n${req.instructions}`; - } else { - req.instructions = context; - } - } - }); + this.usage = null; + this.eventBuffer = ''; + this.assistantResponseBuffer = ''; + + injectMemoryContext(request, { + provider: this.config.provider, + defaultUserId: 'default', + extractCurrentUserInputs: req => extractResponsesUserInputs(req), + applyMemoryContext: (req, context) => { + if (req.instructions) { + req.instructions = `${context}\n\n---\n\n${req.instructions}`; + } else { + req.instructions = context; + } + } + }); if (request.stream) { const response = await 
this.makeRequest(request, true); @@ -217,20 +217,20 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { res.write(value); } - res.end(); - - persistMemory(request, this.assistantResponseBuffer, { - provider: this.config.provider, - defaultUserId: 'default', - extractUserContent: req => req.input || '', - metadataBuilder: req => ({ - model: req.model, - provider: this.config.provider, - }), - }); - } else { - const response = await this.makeRequest(request, false); - const json = await response.json(); + res.end(); + + persistMemory(request, this.assistantResponseBuffer, { + provider: this.config.provider, + defaultUserId: 'default', + extractUserContent: req => req.input || '', + metadataBuilder: req => ({ + model: req.model, + provider: this.config.provider, + }), + }); + } else { + const response = await this.makeRequest(request, false); + const json = await response.json(); // Track usage for non-streaming requests if (json.usage) { @@ -258,23 +258,23 @@ export class OpenAIResponsesPassthrough implements ResponsesPassthrough { }).catch(() => {}); } - const assistantResponse = json?.output?.[0]?.content?.[0]?.text || ''; - persistMemory(request, assistantResponse, { - provider: this.config.provider, - defaultUserId: 'default', - extractUserContent: req => req.input || '', - metadataBuilder: req => ({ - model: req.model, - provider: this.config.provider, - }), - }); - - res.json(json); - } - } -} - -function extractResponsesUserInputs(request: any): string[] { - const content = (request.input || '').trim(); - return content ? 
[content] : [];
-}
+
+      const assistantResponse = json?.output?.[0]?.content?.[0]?.text || '';
+      persistMemory(request, assistantResponse, {
+        provider: this.config.provider,
+        defaultUserId: 'default',
+        extractUserContent: req => req.input || '',
+        metadataBuilder: req => ({
+          model: req.model,
+          provider: this.config.provider,
+        }),
+      });
+
+      res.json(json);
+    }
+  }
+}
+
+function extractResponsesUserInputs(request: any): string[] {
+  const content = (request.input || '').trim();
+  return content ? [content] : [];
+}

From 7b2d69b45f85d036eac6adb2adae23d95c93e576 Mon Sep 17 00:00:00 2001
From: Shashank <13179671+sm86@users.noreply.github.com>
Date: Fri, 6 Mar 2026 02:02:23 -0500
Subject: [PATCH 5/5] Update README.md link to new work

https://github.com/ekailabs/contexto
---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index a71413f..baf8f5b 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,10 @@
 [![GitHub stars](https://img.shields.io/github/stars/ekailabs/ekai-gateway.svg?style=social)](https://github.com/ekailabs/ekai-gateway)
 [![Discord](https://img.shields.io/badge/Discord-Join%20Server-7289da?logo=discord&logoColor=white)](https://discord.com/invite/5VsUUEfbJk)
 
+Archived Repo:
+For our latest work, check https://github.com/ekailabs/contexto
+
+
 Multi-provider AI proxy with usage dashboard supporting Anthropic, OpenAI, Google Gemini, xAI, and OpenRouter models through OpenAI-compatible and Anthropic-compatible APIs. **Designed for self-hosted personal use** - run your own instance to securely proxy AI requests using your API keys.