rohitg00 · rohitg00 · May 16, 2026 · May 12, 2026 · May 14, 2026 · May 16, 2026
diff --git a/README.md b/README.md
@@ -1047,6 +1047,27 @@ Create `~/.agentmemory/.env`:
 # GEMINI_API_KEY=...
 # OPENROUTER_API_KEY=...
 # MINIMAX_API_KEY=...
+# OPENAI_API_KEY=***                       # NOTE: this same key auto-activates BOTH the
+#                                          # OpenAI LLM provider (here) AND the OpenAI
+#                                          # embedding provider (further below). Set
+#                                          # OPENAI_API_KEY_FOR_LLM=false to scope it
+#                                          # to embeddings only.
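+# OPENAI_BASE_URL=https://api.openai.com   # Optional: override for Azure / vLLM / LM Studio / proxies.
+#                                          # Give the base URL WITHOUT a trailing /v1 — the provider
+#                                          # appends /v1/chat/completions itself (non-Azure hosts).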
+#                                          # Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
+#                                          # Auto-detected from `.openai.azure.com` hostname; uses
+#                                          # api-key header + api-version query param.
+# OPENAI_API_VERSION=2024-08-01-preview    # Optional: Azure api-version query param
+# OPENAI_MODEL=gpt-4o-mini                 # Optional: default model
+# OPENAI_TIMEOUT_MS=60000                  # Optional: outbound fetch timeout (default 60s)
+# OPENAI_REASONING_EFFORT=none             # Optional: "low" | "medium" | "high" | "none"
+#                                          # Honored only by OpenAI's reasoning models (o1, o3,
+#                                          # gpt-*-reasoning) and providers that mirror that
+#                                          # schema (Ollama Cloud thinking models). Standard
+#                                          # chat models reject this field with 400. Set to
+#                                          # "none" for thinking models that return reasoning
+#                                          # but no content.
+# OPENAI_API_KEY_FOR_LLM=false             # Optional: set to false to skip OpenAI auto-detection
+#                                          # for LLM (useful if you only want OpenAI for embeddings)
 # Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk);
 # leave OFF unless you understand the Stop-hook recursion risk (#149 follow-up):
 # AGENTMEMORY_ALLOW_AGENT_SDK=true

diff --git a/src/config.ts b/src/config.ts
@@ -50,6 +50,16 @@ function hasRealValue(v: string | undefined): v is string {
 function detectProvider(env: Record<string, string>): ProviderConfig {
   const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10);
 
+  // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio
+  if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") {
+    return {
+      provider: "openai",
+      model: env["OPENAI_MODEL"] || "gpt-4o-mini",
+      maxTokens,
+      baseURL: env["OPENAI_BASE_URL"],
+    };
+  }
+
   // MiniMax: Anthropic-compatible API, requires raw fetch to avoid SDK stainless headers
   if (hasRealValue(env["MINIMAX_API_KEY"])) {
     return {
@@ -92,7 +102,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
   if (!allowAgentSdk) {
     process.stderr.write(
       "[agentmemory] No LLM provider key found " +
-        "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY). " +
+        "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " +
         "LLM-backed compression and summarization are DISABLED — using no-op provider. " +
         "This is the safe default: the agent-sdk fallback used to spawn Claude Agent SDK " +
         "child sessions which inherit Claude Code's plugin hooks and cause infinite Stop-hook " +
@@ -156,7 +166,9 @@ export function detectLlmProviderKind(): "llm" | "noop" {
     hasRealValue(env["GEMINI_API_KEY"]) ||
     hasRealValue(env["GOOGLE_API_KEY"]) ||
     hasRealValue(env["OPENROUTER_API_KEY"]) ||
-    hasRealValue(env["MINIMAX_API_KEY"])
+    hasRealValue(env["MINIMAX_API_KEY"]) ||
+    (hasRealValue(env["OPENAI_API_KEY"]) &&
+      env["OPENAI_API_KEY_FOR_LLM"] !== "false")
   ) {
     return "llm";
   }
@@ -292,6 +304,7 @@ const VALID_PROVIDERS = new Set([
   "openrouter",
   "agent-sdk",
   "minimax",
+  "openai",
 ]);
 
 export function loadFallbackConfig(): FallbackConfig {

diff --git a/src/providers/index.ts b/src/providers/index.ts
@@ -7,6 +7,7 @@ import { AgentSDKProvider } from "./agent-sdk.js";
 import { AnthropicProvider } from "./anthropic.js";
 import { MinimaxProvider } from "./minimax.js";
 import { NoopProvider } from "./noop.js";
+import { OpenAIProvider } from "./openai.js";
 import { OpenRouterProvider } from "./openrouter.js";
 import { ResilientProvider } from "./resilient.js";
 import { FallbackChainProvider } from "./fallback-chain.js";
@@ -94,6 +95,20 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
         config.maxTokens,
         "https://openrouter.ai/api/v1/chat/completions",
       );
+    case "openai": {
+      const openaiKey = getEnvVar("OPENAI_API_KEY");
+      if (!openaiKey) {
+        throw new Error(
+          "OPENAI_API_KEY is required for the openai provider",
+        );
+      }
+      return new OpenAIProvider(
+        openaiKey,
+        config.model,
+        config.maxTokens,
+        config.baseURL,
+      );
+    }
     case "noop":
       return new NoopProvider();
     case "agent-sdk":

diff --git a/src/providers/openai.ts b/src/providers/openai.ts
@@ -0,0 +1,179 @@
+import type { MemoryProvider } from "../types.js";
+import { getEnvVar } from "../config.js";
+
+const DEFAULT_BASE_URL = "https://api.openai.com";
+const DEFAULT_MODEL = "gpt-4o-mini";
+const DEFAULT_TIMEOUT_MS = 60_000;
+const DEFAULT_AZURE_API_VERSION = "2024-08-01-preview";
+
+/**
+ * OpenAI-compatible LLM provider.
+ *
+ * Uses raw fetch (no SDK) so it can talk to any OpenAI-compatible endpoint:
+ *   - OpenAI official
+ *   - Azure OpenAI (auto-detected from a .openai.azure.com host)
+ *   - DeepSeek
+ *   - SiliconFlow
+ *   - vLLM / LM Studio / Ollama (with OpenAI compatibility layer)
+ *   - Any other proxy implementing /v1/chat/completions
+ *
+ * Required env vars:
+ *   OPENAI_API_KEY  — API key
+ *
+ * Optional:
+ *   OPENAI_BASE_URL          — base URL without path (default: https://api.openai.com).
+ *                              Non-Azure hosts get `/v1/chat/completions` appended;
+ *                              Azure hosts get `/chat/completions?api-version=…`.
+ *                              Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
+ *   OPENAI_MODEL             — model name (default: gpt-4o-mini)
+ *   OPENAI_API_VERSION       — Azure api-version query param (default: 2024-08-01-preview)
+ *   OPENAI_TIMEOUT_MS        — outbound request timeout in ms (default: 60000)
+ *   MAX_TOKENS               — max output tokens (default: from config or 4096)
+ *   OPENAI_REASONING_EFFORT  — "low" | "medium" | "high" | "none"
+ *                              Passthrough for reasoning models (e.g. Ollama Cloud
+ *                              thinking models). Set to "none" to ensure
+ *                              message.content is populated instead of only
+ *                              message.reasoning.
+ */
+export class OpenAIProvider implements MemoryProvider {
+  name = "openai";
+  private readonly apiKey: string;
+  private readonly model: string;
+  private readonly maxTokens: number;
+  private readonly baseUrl: string;
+  private readonly reasoningEffort?: string;
+  private readonly timeoutMs: number;
+  private readonly isAzure: boolean;
+  private readonly azureApiVersion: string;
+
+  /**
+   * @param apiKey    API key sent as `Authorization: Bearer` (or `api-key` on Azure).
+   * @param model     Model name placed in the request body.
+   * @param maxTokens Value sent as `max_tokens`.
+   * @param baseURL   Optional base URL; falls back to OPENAI_BASE_URL, then the
+   *                  OpenAI default. Trailing slashes are stripped.
+   */
+  constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) {
+    this.apiKey = apiKey;
+    this.model = model;
+    this.maxTokens = maxTokens;
+    this.baseUrl = (
+      baseURL ||
+      getEnvVar("OPENAI_BASE_URL") ||
+      DEFAULT_BASE_URL
+    ).replace(/\/+$/, "");
+    this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined;
+    this.timeoutMs = parseTimeout(getEnvVar("OPENAI_TIMEOUT_MS"));
+    this.azureApiVersion =
+      getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION;
+    this.isAzure = detectAzure(this.baseUrl);
+  }
+
+  /** Sends the prompts as one chat completion and returns the model's text. */
+  async compress(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt);
+  }
+
+  /** Sends the prompts as one chat completion and returns the model's text. */
+  async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt);
+  }
+
+  private buildUrl(): string {
+    // Azure OpenAI carries the deployment in the path and requires
+    // `api-version` as a query param. Standard OpenAI-compatible
+    // endpoints append /v1/chat/completions to the base.
+    if (this.isAzure) {
+      const sep = this.baseUrl.includes("?") ? "&" : "?";
+      return `${this.baseUrl}/chat/completions${sep}api-version=${encodeURIComponent(this.azureApiVersion)}`;
+    }
+    return `${this.baseUrl}/v1/chat/completions`;
+  }
+
+  private buildHeaders(): Record<string, string> {
+    // Azure uses `api-key: <KEY>`; everyone else uses `Authorization: Bearer <KEY>`.
+    if (this.isAzure) {
+      return {
+        "Content-Type": "application/json",
+        "api-key": this.apiKey,
+      };
+    }
+    return {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${this.apiKey}`,
+    };
+  }
+
+  /**
+   * Pulls the assistant text out of a parsed chat-completions payload.
+   *
+   * Prefers a non-empty string `message.content`; falls back to a non-empty
+   * string `message.reasoning`, because some thinking models return reasoning
+   * but no content. Non-string values are ignored rather than returned.
+   *
+   * @returns the text, or `undefined` when neither field holds usable text.
+   */
+  private static extractText(data: unknown): string | undefined {
+    const message = (
+      data as {
+        choices?: Array<{ message?: { content?: unknown; reasoning?: unknown } }>;
+      } | null
+    )?.choices?.[0]?.message;
+    for (const candidate of [message?.content, message?.reasoning]) {
+      if (typeof candidate === "string" && candidate.length > 0) {
+        return candidate;
+      }
+    }
+    return undefined;
+  }
+
+  /**
+   * Performs one POST to the chat-completions endpoint.
+   *
+   * @throws Error when the request exceeds the configured timeout, when the
+   *         endpoint answers with a non-2xx status, or when the payload
+   *         carries no usable text. Network and JSON-parse errors propagate
+   *         unchanged.
+   */
+  private async call(systemPrompt: string, userPrompt: string): Promise<string> {
+    const url = this.buildUrl();
+    const body: Record<string, unknown> = {
+      model: this.model,
+      max_tokens: this.maxTokens,
+      messages: [
+        { role: "system", content: systemPrompt },
+        { role: "user", content: userPrompt },
+      ],
+    };
+    // Only sent when configured, so endpoints that do not know the field
+    // never see it.
+    if (this.reasoningEffort) {
+      body.reasoning_effort = this.reasoningEffort;
+    }
+
+    // Bound the request with an AbortController so a hung provider
+    // can't stall the worker. The timer stays armed until the response
+    // body has been read: fetch() resolves once headers arrive, and a
+    // provider that stalls mid-body would otherwise hang unbounded.
+    const ac = new AbortController();
+    const timer = setTimeout(() => ac.abort(), this.timeoutMs);
+    let ok: boolean;
+    let status: number;
+    let rawBody: string;
+    try {
+      const response = await fetch(url, {
+        method: "POST",
+        headers: this.buildHeaders(),
+        body: JSON.stringify(body),
+        signal: ac.signal,
+      });
+      ok = response.ok;
+      status = response.status;
+      rawBody = await response.text();
+    } catch (err) {
+      const aborted =
+        ac.signal.aborted ||
+        (err instanceof Error && err.name === "AbortError");
+      if (aborted) {
+        throw new Error(
+          `OpenAI API request timed out after ${this.timeoutMs}ms — set OPENAI_TIMEOUT_MS to raise the bound or check the provider status.`,
+        );
+      }
+      throw err;
+    } finally {
+      clearTimeout(timer);
+    }
+
+    if (!ok) {
+      throw new Error(`OpenAI API error (${status}): ${rawBody}`);
+    }
+
+    const data: unknown = JSON.parse(rawBody);
+    const text = OpenAIProvider.extractText(data);
+    if (text !== undefined) {
+      return text;
+    }
+    throw new Error(
+      `OpenAI returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`,
+    );
+  }
+}
+
+/**
+ * Turns the raw OPENAI_TIMEOUT_MS setting into a delay usable with setTimeout.
+ *
+ * @param raw Raw setting value; may be missing or empty.
+ * @returns The parsed positive integer, capped at the largest delay the
+ *          timer honors, or DEFAULT_TIMEOUT_MS when the value is missing,
+ *          non-numeric, zero, or negative.
+ */
+function parseTimeout(raw: string | null | undefined): number {
+  // Timer delays are stored as signed 32-bit ints; anything larger is
+  // reset to 1 ms by the runtime, which would abort every request at once.
+  const maxTimerDelayMs = 2_147_483_647;
+  if (!raw) return DEFAULT_TIMEOUT_MS;
+  const n = parseInt(raw, 10);
+  if (!Number.isFinite(n) || n <= 0) return DEFAULT_TIMEOUT_MS;
+  return Math.min(n, maxTimerDelayMs);
+}
+
+/**
+ * Reports whether a base URL points at an Azure OpenAI resource.
+ *
+ * Azure resource URLs have a hostname of the form <resource>.openai.azure.com;
+ * `OPENAI_BASE_URL=https://<r>.openai.azure.com/openai/deployments/<d>` is the
+ * documented opt-in shape.
+ *
+ * @param baseUrl Base URL to inspect.
+ * @returns true when the hostname ends with `.openai.azure.com`; false
+ *          otherwise, including when the value cannot be parsed as a URL.
+ */
+function detectAzure(baseUrl: string): boolean {
+  let host: string;
+  try {
+    host = new URL(baseUrl).hostname;
+  } catch {
+    // Not a parseable absolute URL, so it cannot be an Azure resource.
+    return false;
+  }
+  return host.endsWith(".openai.azure.com");
+}
diff --git a/src/types.ts b/src/types.ts
@@ -129,7 +129,7 @@ export interface ProviderConfig {
   baseURL?: string;
 }
 
-export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "noop";
+export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "openai" | "noop";
 
 export interface MemoryProvider {
   name: string;