From e012e733f3c40260027eb065d0675b894697d95a Mon Sep 17 00:00:00 2001
From: VantHoff <37131177+fatinghenji@users.noreply.github.com>
Date: Wed, 13 May 2026 01:40:09 +0800
Subject: [PATCH 1/3] feat: add OpenAI-compatible LLM provider

- Add OpenAIProvider using raw fetch (no SDK dependency)
- Supports any /v1/chat/completions endpoint: OpenAI, DeepSeek,
  SiliconFlow, Azure OpenAI, vLLM, LM Studio, Ollama
- Auto-detect the provider from OPENAI_API_KEY; set
  OPENAI_API_KEY_FOR_LLM=false to opt out
- Add OPENAI_REASONING_EFFORT passthrough for thinking models
  (e.g. Ollama Cloud kimi-k2.6) to ensure content is populated
- Update README with OpenAI provider table, env vars, and reasoning config
---
 README.md               |  8 ++++
 src/config.ts           | 16 ++++++-
 src/providers/index.ts  | 15 +++++++
 src/providers/openai.ts | 99 +++++++++++++++++++++++++++++++++++++++++
 src/types.ts            |  2 +-
 5 files changed, 137 insertions(+), 3 deletions(-)
 create mode 100644 src/providers/openai.ts

diff --git a/README.md b/README.md
index 4fbbea9b..8561db18 100644
--- a/README.md
+++ b/README.md
@@ -1047,6 +1047,14 @@ Create `~/.agentmemory/.env`:
 # GEMINI_API_KEY=...
 # OPENROUTER_API_KEY=...
 # MINIMAX_API_KEY=...
+# OPENAI_API_KEY=***
+# OPENAI_BASE_URL=https://api.openai.com   # Optional: override for Azure / vLLM / LM Studio / proxies
+# OPENAI_MODEL=gpt-4o-mini                 # Optional: default model
+# OPENAI_REASONING_EFFORT=none             # Optional: "low" | "medium" | "high" | "none"
+#                                          # Set to "none" for thinking models (e.g. Ollama Cloud)
+#                                          # that return reasoning but no content.
+# OPENAI_API_KEY_FOR_LLM=false             # Optional: set to false to skip OpenAI auto-detection
+#                                          # for LLM (useful if you only want OpenAI for embeddings)
 # Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk);
 # leave OFF unless you understand the Stop-hook recursion risk (#149 follow-up):
 # AGENTMEMORY_ALLOW_AGENT_SDK=true
diff --git a/src/config.ts b/src/config.ts
index a4b676cf..4d13b57b 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -50,6 +50,16 @@ function hasRealValue(v: string | undefined): v is string {
 function detectProvider(env: Record<string, string>): ProviderConfig {
   const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10);
 
+  // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio
+  if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") {
+    return {
+      provider: "openai",
+      model: env["OPENAI_MODEL"] || "gpt-4o-mini",
+      maxTokens,
+      baseURL: env["OPENAI_BASE_URL"],
+    };
+  }
+
   // MiniMax: Anthropic-compatible API, requires raw fetch to avoid SDK stainless headers
   if (hasRealValue(env["MINIMAX_API_KEY"])) {
     return {
@@ -92,7 +102,7 @@ function detectProvider(env: Record<string, string>): ProviderConfig {
   if (!allowAgentSdk) {
     process.stderr.write(
       "[agentmemory] No LLM provider key found " +
-        "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY). " +
+        "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " +
         "LLM-backed compression and summarization are DISABLED — using no-op provider. " +
         "This is the safe default: the agent-sdk fallback used to spawn Claude Agent SDK " +
         "child sessions which inherit Claude Code's plugin hooks and cause infinite Stop-hook " +
@@ -156,7 +166,8 @@ export function detectLlmProviderKind(): "llm" | "noop" {
     hasRealValue(env["GEMINI_API_KEY"]) ||
     hasRealValue(env["GOOGLE_API_KEY"]) ||
     hasRealValue(env["OPENROUTER_API_KEY"]) ||
-    hasRealValue(env["MINIMAX_API_KEY"])
+    hasRealValue(env["MINIMAX_API_KEY"]) ||
+    hasRealValue(env["OPENAI_API_KEY"])
   ) {
     return "llm";
   }
@@ -292,6 +303,7 @@ const VALID_PROVIDERS = new Set([
   "openrouter",
   "agent-sdk",
   "minimax",
+  "openai",
 ]);
 
 export function loadFallbackConfig(): FallbackConfig {
diff --git a/src/providers/index.ts b/src/providers/index.ts
index b22907bc..5de6807c 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -7,6 +7,7 @@ import { AgentSDKProvider } from "./agent-sdk.js";
 import { AnthropicProvider } from "./anthropic.js";
 import { MinimaxProvider } from "./minimax.js";
 import { NoopProvider } from "./noop.js";
+import { OpenAIProvider } from "./openai.js";
 import { OpenRouterProvider } from "./openrouter.js";
 import { ResilientProvider } from "./resilient.js";
 import { FallbackChainProvider } from "./fallback-chain.js";
@@ -94,6 +95,20 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider {
         config.maxTokens,
         "https://openrouter.ai/api/v1/chat/completions",
       );
+    case "openai": {
+      const openaiKey = getEnvVar("OPENAI_API_KEY");
+      if (!openaiKey) {
+        throw new Error(
+          "OPENAI_API_KEY is required for the openai provider",
+        );
+      }
+      return new OpenAIProvider(
+        openaiKey,
+        config.model,
+        config.maxTokens,
+        config.baseURL,
+      );
+    }
     case "noop":
       return new NoopProvider();
     case "agent-sdk":
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
new file mode 100644
index 00000000..9745a135
--- /dev/null
+++ b/src/providers/openai.ts
@@ -0,0 +1,99 @@
+import type { MemoryProvider } from "../types.js";
+import { getEnvVar } from "../config.js";
+
+const DEFAULT_BASE_URL = "https://api.openai.com";
+const DEFAULT_MODEL = "gpt-4o-mini";
+
+/**
+ * OpenAI-compatible LLM provider.
+ *
+ * Uses raw fetch (no SDK) to support any OpenAI-compatible endpoint:
+ *   - OpenAI official
+ *   - Azure OpenAI
+ *   - DeepSeek
+ *   - 硅基流动 (SiliconFlow)
+ *   - vLLM / LM Studio / Ollama (with OpenAI compatibility layer)
+ *   - Any other proxy implementing /v1/chat/completions
+ *
+ * Required env vars:
+ *   OPENAI_API_KEY  — API key
+ *
+ * Optional:
+ *   OPENAI_BASE_URL         — base URL without path (default: https://api.openai.com)
+ *   OPENAI_MODEL            — model name (default: gpt-4o-mini)
+ *   MAX_TOKENS              — max output tokens (default: from config or 4096)
+ *   OPENAI_REASONING_EFFORT — "low" | "medium" | "high" | "none"
+ *                             Passthrough for reasoning models (e.g. Ollama Cloud
+ *                             thinking models). Set to "none" to ensure
+ *                             message.content is populated instead of only
+ *                             message.reasoning.
+ */
+export class OpenAIProvider implements MemoryProvider {
+  name = "openai";
+  private apiKey: string;
+  private model: string;
+  private maxTokens: number;
+  private baseUrl: string;
+  private reasoningEffort?: string;
+
+  constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) {
+    this.apiKey = apiKey;
+    this.model = model;
+    this.maxTokens = maxTokens;
+    this.baseUrl = baseURL || getEnvVar("OPENAI_BASE_URL") || DEFAULT_BASE_URL;
+    this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined;
+  }
+
+  async compress(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt);
+  }
+
+  async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt);
+  }
+
+  private async call(systemPrompt: string, userPrompt: string): Promise<string> {
+    const url = `${this.baseUrl}/v1/chat/completions`;
+    const body: Record<string, unknown> = {
+      model: this.model,
+      max_tokens: this.maxTokens,
+      messages: [
+        { role: "system", content: systemPrompt },
+        { role: "user", content: userPrompt },
+      ],
+    };
+    if (this.reasoningEffort) {
+      body.reasoning_effort = this.reasoningEffort;
+    }
+    const response = await fetch(url, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${this.apiKey}`,
+      },
+      body: JSON.stringify(body),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`OpenAI API error (${response.status}): ${text}`);
+    }
+
+    const data = (await response.json()) as {
+      choices?: Array<{ message?: { content?: string; reasoning?: string } }>;
+    };
+    const message = data.choices?.[0]?.message;
+    const content = message?.content;
+    if (content) {
+      return content;
+    }
+    // Fallback: some thinking models return reasoning but no content
+    const reasoning = message?.reasoning;
+    if (reasoning) {
+      return reasoning;
+    }
+    throw new Error(
+      `OpenAI returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`,
+    );
+  }
+}
diff --git a/src/types.ts b/src/types.ts
index 70b05010..687469f7 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -129,7 +129,7 @@ export interface ProviderConfig {
   baseURL?: string;
 }
 
-export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "noop";
+export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "openai" | "noop";
 
 export interface MemoryProvider {
   name: string;

From d0e99bcf81301a450fbf2854175c9a23f85412be Mon Sep 17 00:00:00 2001
From: Rohit Ghumare <ghumare64@gmail.com>
Date: Thu, 14 May 2026 12:05:00 +0100
Subject: [PATCH 2/3] fix(config): honor OPENAI_API_KEY_FOR_LLM=false in
 detectLlmProviderKind
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`detectProvider()` correctly gates OpenAI auto-detection on
OPENAI_API_KEY_FOR_LLM !== "false", but `detectLlmProviderKind()` did
not. Users who set OPENAI_API_KEY only for embeddings (via the
existing OPENAI_BASE_URL + OPENAI_EMBEDDING_MODEL flow from #186)
therefore saw /agentmemory/config/flags report `provider: llm` even
though detectProvider() had skipped OpenAI and, with no other LLM key
set, fell back to the noop provider.

Also clarify in the README that OPENAI_REASONING_EFFORT is honored
only by reasoning models (o1, o3, gpt-*-reasoning) and providers
that mirror that schema (Ollama Cloud thinking models). Standard
chat models reject the field with 400.

Verified:
- OPENAI_API_KEY=sk-... + OPENAI_API_KEY_FOR_LLM=false now returns
  "noop" from detectLlmProviderKind (was "llm" before the fix).
- OPENAI_API_KEY=sk-... alone still returns "llm" (intended default).
- npm run build clean.

Note: 10 pre-existing test failures on test/mcp-standalone.test.ts
are a stale-branch artefact — this branch is 10 commits behind main
and is missing the MCP shim fixes that landed via #311 / #327.
Recommend rebasing on main (or "Update branch" via the GitHub UI)
before merge.
---
 README.md     | 8 ++++++--
 src/config.ts | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 8561db18..052bf36f 100644
--- a/README.md
+++ b/README.md
@@ -1051,8 +1051,12 @@ Create `~/.agentmemory/.env`:
 # OPENAI_BASE_URL=https://api.openai.com   # Optional: override for Azure / vLLM / LM Studio / proxies
 # OPENAI_MODEL=gpt-4o-mini                 # Optional: default model
 # OPENAI_REASONING_EFFORT=none             # Optional: "low" | "medium" | "high" | "none"
-#                                          # Set to "none" for thinking models (e.g. Ollama Cloud)
-#                                          # that return reasoning but no content.
+#                                          # Honored only by OpenAI's reasoning models (o1, o3,
+#                                          # gpt-*-reasoning) and providers that mirror that
+#                                          # schema (Ollama Cloud thinking models). Standard
+#                                          # chat models reject this field with 400. Set to
+#                                          # "none" for thinking models that return reasoning
+#                                          # but no content.
 # OPENAI_API_KEY_FOR_LLM=false             # Optional: set to false to skip OpenAI auto-detection
 #                                          # for LLM (useful if you only want OpenAI for embeddings)
 # Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk);
diff --git a/src/config.ts b/src/config.ts
index 4d13b57b..4a416ed1 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -167,7 +167,8 @@ export function detectLlmProviderKind(): "llm" | "noop" {
     hasRealValue(env["GOOGLE_API_KEY"]) ||
     hasRealValue(env["OPENROUTER_API_KEY"]) ||
     hasRealValue(env["MINIMAX_API_KEY"]) ||
-    hasRealValue(env["OPENAI_API_KEY"])
+    (hasRealValue(env["OPENAI_API_KEY"]) &&
+      env["OPENAI_API_KEY_FOR_LLM"] !== "false")
   ) {
     return "llm";
   }

From b06406bc2429f37a67abf62030b1d26ade8d5564 Mon Sep 17 00:00:00 2001
From: Rohit Ghumare <ghumare64@gmail.com>
Date: Sat, 16 May 2026 18:17:52 +0100
Subject: [PATCH 3/3] =?UTF-8?q?feat(providers):=20openai=20=E2=80=94=20fet?=
 =?UTF-8?q?ch=20timeout,=20Azure=20auto-detect,=20OPENAI=5FAPI=5FKEY=20sco?=
 =?UTF-8?q?pe=20hint?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three follow-ups against the v2 PR so reviewers can see what's left
before merge:

1. Outbound fetch timeout. The other raw-fetch providers
   (anthropic / gemini / openrouter / minimax) also lack one; that
   repo-wide gap is tracked as a follow-up issue. This PR adds the
   bound to the new OpenAI provider only: AbortController +
   setTimeout, default 60s, overridable via OPENAI_TIMEOUT_MS. The
   timeout error message explains how to raise the bound.

2. Azure OpenAI auto-detection. Azure is in the README's supported
   list, but the code only emitted the standard OpenAI request shape.
   The provider now detects `.openai.azure.com` hostnames at
   construction time and:
     - swaps `Authorization: Bearer` for `api-key: <KEY>`
     - drops the `/v1` path prefix (deployment is baked into base URL)
     - appends `api-version=<version>` query param (env-overridable
       via OPENAI_API_VERSION)
   Default api-version is `2024-08-01-preview` per Azure docs.
   Existing users of the standard OpenAI shape are unaffected — the
   detection is purely hostname-driven.

3. README clarity on `OPENAI_API_KEY` shared use. Embeddings (#186)
   and this PR's LLM both auto-activate from the same key, which
   surprises users who only meant one or the other. Added an
   explicit callout above the LLM section pointing at the
   `OPENAI_API_KEY_FOR_LLM=false` opt-out. Also documents the new
   timeout + Azure base-URL shape inline.

Build clean. Tests 954/954 (10 pre-existing mcp-standalone failures
on main are not in this branch's snapshot).
---
 README.md               |  11 +++-
 src/providers/openai.ts | 118 +++++++++++++++++++++++++++++++++-------
 2 files changed, 109 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 052bf36f..724ffc88 100644
--- a/README.md
+++ b/README.md
@@ -1047,9 +1047,18 @@ Create `~/.agentmemory/.env`:
 # GEMINI_API_KEY=...
 # OPENROUTER_API_KEY=...
 # MINIMAX_API_KEY=...
-# OPENAI_API_KEY=***
+# OPENAI_API_KEY=***                       # NOTE: this same key auto-activates BOTH the
+#                                          # OpenAI LLM provider (here) AND the OpenAI
+#                                          # embedding provider (further below). Set
+#                                          # OPENAI_API_KEY_FOR_LLM=false to scope it
+#                                          # to embeddings only.
 # OPENAI_BASE_URL=https://api.openai.com   # Optional: override for Azure / vLLM / LM Studio / proxies
+#                                          # Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
+#                                          # Auto-detected from `.openai.azure.com` hostname; uses
+#                                          # api-key header + api-version query param.
+# OPENAI_API_VERSION=2024-08-01-preview    # Optional: Azure api-version query param
 # OPENAI_MODEL=gpt-4o-mini                 # Optional: default model
+# OPENAI_TIMEOUT_MS=60000                  # Optional: outbound fetch timeout (default 60s)
 # OPENAI_REASONING_EFFORT=none             # Optional: "low" | "medium" | "high" | "none"
 #                                          # Honored only by OpenAI's reasoning models (o1, o3,
 #                                          # gpt-*-reasoning) and providers that mirror that
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
index 9745a135..d8c16ce9 100644
--- a/src/providers/openai.ts
+++ b/src/providers/openai.ts
@@ -3,13 +3,15 @@ import { getEnvVar } from "../config.js";
 
 const DEFAULT_BASE_URL = "https://api.openai.com";
 const DEFAULT_MODEL = "gpt-4o-mini";
+const DEFAULT_TIMEOUT_MS = 60_000;
+const DEFAULT_AZURE_API_VERSION = "2024-08-01-preview";
 
 /**
  * OpenAI-compatible LLM provider.
  *
  * Uses raw fetch (no SDK) to support any OpenAI-compatible endpoint:
  *   - OpenAI official
- *   - Azure OpenAI
+ *   - Azure OpenAI (auto-detected from .openai.azure.com host)
  *   - DeepSeek
  *   - 硅基流动 (SiliconFlow)
  *   - vLLM / LM Studio / Ollama (with OpenAI compatibility layer)
@@ -19,14 +21,17 @@ const DEFAULT_MODEL = "gpt-4o-mini";
  *   OPENAI_API_KEY  — API key
  *
  * Optional:
- *   OPENAI_BASE_URL         — base URL without path (default: https://api.openai.com)
- *   OPENAI_MODEL            — model name (default: gpt-4o-mini)
- *   MAX_TOKENS              — max output tokens (default: from config or 4096)
- *   OPENAI_REASONING_EFFORT — "low" | "medium" | "high" | "none"
- *                             Passthrough for reasoning models (e.g. Ollama Cloud
- *                             thinking models). Set to "none" to ensure
- *                             message.content is populated instead of only
- *                             message.reasoning.
+ *   OPENAI_BASE_URL          — base URL without path (default: https://api.openai.com).
+ *                              Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
+ *   OPENAI_MODEL             — model name (default: gpt-4o-mini)
+ *   OPENAI_API_VERSION       — Azure api-version query param (default: 2024-08-01-preview)
+ *   OPENAI_TIMEOUT_MS        — outbound fetch timeout in ms (default: 60000)
+ *   MAX_TOKENS               — max output tokens (default: from config or 4096)
+ *   OPENAI_REASONING_EFFORT  — "low" | "medium" | "high" | "none"
+ *                              Passthrough for reasoning models (e.g. Ollama Cloud
+ *                              thinking models). Set to "none" to ensure
+ *                              message.content is populated instead of only
+ *                              message.reasoning.
  */
 export class OpenAIProvider implements MemoryProvider {
   name = "openai";
@@ -35,13 +40,24 @@ export class OpenAIProvider implements MemoryProvider {
   private maxTokens: number;
   private baseUrl: string;
   private reasoningEffort?: string;
+  private timeoutMs: number;
+  private isAzure: boolean;
+  private azureApiVersion: string;
 
   constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) {
     this.apiKey = apiKey;
     this.model = model;
     this.maxTokens = maxTokens;
-    this.baseUrl = baseURL || getEnvVar("OPENAI_BASE_URL") || DEFAULT_BASE_URL;
+    this.baseUrl = (
+      baseURL ||
+      getEnvVar("OPENAI_BASE_URL") ||
+      DEFAULT_BASE_URL
+    ).replace(/\/+$/, "");
     this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined;
+    this.timeoutMs = parseTimeout(getEnvVar("OPENAI_TIMEOUT_MS"));
+    this.azureApiVersion =
+      getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION;
+    this.isAzure = detectAzure(this.baseUrl);
   }
 
   async compress(systemPrompt: string, userPrompt: string): Promise<string> {
@@ -52,8 +68,33 @@ export class OpenAIProvider implements MemoryProvider {
     return this.call(systemPrompt, userPrompt);
   }
 
+  private buildUrl(): string {
+    // Azure OpenAI carries the deployment in the path and requires
+    // `api-version` as a query param. Standard OpenAI-compatible
+    // endpoints append /v1/chat/completions to the base.
+    if (this.isAzure) {
+      const sep = this.baseUrl.includes("?") ? "&" : "?";
+      return `${this.baseUrl}/chat/completions${sep}api-version=${encodeURIComponent(this.azureApiVersion)}`;
+    }
+    return `${this.baseUrl}/v1/chat/completions`;
+  }
+
+  private buildHeaders(): Record<string, string> {
+    // Azure uses `api-key: <KEY>`; everyone else uses `Authorization: Bearer <KEY>`.
+    if (this.isAzure) {
+      return {
+        "Content-Type": "application/json",
+        "api-key": this.apiKey,
+      };
+    }
+    return {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${this.apiKey}`,
+    };
+  }
+
   private async call(systemPrompt: string, userPrompt: string): Promise<string> {
-    const url = `${this.baseUrl}/v1/chat/completions`;
+    const url = this.buildUrl();
     const body: Record<string, unknown> = {
       model: this.model,
       max_tokens: this.maxTokens,
@@ -65,14 +106,35 @@ export class OpenAIProvider implements MemoryProvider {
     if (this.reasoningEffort) {
       body.reasoning_effort = this.reasoningEffort;
     }
-    const response = await fetch(url, {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        Authorization: `Bearer ${this.apiKey}`,
-      },
-      body: JSON.stringify(body),
-    });
+
+    // Bound the request with an AbortController so a hung provider
+    // can't stall the worker. The other raw-fetch providers
+    // (anthropic, gemini, openrouter, minimax) have the same gap
+    // tracked in a follow-up issue; this PR fixes it for the new
+    // surface only.
+    const ac = new AbortController();
+    const t = setTimeout(() => ac.abort(), this.timeoutMs);
+    let response: Response;
+    try {
+      response = await fetch(url, {
+        method: "POST",
+        headers: this.buildHeaders(),
+        body: JSON.stringify(body),
+        signal: ac.signal,
+      });
+    } catch (err) {
+      const aborted =
+        ac.signal.aborted ||
+        (err instanceof Error && err.name === "AbortError");
+      if (aborted) {
+        throw new Error(
+          `OpenAI API request timed out after ${this.timeoutMs}ms — set OPENAI_TIMEOUT_MS to raise the bound or check the provider status.`,
+        );
+      }
+      throw err;
+    } finally {
+      clearTimeout(t);
+    }
 
     if (!response.ok) {
       const text = await response.text();
@@ -97,3 +159,21 @@ export class OpenAIProvider implements MemoryProvider {
     );
   }
 }
+
+function parseTimeout(raw: string | null | undefined): number {
+  if (!raw) return DEFAULT_TIMEOUT_MS;
+  const n = parseInt(raw, 10);
+  return Number.isFinite(n) && n > 0 ? n : DEFAULT_TIMEOUT_MS;
+}
+
+function detectAzure(baseUrl: string): boolean {
+  // Azure resource URLs land at <resource>.openai.azure.com. The
+  // `OPENAI_BASE_URL=https://<r>.openai.azure.com/openai/deployments/<d>`
+  // shape is the documented opt-in path.
+  try {
+    const u = new URL(baseUrl);
+    return u.hostname.endsWith(".openai.azure.com");
+  } catch {
+    return false;
+  }
+}