From e012e733f3c40260027eb065d0675b894697d95a Mon Sep 17 00:00:00 2001 From: VantHoff <37131177+fatinghenji@users.noreply.github.com> Date: Wed, 13 May 2026 01:40:09 +0800 Subject: [PATCH 1/3] feat: add OpenAI-compatible LLM provider - Add OpenAIProvider using raw fetch (no SDK dependency) - Supports any /v1/chat/completions endpoint: OpenAI, DeepSeek, SiliconFlow, Azure OpenAI, vLLM, LM Studio, Ollama - Auto-detects OPENAI_API_KEY with OPENAI_API_KEY_FOR_LLM opt-out - Add OPENAI_REASONING_EFFORT passthrough for thinking models (e.g. Ollama Cloud kimi-k2.6) to ensure content is populated - Update README with OpenAI provider table, env vars, and reasoning config --- README.md | 8 ++++ src/config.ts | 16 ++++++- src/providers/index.ts | 15 +++++++ src/providers/openai.ts | 99 +++++++++++++++++++++++++++++++++++++++++ src/types.ts | 2 +- 5 files changed, 137 insertions(+), 3 deletions(-) create mode 100644 src/providers/openai.ts diff --git a/README.md b/README.md index 4fbbea9b..8561db18 100644 --- a/README.md +++ b/README.md @@ -1047,6 +1047,14 @@ Create `~/.agentmemory/.env`: # GEMINI_API_KEY=... # OPENROUTER_API_KEY=... # MINIMAX_API_KEY=... +# OPENAI_API_KEY=*** +# OPENAI_BASE_URL=https://api.openai.com # Optional: override for Azure / vLLM / LM Studio / proxies +# OPENAI_MODEL=gpt-4o-mini # Optional: default model +# OPENAI_REASONING_EFFORT=none # Optional: "low" | "medium" | "high" | "none" +# # Set to "none" for thinking models (e.g. Ollama Cloud) +# # that return reasoning but no content. 
+# OPENAI_API_KEY_FOR_LLM=false # Optional: set to false to skip OpenAI auto-detection +# # for LLM (useful if you only want OpenAI for embeddings) # Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk); # leave OFF unless you understand the Stop-hook recursion risk (#149 follow-up): # AGENTMEMORY_ALLOW_AGENT_SDK=true diff --git a/src/config.ts b/src/config.ts index a4b676cf..4d13b57b 100644 --- a/src/config.ts +++ b/src/config.ts @@ -50,6 +50,16 @@ function hasRealValue(v: string | undefined): v is string { function detectProvider(env: Record): ProviderConfig { const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10); + // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio + if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") { + return { + provider: "openai", + model: env["OPENAI_MODEL"] || "gpt-4o-mini", + maxTokens, + baseURL: env["OPENAI_BASE_URL"], + }; + } + // MiniMax: Anthropic-compatible API, requires raw fetch to avoid SDK stainless headers if (hasRealValue(env["MINIMAX_API_KEY"])) { return { @@ -92,7 +102,7 @@ function detectProvider(env: Record): ProviderConfig { if (!allowAgentSdk) { process.stderr.write( "[agentmemory] No LLM provider key found " + - "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY). " + + "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " + "LLM-backed compression and summarization are DISABLED — using no-op provider. 
" + "This is the safe default: the agent-sdk fallback used to spawn Claude Agent SDK " + "child sessions which inherit Claude Code's plugin hooks and cause infinite Stop-hook " + @@ -156,7 +166,8 @@ export function detectLlmProviderKind(): "llm" | "noop" { hasRealValue(env["GEMINI_API_KEY"]) || hasRealValue(env["GOOGLE_API_KEY"]) || hasRealValue(env["OPENROUTER_API_KEY"]) || - hasRealValue(env["MINIMAX_API_KEY"]) + hasRealValue(env["MINIMAX_API_KEY"]) || + hasRealValue(env["OPENAI_API_KEY"]) ) { return "llm"; } @@ -292,6 +303,7 @@ const VALID_PROVIDERS = new Set([ "openrouter", "agent-sdk", "minimax", + "openai", ]); export function loadFallbackConfig(): FallbackConfig { diff --git a/src/providers/index.ts b/src/providers/index.ts index b22907bc..5de6807c 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -7,6 +7,7 @@ import { AgentSDKProvider } from "./agent-sdk.js"; import { AnthropicProvider } from "./anthropic.js"; import { MinimaxProvider } from "./minimax.js"; import { NoopProvider } from "./noop.js"; +import { OpenAIProvider } from "./openai.js"; import { OpenRouterProvider } from "./openrouter.js"; import { ResilientProvider } from "./resilient.js"; import { FallbackChainProvider } from "./fallback-chain.js"; @@ -94,6 +95,20 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider { config.maxTokens, "https://openrouter.ai/api/v1/chat/completions", ); + case "openai": { + const openaiKey = getEnvVar("OPENAI_API_KEY"); + if (!openaiKey) { + throw new Error( + "OPENAI_API_KEY is required for the openai provider", + ); + } + return new OpenAIProvider( + openaiKey, + config.model, + config.maxTokens, + config.baseURL, + ); + } case "noop": return new NoopProvider(); case "agent-sdk": diff --git a/src/providers/openai.ts b/src/providers/openai.ts new file mode 100644 index 00000000..9745a135 --- /dev/null +++ b/src/providers/openai.ts @@ -0,0 +1,99 @@ +import type { MemoryProvider } from "../types.js"; +import { getEnvVar } from 
"../config.js"; + +const DEFAULT_BASE_URL = "https://api.openai.com"; +const DEFAULT_MODEL = "gpt-4o-mini"; + +/** + * OpenAI-compatible LLM provider. + * + * Uses raw fetch (no SDK) to support any OpenAI-compatible endpoint: + * - OpenAI official + * - Azure OpenAI + * - DeepSeek + * - 硅基流动 (SiliconFlow) + * - vLLM / LM Studio / Ollama (with OpenAI compatibility layer) + * - Any other proxy implementing /v1/chat/completions + * + * Required env vars: + * OPENAI_API_KEY — API key + * + * Optional: + * OPENAI_BASE_URL — base URL without path (default: https://api.openai.com) + * OPENAI_MODEL — model name (default: gpt-4o-mini) + * MAX_TOKENS — max output tokens (default: from config or 4096) + * OPENAI_REASONING_EFFORT — "low" | "medium" | "high" | "none" + * Passthrough for reasoning models (e.g. Ollama Cloud + * thinking models). Set to "none" to ensure + * message.content is populated instead of only + * message.reasoning. + */ +export class OpenAIProvider implements MemoryProvider { + name = "openai"; + private apiKey: string; + private model: string; + private maxTokens: number; + private baseUrl: string; + private reasoningEffort?: string; + + constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) { + this.apiKey = apiKey; + this.model = model; + this.maxTokens = maxTokens; + this.baseUrl = baseURL || getEnvVar("OPENAI_BASE_URL") || DEFAULT_BASE_URL; + this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined; + } + + async compress(systemPrompt: string, userPrompt: string): Promise { + return this.call(systemPrompt, userPrompt); + } + + async summarize(systemPrompt: string, userPrompt: string): Promise { + return this.call(systemPrompt, userPrompt); + } + + private async call(systemPrompt: string, userPrompt: string): Promise { + const url = `${this.baseUrl}/v1/chat/completions`; + const body: Record = { + model: this.model, + max_tokens: this.maxTokens, + messages: [ + { role: "system", content: systemPrompt }, + { 
role: "user", content: userPrompt }, + ], + }; + if (this.reasoningEffort) { + body.reasoning_effort = this.reasoningEffort; + } + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + }, + body: JSON.stringify(body), + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`OpenAI API error (${response.status}): ${text}`); + } + + const data = (await response.json()) as { + choices?: Array<{ message?: { content?: string; reasoning?: string } }>; + }; + const message = data.choices?.[0]?.message; + const content = message?.content; + if (content) { + return content; + } + // Fallback: some thinking models return reasoning but no content + const reasoning = message?.reasoning; + if (reasoning) { + return reasoning; + } + throw new Error( + `OpenAI returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`, + ); + } +} diff --git a/src/types.ts b/src/types.ts index 70b05010..687469f7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -129,7 +129,7 @@ export interface ProviderConfig { baseURL?: string; } -export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "noop"; +export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "openai" | "noop"; export interface MemoryProvider { name: string; From d0e99bcf81301a450fbf2854175c9a23f85412be Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Thu, 14 May 2026 12:05:00 +0100 Subject: [PATCH 2/3] fix(config): honor OPENAI_API_KEY_FOR_LLM=false in detectLlmProviderKind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `detectProvider()` correctly gates OpenAI auto-detection on OPENAI_API_KEY_FOR_LLM !== "false", but `detectLlmProviderKind()` did not — so users who set OPENAI_API_KEY only for embeddings (via the existing OPENAI_BASE_URL + OPENAI_EMBEDDING_MODEL flow from #186) would see 
/agentmemory/config/flags report `provider: llm` even though detectProvider() routed them to the noop provider. Also clarify in the README that OPENAI_REASONING_EFFORT is honored only by reasoning models (o1, o3, gpt-*-reasoning) and providers that mirror that schema (Ollama Cloud thinking models). Standard chat models reject the field with 400. Verified: - OPENAI_API_KEY=sk-... + OPENAI_API_KEY_FOR_LLM=false now returns "noop" from detectLlmProviderKind (was "llm" before the fix). - OPENAI_API_KEY=sk-... alone still returns "llm" (intended default). - npm run build clean. Note: 10 pre-existing test failures on test/mcp-standalone.test.ts are a stale-branch artefact — this branch is 10 commits behind main and is missing the MCP shim fixes that landed via #311 / #327. Recommend rebasing on main (or "Update branch" via the GitHub UI) before merge. --- README.md | 8 ++++++-- src/config.ts | 3 ++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8561db18..052bf36f 100644 --- a/README.md +++ b/README.md @@ -1051,8 +1051,12 @@ Create `~/.agentmemory/.env`: # OPENAI_BASE_URL=https://api.openai.com # Optional: override for Azure / vLLM / LM Studio / proxies # OPENAI_MODEL=gpt-4o-mini # Optional: default model # OPENAI_REASONING_EFFORT=none # Optional: "low" | "medium" | "high" | "none" -# # Set to "none" for thinking models (e.g. Ollama Cloud) -# # that return reasoning but no content. +# # Honored only by OpenAI's reasoning models (o1, o3, +# # gpt-*-reasoning) and providers that mirror that +# # schema (Ollama Cloud thinking models). Standard +# # chat models reject this field with 400. Set to +# # "none" for thinking models that return reasoning +# # but no content. 
# OPENAI_API_KEY_FOR_LLM=false # Optional: set to false to skip OpenAI auto-detection # # for LLM (useful if you only want OpenAI for embeddings) # Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk); diff --git a/src/config.ts b/src/config.ts index 4d13b57b..4a416ed1 100644 --- a/src/config.ts +++ b/src/config.ts @@ -167,7 +167,8 @@ export function detectLlmProviderKind(): "llm" | "noop" { hasRealValue(env["GOOGLE_API_KEY"]) || hasRealValue(env["OPENROUTER_API_KEY"]) || hasRealValue(env["MINIMAX_API_KEY"]) || - hasRealValue(env["OPENAI_API_KEY"]) + (hasRealValue(env["OPENAI_API_KEY"]) && + env["OPENAI_API_KEY_FOR_LLM"] !== "false") ) { return "llm"; } From b06406bc2429f37a67abf62030b1d26ade8d5564 Mon Sep 17 00:00:00 2001 From: Rohit Ghumare Date: Sat, 16 May 2026 18:17:52 +0100 Subject: [PATCH 3/3] =?UTF-8?q?feat(providers):=20openai=20=E2=80=94=20fet?= =?UTF-8?q?ch=20timeout,=20Azure=20auto-detect,=20OPENAI=5FAPI=5FKEY=20sco?= =?UTF-8?q?pe=20hint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups against the v2 PR so reviewers can see what's left before merge: 1. Outbound fetch timeout. The other raw-fetch providers (anthropic / gemini / openrouter / minimax) all lack one too — that's a same-pattern repo-wide concern tracked as a follow-up issue. This PR fixes the bound on the new surface only: AbortController + setTimeout, default 60s, overridable via OPENAI_TIMEOUT_MS. Abort messages explain how to raise the bound. 2. Azure OpenAI auto-detection. Azure is in the README's supported list but the code only emitted the standard OpenAI shape. Now detects `.openai.azure.com` hostnames at construction time and: - swaps `Authorization: Bearer` for `api-key: <key>` - drops the `/v1` path prefix (deployment is baked into base URL) - appends `api-version=<version>` query param (env-overridable) Default api-version is `2024-08-01-preview` per Azure docs. 
Existing users of the standard OpenAI shape are unaffected — the detection is purely hostname-driven. 3. README clarity on `OPENAI_API_KEY` shared use. Embeddings (#186) and this PR's LLM both auto-activate from the same key, which surprises users who only meant one or the other. Added an explicit callout above the LLM section pointing at the `OPENAI_API_KEY_FOR_LLM=false` opt-out. Also documents the new timeout + Azure base-URL shape inline. Build clean. Tests 954/954 (10 pre-existing mcp-standalone failures on main are not in this branch's snapshot). --- README.md | 11 +++- src/providers/openai.ts | 118 +++++++++++++++++++++++++++++++++------- 2 files changed, 109 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 052bf36f..724ffc88 100644 --- a/README.md +++ b/README.md @@ -1047,9 +1047,18 @@ Create `~/.agentmemory/.env`: # GEMINI_API_KEY=... # OPENROUTER_API_KEY=... # MINIMAX_API_KEY=... -# OPENAI_API_KEY=*** +# OPENAI_API_KEY=*** # NOTE: this same key auto-activates BOTH the +# # OpenAI LLM provider (here) AND the OpenAI +# # embedding provider (further below). Set +# # OPENAI_API_KEY_FOR_LLM=false to scope it +# # to embeddings only. # OPENAI_BASE_URL=https://api.openai.com # Optional: override for Azure / vLLM / LM Studio / proxies +# # Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment> +# # Auto-detected from `.openai.azure.com` hostname; uses +# # api-key header + api-version query param. 
+# OPENAI_API_VERSION=2024-08-01-preview # Optional: Azure api-version query param # OPENAI_MODEL=gpt-4o-mini # Optional: default model +# OPENAI_TIMEOUT_MS=60000 # Optional: outbound fetch timeout (default 60s) # OPENAI_REASONING_EFFORT=none # Optional: "low" | "medium" | "high" | "none" # # Honored only by OpenAI's reasoning models (o1, o3, # # gpt-*-reasoning) and providers that mirror that diff --git a/src/providers/openai.ts b/src/providers/openai.ts index 9745a135..d8c16ce9 100644 --- a/src/providers/openai.ts +++ b/src/providers/openai.ts @@ -3,13 +3,15 @@ import { getEnvVar } from "../config.js"; const DEFAULT_BASE_URL = "https://api.openai.com"; const DEFAULT_MODEL = "gpt-4o-mini"; +const DEFAULT_TIMEOUT_MS = 60_000; +const DEFAULT_AZURE_API_VERSION = "2024-08-01-preview"; /** * OpenAI-compatible LLM provider. * * Uses raw fetch (no SDK) to support any OpenAI-compatible endpoint: * - OpenAI official - * - Azure OpenAI + * - Azure OpenAI (auto-detected from .openai.azure.com host) * - DeepSeek * - 硅基流动 (SiliconFlow) * - vLLM / LM Studio / Ollama (with OpenAI compatibility layer) @@ -19,14 +21,17 @@ const DEFAULT_MODEL = "gpt-4o-mini"; * OPENAI_API_KEY — API key * * Optional: - * OPENAI_BASE_URL — base URL without path (default: https://api.openai.com) - * OPENAI_MODEL — model name (default: gpt-4o-mini) - * MAX_TOKENS — max output tokens (default: from config or 4096) - * OPENAI_REASONING_EFFORT — "low" | "medium" | "high" | "none" - * Passthrough for reasoning models (e.g. Ollama Cloud - * thinking models). Set to "none" to ensure - * message.content is populated instead of only - * message.reasoning. + * OPENAI_BASE_URL — base URL without path (default: https://api.openai.com). 
+ * Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment> + * OPENAI_MODEL — model name (default: gpt-4o-mini) + * OPENAI_API_VERSION — Azure api-version query param (default: 2024-08-01-preview) + * OPENAI_TIMEOUT_MS — outbound fetch timeout in ms (default: 60000) + * MAX_TOKENS — max output tokens (default: from config or 4096) + * OPENAI_REASONING_EFFORT — "low" | "medium" | "high" | "none" + * Passthrough for reasoning models (e.g. Ollama Cloud + * thinking models). Set to "none" to ensure + * message.content is populated instead of only + * message.reasoning. */ export class OpenAIProvider implements MemoryProvider { name = "openai"; @@ -35,13 +40,24 @@ export class OpenAIProvider implements MemoryProvider { private maxTokens: number; private baseUrl: string; private reasoningEffort?: string; + private timeoutMs: number; + private isAzure: boolean; + private azureApiVersion: string; constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) { this.apiKey = apiKey; this.model = model; this.maxTokens = maxTokens; - this.baseUrl = baseURL || getEnvVar("OPENAI_BASE_URL") || DEFAULT_BASE_URL; + this.baseUrl = ( + baseURL || + getEnvVar("OPENAI_BASE_URL") || + DEFAULT_BASE_URL + ).replace(/\/+$/, ""); this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined; + this.timeoutMs = parseTimeout(getEnvVar("OPENAI_TIMEOUT_MS")); + this.azureApiVersion = + getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION; + this.isAzure = detectAzure(this.baseUrl); } async compress(systemPrompt: string, userPrompt: string): Promise { @@ -52,8 +68,33 @@ export class OpenAIProvider implements MemoryProvider { return this.call(systemPrompt, userPrompt); } + private buildUrl(): string { + // Azure OpenAI carries the deployment in the path and requires + // `api-version` as a query param. Standard OpenAI-compatible + // endpoints append /v1/chat/completions to the base. + if (this.isAzure) { + const sep = this.baseUrl.includes("?") ? 
"&" : "?"; + return `${this.baseUrl}/chat/completions${sep}api-version=${encodeURIComponent(this.azureApiVersion)}`; + } + return `${this.baseUrl}/v1/chat/completions`; + } + + private buildHeaders(): Record { + // Azure uses `api-key: <key>`; everyone else uses `Authorization: Bearer <key>`. + if (this.isAzure) { + return { + "Content-Type": "application/json", + "api-key": this.apiKey, + }; + } + return { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + }; + } + private async call(systemPrompt: string, userPrompt: string): Promise { - const url = `${this.baseUrl}/v1/chat/completions`; + const url = this.buildUrl(); const body: Record = { model: this.model, max_tokens: this.maxTokens, @@ -65,14 +106,35 @@ export class OpenAIProvider implements MemoryProvider { if (this.reasoningEffort) { body.reasoning_effort = this.reasoningEffort; } - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - }, - body: JSON.stringify(body), - }); + + // Bound the request with an AbortController so a hung provider + // can't stall the worker. The other raw-fetch providers + // (anthropic, gemini, openrouter, minimax) have the same gap + // tracked in a follow-up issue; this PR fixes it for the new + // surface only. 
+ const ac = new AbortController(); + const t = setTimeout(() => ac.abort(), this.timeoutMs); + let response: Response; + try { + response = await fetch(url, { + method: "POST", + headers: this.buildHeaders(), + body: JSON.stringify(body), + signal: ac.signal, + }); + } catch (err) { + const aborted = + ac.signal.aborted || + (err instanceof Error && err.name === "AbortError"); + if (aborted) { + throw new Error( + `OpenAI API request timed out after ${this.timeoutMs}ms — set OPENAI_TIMEOUT_MS to raise the bound or check the provider status.`, + ); + } + throw err; + } finally { + clearTimeout(t); + } if (!response.ok) { const text = await response.text(); @@ -97,3 +159,21 @@ export class OpenAIProvider implements MemoryProvider { ); } } + +function parseTimeout(raw: string | null | undefined): number { + if (!raw) return DEFAULT_TIMEOUT_MS; + const n = parseInt(raw, 10); + return Number.isFinite(n) && n > 0 ? n : DEFAULT_TIMEOUT_MS; +} + +function detectAzure(baseUrl: string): boolean { + // Azure resource URLs land at <resource>.openai.azure.com. The + // `OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment>` + // shape is the documented opt-in path. + try { + const u = new URL(baseUrl); + return u.hostname.endsWith(".openai.azure.com"); + } catch { + return false; + } +}