diff --git a/README.md b/README.md index 4fbbea9b..724ffc88 100644 --- a/README.md +++ b/README.md @@ -1047,6 +1047,27 @@ Create `~/.agentmemory/.env`: # GEMINI_API_KEY=... # OPENROUTER_API_KEY=... # MINIMAX_API_KEY=... +# OPENAI_API_KEY=*** # NOTE: this same key auto-activates BOTH the +# # OpenAI LLM provider (here) AND the OpenAI +# # embedding provider (further below). Set +# # OPENAI_API_KEY_FOR_LLM=false to scope it +# # to embeddings only. +# OPENAI_BASE_URL=https://api.openai.com # Optional: override for Azure / vLLM / LM Studio / proxies +# # Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment> +# # Auto-detected from the `<resource>.openai.azure.com` hostname; uses +# # api-key header + api-version query param. +# OPENAI_API_VERSION=2024-08-01-preview # Optional: Azure api-version query param +# OPENAI_MODEL=gpt-4o-mini # Optional: default model +# OPENAI_TIMEOUT_MS=60000 # Optional: outbound fetch timeout (default 60s) +# OPENAI_REASONING_EFFORT=none # Optional: "low" | "medium" | "high" | "none" +# # Honored only by OpenAI's reasoning models (o1, o3, +# # gpt-*-reasoning) and providers that mirror that +# # schema (Ollama Cloud thinking models). Standard +# # chat models reject this field with 400. Set to +# # "none" for thinking models that return reasoning +# # but no content. 
+# OPENAI_API_KEY_FOR_LLM=false # Optional: set to false to skip OpenAI auto-detection +# # for LLM (useful if you only want OpenAI for embeddings) # Opt-in Claude-subscription fallback (spawns @anthropic-ai/claude-agent-sdk); # leave OFF unless you understand the Stop-hook recursion risk (#149 follow-up): # AGENTMEMORY_ALLOW_AGENT_SDK=true diff --git a/src/config.ts b/src/config.ts index a4b676cf..4a416ed1 100644 --- a/src/config.ts +++ b/src/config.ts @@ -50,6 +50,16 @@ function hasRealValue(v: string | undefined): v is string { function detectProvider(env: Record): ProviderConfig { const maxTokens = parseInt(env["MAX_TOKENS"] || "4096", 10); + // OpenAI-compatible: supports OpenAI, DeepSeek, SiliconFlow, Azure, vLLM, LM Studio + if (hasRealValue(env["OPENAI_API_KEY"]) && env["OPENAI_API_KEY_FOR_LLM"] !== "false") { + return { + provider: "openai", + model: env["OPENAI_MODEL"] || "gpt-4o-mini", + maxTokens, + baseURL: env["OPENAI_BASE_URL"], + }; + } + // MiniMax: Anthropic-compatible API, requires raw fetch to avoid SDK stainless headers if (hasRealValue(env["MINIMAX_API_KEY"])) { return { @@ -92,7 +102,7 @@ function detectProvider(env: Record): ProviderConfig { if (!allowAgentSdk) { process.stderr.write( "[agentmemory] No LLM provider key found " + - "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY). " + + "(ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENROUTER_API_KEY, MINIMAX_API_KEY, OPENAI_API_KEY). " + "LLM-backed compression and summarization are DISABLED — using no-op provider. 
" + "This is the safe default: the agent-sdk fallback used to spawn Claude Agent SDK " + "child sessions which inherit Claude Code's plugin hooks and cause infinite Stop-hook " + @@ -156,7 +166,9 @@ export function detectLlmProviderKind(): "llm" | "noop" { hasRealValue(env["GEMINI_API_KEY"]) || hasRealValue(env["GOOGLE_API_KEY"]) || hasRealValue(env["OPENROUTER_API_KEY"]) || - hasRealValue(env["MINIMAX_API_KEY"]) + hasRealValue(env["MINIMAX_API_KEY"]) || + (hasRealValue(env["OPENAI_API_KEY"]) && + env["OPENAI_API_KEY_FOR_LLM"] !== "false") ) { return "llm"; } @@ -292,6 +304,7 @@ const VALID_PROVIDERS = new Set([ "openrouter", "agent-sdk", "minimax", + "openai", ]); export function loadFallbackConfig(): FallbackConfig { diff --git a/src/providers/index.ts b/src/providers/index.ts index b22907bc..5de6807c 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -7,6 +7,7 @@ import { AgentSDKProvider } from "./agent-sdk.js"; import { AnthropicProvider } from "./anthropic.js"; import { MinimaxProvider } from "./minimax.js"; import { NoopProvider } from "./noop.js"; +import { OpenAIProvider } from "./openai.js"; import { OpenRouterProvider } from "./openrouter.js"; import { ResilientProvider } from "./resilient.js"; import { FallbackChainProvider } from "./fallback-chain.js"; @@ -94,6 +95,20 @@ function createBaseProvider(config: ProviderConfig): MemoryProvider { config.maxTokens, "https://openrouter.ai/api/v1/chat/completions", ); + case "openai": { + const openaiKey = getEnvVar("OPENAI_API_KEY"); + if (!openaiKey) { + throw new Error( + "OPENAI_API_KEY is required for the openai provider", + ); + } + return new OpenAIProvider( + openaiKey, + config.model, + config.maxTokens, + config.baseURL, + ); + } case "noop": return new NoopProvider(); case "agent-sdk":
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
new file mode 100644
index 00000000..d8c16ce9
--- /dev/null
+++ b/src/providers/openai.ts
@@ -0,0 +1,259 @@
+import type { MemoryProvider } from "../types.js";
+import { getEnvVar } from "../config.js";
+
+const DEFAULT_BASE_URL = "https://api.openai.com";
+const DEFAULT_MODEL = "gpt-4o-mini";
+const DEFAULT_TIMEOUT_MS = 60_000;
+const DEFAULT_AZURE_API_VERSION = "2024-08-01-preview";
+// Node resets setTimeout delays above 2^31-1 ms to 1 ms, which would abort
+// every request immediately, so configured timeouts are clamped to this.
+const MAX_TIMEOUT_MS = 2_147_483_647;
+
+/**
+ * OpenAI-compatible LLM provider.
+ *
+ * Uses raw fetch (no SDK) to support any OpenAI-compatible endpoint:
+ *   - OpenAI official
+ *   - Azure OpenAI (auto-detected from a <resource>.openai.azure.com host)
+ *   - DeepSeek
+ *   - SiliconFlow
+ *   - vLLM / LM Studio / Ollama (with OpenAI compatibility layer)
+ *   - Any other proxy implementing /v1/chat/completions
+ *
+ * Required env vars:
+ *   OPENAI_API_KEY — API key
+ *
+ * Optional:
+ *   OPENAI_BASE_URL — base URL (default: https://api.openai.com). A trailing
+ *                     version segment (e.g. /v1) or a full .../chat/completions
+ *                     URL is accepted as-is instead of being duplicated.
+ *                     Azure: https://<resource>.openai.azure.com/openai/deployments/<deployment>
+ *   OPENAI_MODEL — model name (default: gpt-4o-mini)
+ *   OPENAI_API_VERSION — Azure api-version query param (default: 2024-08-01-preview)
+ *   OPENAI_TIMEOUT_MS — timeout in ms for the whole request, including the
+ *                       response body (default: 60000)
+ *   MAX_TOKENS — max output tokens (default: from config or 4096)
+ *   OPENAI_REASONING_EFFORT — "low" | "medium" | "high" | "none"
+ *                             Passthrough for reasoning models (e.g. Ollama Cloud
+ *                             thinking models). Set to "none" to ensure
+ *                             message.content is populated instead of only
+ *                             message.reasoning.
+ */
+export class OpenAIProvider implements MemoryProvider {
+  name = "openai";
+  private apiKey: string;
+  private model: string;
+  private maxTokens: number;
+  private baseUrl: string;
+  private reasoningEffort?: string;
+  private timeoutMs: number;
+  private isAzure: boolean;
+  private azureApiVersion: string;
+
+  /**
+   * @param apiKey API key, sent as `api-key` (Azure) or as a Bearer token.
+   * @param model Model name; an empty string falls back to the default model.
+   * @param maxTokens Value sent as `max_tokens` on every request.
+   * @param baseURL Endpoint override; falls back to OPENAI_BASE_URL, then the default.
+   */
+  constructor(apiKey: string, model: string, maxTokens: number, baseURL?: string) {
+    this.apiKey = apiKey;
+    this.model = model || DEFAULT_MODEL;
+    this.maxTokens = maxTokens;
+    this.baseUrl = (
+      baseURL ||
+      getEnvVar("OPENAI_BASE_URL") ||
+      DEFAULT_BASE_URL
+    ).replace(/\/+$/, "");
+    this.reasoningEffort = getEnvVar("OPENAI_REASONING_EFFORT") || undefined;
+    this.timeoutMs = parseTimeout(getEnvVar("OPENAI_TIMEOUT_MS"));
+    this.azureApiVersion =
+      getEnvVar("OPENAI_API_VERSION") || DEFAULT_AZURE_API_VERSION;
+    this.isAzure = detectAzure(this.baseUrl);
+  }
+
+  /** Sends both prompts to the chat-completions endpoint and returns the reply text. */
+  async compress(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt);
+  }
+
+  /** Sends both prompts to the chat-completions endpoint and returns the reply text. */
+  async summarize(systemPrompt: string, userPrompt: string): Promise<string> {
+    return this.call(systemPrompt, userPrompt);
+  }
+
+  /** Resolves the chat-completions URL for the configured endpoint. */
+  private buildUrl(): string {
+    if (!this.isAzure) {
+      return resolveChatCompletionsUrl(this.baseUrl);
+    }
+    // Azure OpenAI carries the deployment in the path and requires
+    // `api-version` as a query param. Going through URL keeps the path ahead
+    // of any query string already present on the base; detectAzure() only
+    // returns true for parseable URLs, so the constructor cannot throw here.
+    const url = new URL(this.baseUrl);
+    if (!url.pathname.endsWith("/chat/completions")) {
+      url.pathname = `${url.pathname.replace(/\/+$/, "")}/chat/completions`;
+    }
+    if (!url.searchParams.has("api-version")) {
+      url.searchParams.set("api-version", this.azureApiVersion);
+    }
+    return url.toString();
+  }
+
+  /** Builds the auth and content-type headers for the configured endpoint. */
+  private buildHeaders(): Record<string, string> {
+    // Azure uses `api-key: <key>`; everyone else uses `Authorization: Bearer <key>`.
+    if (this.isAzure) {
+      return {
+        "Content-Type": "application/json",
+        "api-key": this.apiKey,
+      };
+    }
+    return {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${this.apiKey}`,
+    };
+  }
+
+  /**
+   * POSTs the JSON body and reads the full response text under one timeout.
+   *
+   * @throws Error when the request exceeds the configured timeout; other fetch
+   *   failures are rethrown unchanged.
+   */
+  private async post(
+    url: string,
+    body: Record<string, unknown>,
+  ): Promise<{ ok: boolean; status: number; text: string }> {
+    // Bound the request with an AbortController so a hung provider
+    // can't stall the worker. The other raw-fetch providers
+    // (anthropic, gemini, openrouter, minimax) have the same gap
+    // tracked in a follow-up issue; this PR fixes it for the new
+    // surface only.
+    const ac = new AbortController();
+    const timer = setTimeout(() => ac.abort(), this.timeoutMs);
+    try {
+      const response = await fetch(url, {
+        method: "POST",
+        headers: this.buildHeaders(),
+        body: JSON.stringify(body),
+        signal: ac.signal,
+      });
+      // fetch() resolves once headers arrive, so the body must be read while
+      // the timer is still armed or a stalled stream would hang forever.
+      const text = await response.text();
+      return { ok: response.ok, status: response.status, text };
+    } catch (err) {
+      const aborted =
+        ac.signal.aborted ||
+        (err instanceof Error && err.name === "AbortError");
+      if (aborted) {
+        throw new Error(
+          `OpenAI API request timed out after ${this.timeoutMs}ms — set OPENAI_TIMEOUT_MS to raise the bound or check the provider status.`,
+        );
+      }
+      throw err;
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+
+  /**
+   * Runs one chat completion and returns the assistant text.
+   *
+   * @throws Error on timeout, on a non-2xx status, or when the response holds
+   *   neither content nor reasoning text.
+   */
+  private async call(systemPrompt: string, userPrompt: string): Promise<string> {
+    const body: Record<string, unknown> = {
+      model: this.model,
+      max_tokens: this.maxTokens,
+      messages: [
+        { role: "system", content: systemPrompt },
+        { role: "user", content: userPrompt },
+      ],
+    };
+    if (this.reasoningEffort) {
+      body.reasoning_effort = this.reasoningEffort;
+    }
+
+    const { ok, status, text } = await this.post(this.buildUrl(), body);
+    if (!ok) {
+      throw new Error(`OpenAI API error (${status}): ${text}`);
+    }
+
+    let data: ChatCompletionResponse | null;
+    try {
+      data = JSON.parse(text) as ChatCompletionResponse | null;
+    } catch {
+      // A 2xx with a non-JSON body (e.g. a proxy's HTML page) is reported the
+      // same way as any other unusable payload.
+      throw new Error(
+        `OpenAI returned unexpected response: ${text.slice(0, 200)}`,
+      );
+    }
+    const message = data?.choices?.[0]?.message;
+    if (message?.content) {
+      return message.content;
+    }
+    // Fallback: some thinking models return reasoning but no content
+    // (`reasoning_content` is the DeepSeek API's name for the same field).
+    const reasoning = message?.reasoning || message?.reasoning_content;
+    if (reasoning) {
+      return reasoning;
+    }
+    throw new Error(
+      `OpenAI returned unexpected response: ${JSON.stringify(data).slice(0, 200)}`,
+    );
+  }
+}
+
+/** Subset of the chat-completions response that this provider reads. */
+interface ChatCompletionResponse {
+  choices?: Array<{
+    message?: {
+      content?: string | null;
+      reasoning?: string;
+      reasoning_content?: string;
+    };
+  }>;
+}
+
+/**
+ * Parses OPENAI_TIMEOUT_MS, falling back to the default for missing,
+ * non-numeric, or sub-millisecond values and clamping to the timer maximum.
+ */
+function parseTimeout(raw: string | null | undefined): number {
+  if (!raw) return DEFAULT_TIMEOUT_MS;
+  // Number() rejects trailing junk ("60s") that parseInt would silently
+  // truncate to a 60 ms timeout.
+  const n = Number(raw.trim());
+  if (!Number.isFinite(n) || n < 1) return DEFAULT_TIMEOUT_MS;
+  return Math.min(Math.floor(n), MAX_TIMEOUT_MS);
+}
+
+/**
+ * Appends the chat-completions path to a non-Azure base URL without
+ * duplicating a version segment or endpoint path the base already carries.
+ */
+function resolveChatCompletionsUrl(baseUrl: string): string {
+  if (baseUrl.endsWith("/chat/completions")) return baseUrl;
+  // Official OpenAI SDKs document OPENAI_BASE_URL with the version included
+  // (https://api.openai.com/v1), so accept that shape too.
+  if (/[^/]\/v\d+$/.test(baseUrl)) return `${baseUrl}/chat/completions`;
+  return `${baseUrl}/v1/chat/completions`;
+}
+
+/** Returns true when the base URL's hostname is an Azure OpenAI resource host. */
+function detectAzure(baseUrl: string): boolean {
+  // Azure resource URLs land at <resource>.openai.azure.com. The
+  // `OPENAI_BASE_URL=https://<resource>.openai.azure.com/openai/deployments/<deployment>`
+  // shape is the documented opt-in path.
+  try {
+    const u = new URL(baseUrl);
+    return u.hostname.endsWith(".openai.azure.com");
+  } catch {
+    return false;
+  }
+}
diff --git a/src/types.ts b/src/types.ts index 70b05010..687469f7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -129,7 +129,7 @@ export interface ProviderConfig { baseURL?: string; } -export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "noop"; +export type ProviderType = "agent-sdk" | "anthropic" | "gemini" | "openrouter" | "minimax" | "openai" | "noop"; export interface MemoryProvider { name: string;