diff --git a/README.md b/README.md index 37cd7258..36e6d618 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,23 @@ Open `http://localhost:3000`. - At least one supported model provider key, depending on which models you enable - LibreOffice for DOC/DOCX to PDF conversion +## LLM Configuration Options + +The backend supports multiple LLM providers via environment variables in `backend/.env`: + +### Provider Keys +- `GEMINI_API_KEY` - Google Gemini models +- `ANTHROPIC_API_KEY` - Anthropic Claude models +- `OPENROUTER_API_KEY` - OpenRouter (aggregates multiple providers) +- `RESEND_API_KEY` - Resend (transactional email delivery — not an LLM provider) + +### Local LLM (vLLM) Configuration +For self-hosted vLLM endpoints: +- `VLLM_BASE_URL` - Base URL for your vLLM server (e.g., `https://your-vllm-endpoint.com/v1`) +- `VLLM_API_KEY` - API key for vLLM authentication +- `VLLM_MAIN_MODEL` - Primary model name for vLLM (e.g., `BredaAI`) +- `VLLM_LIGHT_MODEL` - Lightweight model for faster responses (e.g., `your-light-model-name`) + ## Checks ```bash diff --git a/backend/.env.example b/backend/.env.example index 1db370a9..0a4a3788 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -12,3 +12,9 @@ GEMINI_API_KEY=your-gemini-key ANTHROPIC_API_KEY=your-anthropic-key OPENROUTER_API_KEY=your-openrouter-key RESEND_API_KEY=your-resend-key + +# vLLM Configuration (OpenAI-compatible endpoint) +VLLM_BASE_URL=https://your-vllm-endpoint.com/v1 +VLLM_API_KEY=your-vllm-api-key +VLLM_MAIN_MODEL=BredaAI +VLLM_LIGHT_MODEL=your-light-model-name diff --git a/backend/package-lock.json b/backend/package-lock.json index 86f82382..f31e9ea1 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -24,6 +24,7 @@ "libreoffice-convert": "^1.6.0", "mammoth": "^1.9.0", "multer": "^1.4.5-lts.2", + "openai": "^4.87.3", "pdfjs-dist": "^4.10.38", "resend": "^4.5.1" }, @@ -2688,6 +2689,16 @@ "undici-types": "~6.21.0" } }, + "node_modules/@types/node-fetch": { + "version": "2.6.13", + "resolved": 
"https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.4" + } + }, "node_modules/@types/qs": { "version": "6.15.0", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.15.0.tgz", @@ -2759,6 +2770,18 @@ "node": ">=10.0.0" } }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -2781,6 +2804,18 @@ "node": ">= 14" } }, + "node_modules/agentkeepalive": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", + "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", + "license": "MIT", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, "node_modules/append-field": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", @@ -2808,6 +2843,12 @@ "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", "license": "MIT" }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": 
"https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -2934,6 +2975,18 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/concat-stream": { "version": "1.6.2", "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", @@ -3035,6 +3088,15 @@ "node": ">=0.10.0" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/depd": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", @@ -3248,6 +3310,21 @@ "node": ">= 0.4" } }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/esbuild": { "version": "0.27.7", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", @@ -3305,6 +3382,15 @@ "node": ">= 0.6" } }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": 
"sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/express": { "version": "4.22.1", "resolved": "https://registry.npmjs.org/express/-/express-4.22.1.tgz", @@ -3446,6 +3532,50 @@ "node": ">= 0.8" } }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", + "license": "MIT" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "license": "MIT", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/formdata-node/node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/formdata-polyfill": { "version": "4.0.10", "resolved": 
"https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz", @@ -3628,6 +3758,21 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/hash.js": { "version": "1.1.7", "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz", @@ -3741,6 +3886,15 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.0.0" + } + }, "node_modules/iceberg-js": { "version": "0.8.1", "resolved": "https://registry.npmjs.org/iceberg-js/-/iceberg-js-0.8.1.tgz", @@ -4135,6 +4289,71 @@ "node": ">= 0.8" } }, + "node_modules/openai": { + "version": "4.104.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz", + "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==", + "license": "Apache-2.0", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7" + }, + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "ws": { + 
"optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/openai/node_modules/@types/node": { + "version": "18.19.130", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/openai/node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/openai/node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "license": "MIT" + }, "node_modules/option": { "version": "0.2.4", "resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz", @@ -4665,6 +4884,12 @@ "node": ">=0.6" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/ts-algebra": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz", @@ -4784,6 +5009,22 @@ "node": ">= 8" } }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": 
"sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/ws": { "version": "8.20.0", "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", diff --git a/backend/package.json b/backend/package.json index 50dfb585..86955768 100644 --- a/backend/package.json +++ b/backend/package.json @@ -24,6 +24,7 @@ "libreoffice-convert": "^1.6.0", "mammoth": "^1.9.0", "multer": "^1.4.5-lts.2", + "openai": "^4.87.3", "pdfjs-dist": "^4.10.38", "resend": "^4.5.1" }, diff --git a/backend/src/lib/chatTools.ts b/backend/src/lib/chatTools.ts index c3ab2439..a8bf1393 100644 --- a/backend/src/lib/chatTools.ts +++ b/backend/src/lib/chatTools.ts @@ -2121,7 +2121,8 @@ export async function runToolCalls( } } else if (tc.function.name === "generate_docx") { - const title = args.title as string; + // Fallback to "document" if title is missing - model sometimes omits required fields + const title = (args.title as string) || "document"; const landscape = !!(args.landscape); console.log(`[generate_docx] title="${title}" landscape=${landscape} args.landscape=${args.landscape}`); const previewFilename = `${(title.replace(/[^a-zA-Z0-9 _-]/g, "").trim().slice(0, 64) || "document")}.docx`; diff --git a/backend/src/lib/llm/index.ts b/backend/src/lib/llm/index.ts index 518ddc01..4b5e9793 100644 --- a/backend/src/lib/llm/index.ts +++ b/backend/src/lib/llm/index.ts @@ -1,5 +1,6 @@ import { streamClaude, completeClaudeText } from "./claude"; import { streamGemini, completeGeminiText } from "./gemini"; +import { streamOpenAI, completeOpenAIText } from "./openai"; import { 
providerForModel } from "./models"; import type { StreamChatParams, StreamChatResult, UserApiKeys } from "./types"; @@ -11,6 +12,7 @@ export async function streamChatWithTools( ): Promise { const provider = providerForModel(params.model); if (provider === "claude") return streamClaude(params); + if (provider === "openai") return streamOpenAI(params); return streamGemini(params); } @@ -23,5 +25,6 @@ export async function completeText(params: { }): Promise { const provider = providerForModel(params.model); if (provider === "claude") return completeClaudeText(params); + if (provider === "openai") return completeOpenAIText(params); return completeGeminiText(params); } diff --git a/backend/src/lib/llm/models.ts b/backend/src/lib/llm/models.ts index 52314007..e977a087 100644 --- a/backend/src/lib/llm/models.ts +++ b/backend/src/lib/llm/models.ts @@ -9,27 +9,33 @@ export const GEMINI_MAIN_MODELS = [ "gemini-3.1-pro-preview", "gemini-3-flash-preview", ] as const; +export const LOCAL_LLM_MAIN_MODELS = ["localllm-main"] as const; // Mid-tier (used for tabular review) — user picks one in account settings. export const CLAUDE_MID_MODELS = ["claude-sonnet-4-6"] as const; export const GEMINI_MID_MODELS = ["gemini-3-flash-preview"] as const; +export const LOCAL_LLM_MID_MODELS = ["localllm-main"] as const; // Low-tier (used for title generation, lightweight extractions) — user picks // one in account settings. 
export const CLAUDE_LOW_MODELS = ["claude-haiku-4-5"] as const; export const GEMINI_LOW_MODELS = ["gemini-3.1-flash-lite-preview"] as const; +export const LOCAL_LLM_LOW_MODELS = ["localllm-lite"] as const; -export const DEFAULT_MAIN_MODEL = "gemini-3-flash-preview"; -export const DEFAULT_TITLE_MODEL = "gemini-3.1-flash-lite-preview"; -export const DEFAULT_TABULAR_MODEL = "gemini-3-flash-preview"; +export const DEFAULT_MAIN_MODEL = "localllm-main"; +export const DEFAULT_TITLE_MODEL = "localllm-lite"; +export const DEFAULT_TABULAR_MODEL = "localllm-main"; const ALL_MODELS = new Set([ ...CLAUDE_MAIN_MODELS, ...GEMINI_MAIN_MODELS, + ...LOCAL_LLM_MAIN_MODELS, ...CLAUDE_MID_MODELS, ...GEMINI_MID_MODELS, + ...LOCAL_LLM_MID_MODELS, ...CLAUDE_LOW_MODELS, ...GEMINI_LOW_MODELS, + ...LOCAL_LLM_LOW_MODELS, ]); // --------------------------------------------------------------------------- @@ -37,6 +43,7 @@ const ALL_MODELS = new Set([ // --------------------------------------------------------------------------- export function providerForModel(model: string): Provider { + if (model.startsWith("localllm")) return "openai"; if (model.startsWith("claude")) return "claude"; if (model.startsWith("gemini")) return "gemini"; throw new Error(`Unknown model id: ${model}`); diff --git a/backend/src/lib/llm/openai.ts b/backend/src/lib/llm/openai.ts new file mode 100644 index 00000000..1b7cc015 --- /dev/null +++ b/backend/src/lib/llm/openai.ts @@ -0,0 +1,189 @@ +import OpenAI from "openai"; +import type { + StreamChatParams, + StreamChatResult, + NormalizedToolCall, + NormalizedToolResult, +} from "./types"; +import { toClaudeTools } from "./tools"; + +type OpenAIMessage = { + role: "user" | "assistant" | "system"; + content: string; +}; + +type OpenAIToolCall = { + id: string; + type: "function"; + function: { + name: string; + arguments: string; + }; +}; + +const MAX_TOKENS = 16384; + +function getClient(override?: string | null): OpenAI { + const apiKey = override?.trim() || 
process.env.VLLM_API_KEY || ""; + const baseURL = process.env.VLLM_BASE_URL || "http://localhost:8000/v1"; + console.log("[localllm] Client init:", { baseURL, apiKeyPresent: !!apiKey }); + return new OpenAI({ + apiKey, + baseURL, + }); +} + +function getActualModelName(model: string): string { + if (model === "localllm-main") { + return process.env.VLLM_MAIN_MODEL || "BredaAI"; + } + if (model === "localllm-lite") { + return process.env.VLLM_LIGHT_MODEL || "unsloth/gemma-4-E2B-it-GGUF:Q5_K_S"; + } + return model; +} + +function toNativeMessages( + messages: StreamChatParams["messages"], +): OpenAIMessage[] { + return messages.map((m) => ({ + role: m.role === "assistant" ? "assistant" : "user", + content: m.content, + })); +} + +export async function streamOpenAI( + params: StreamChatParams, +): Promise { + const { + model, + systemPrompt, + tools = [], + callbacks = {}, + runTools, + apiKeys, + enableThinking, + } = params; + const maxIter = params.maxIterations ?? 10; + + const actualModel = getActualModelName(model); + console.log("[localllm] streaming request:", { + internalModel: model, + actualModel, + baseURL: process.env.VLLM_BASE_URL + }); + + const client = getClient(apiKeys?.openai); + + const messages: OpenAIMessage[] = toNativeMessages(params.messages); + let fullText = ""; + + try { + for (let iter = 0; iter < maxIter; iter++) { + const systemMessage = systemPrompt + ? [{ role: "system" as const, content: systemPrompt }] + : []; + + const stream = await client.chat.completions.create({ + model: actualModel, + messages: [...systemMessage, ...messages] as any, + tools: tools.length + ? 
tools.map((t) => ({ + type: "function", + function: { + name: t.function.name, + description: t.function.description, + parameters: t.function.parameters, + }, + })) + : undefined, + stream: true, + }); + + let toolCalls: NormalizedToolCall[] = []; + let currentText = ""; + + for await (const chunk of stream) { + const choice = chunk.choices?.[0]; + if (!choice) continue; + + const delta = choice.delta; + + if (delta?.content) { + currentText += delta.content; + callbacks.onContentDelta?.(delta.content); + } + + if (delta?.tool_calls && delta.tool_calls.length > 0) { + for (const tc of delta.tool_calls) { + if (tc.type === "function" && tc.function) { + const call: NormalizedToolCall = { + id: tc.id || `call-${toolCalls.length}`, + name: tc.function.name || "unknown", + input: tc.function.arguments + ? JSON.parse(tc.function.arguments) + : {}, + }; + callbacks.onToolCallStart?.(call); + toolCalls.push(call); + } + } + } + } + + fullText += currentText; + + if (toolCalls.length > 0 && runTools) { + const results = await runTools(toolCalls); + + const assistantMessage: OpenAIMessage = { + role: "assistant", + content: currentText, + }; + messages.push(assistantMessage); + + const toolMessages: OpenAIMessage[] = results.map((r) => ({ + role: "tool" as any, + content: r.content, + })); + messages.push(...toolMessages); + } else { + break; + } + } + } catch (error: any) { + console.error("[localllm] streaming error:", error.message); + console.error("[localllm] error details:", JSON.stringify(error, null, 2)); + throw error; + } + + return { fullText }; +} + +export async function completeOpenAIText(params: { + model: string; + systemPrompt?: string; + user: string; + maxTokens?: number; + apiKeys?: { openai?: string | null }; +}): Promise { + const client = getClient(params.apiKeys?.openai); + const actualModel = getActualModelName(params.model); + + const messages: OpenAIMessage[] = [ + ...(params.systemPrompt + ? 
[{ role: "system" as const, content: params.systemPrompt }] + : []), + { role: "user", content: params.user }, + ]; + + const response = await client.chat.completions.create({ + model: actualModel, + messages: messages as any, + max_tokens: params.maxTokens ?? 512, + }); + + return response.choices?.[0]?.message?.content || ""; +} + +export type { NormalizedToolResult }; diff --git a/backend/src/lib/llm/types.ts b/backend/src/lib/llm/types.ts index 8cc411a7..a8409d80 100644 --- a/backend/src/lib/llm/types.ts +++ b/backend/src/lib/llm/types.ts @@ -2,7 +2,7 @@ // Callers always speak OpenAI-style tools + { role, content } messages; each // provider translates internally. -export type Provider = "claude" | "gemini"; +export type Provider = "claude" | "gemini" | "openai"; export type OpenAIToolSchema = { type: "function"; @@ -39,6 +39,7 @@ export type StreamCallbacks = { export type UserApiKeys = { claude?: string | null; gemini?: string | null; + openai?: string | null; }; export type StreamChatParams = { diff --git a/backend/src/lib/userSettings.ts b/backend/src/lib/userSettings.ts index c798b636..e8eb23b1 100644 --- a/backend/src/lib/userSettings.ts +++ b/backend/src/lib/userSettings.ts @@ -13,10 +13,13 @@ export type UserModelSettings = { }; // Title generation is a lightweight task — always routed to the cheapest model -// of whichever provider the user has keys for: Gemini Flash Lite if Gemini is -// available, otherwise Claude Haiku. With no user keys set, defaults to Gemini -// (the dev-mode env fallback). +// of whichever provider the user has keys for: LocalLLM lite if available, +// otherwise Gemini Flash Lite, otherwise Claude Haiku. 
function resolveTitleModel(apiKeys: UserApiKeys): string { + // Check if LocalLLM is configured server-side + if (process.env.VLLM_BASE_URL?.trim()) { + return "localllm-lite"; + } if (apiKeys.gemini?.trim()) return DEFAULT_TITLE_MODEL; if (apiKeys.claude?.trim()) return "claude-haiku-4-5"; return DEFAULT_TITLE_MODEL; @@ -36,11 +39,12 @@ export async function getUserModelSettings( const api_keys: UserApiKeys = { claude: data?.claude_api_key ?? null, gemini: data?.gemini_api_key ?? null, + openai: process.env.VLLM_API_KEY ?? null, }; return { title_model: resolveTitleModel(api_keys), - tabular_model: resolveModel(data?.tabular_model, DEFAULT_TABULAR_MODEL), + tabular_model: resolveModel(data?.tabular_model, "localllm-main"), api_keys, }; } @@ -58,5 +62,6 @@ export async function getUserApiKeys( return { claude: data?.claude_api_key ?? null, gemini: data?.gemini_api_key ?? null, + openai: process.env.VLLM_API_KEY ?? null, }; } diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 5782999f..d1544e19 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1890,7 +1890,6 @@ "version": "1.9.2", "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz", "integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==", - "dev": true, "license": "MIT", "optional": true, "dependencies": { @@ -2617,7 +2616,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2640,7 +2638,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2663,7 +2660,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2680,7 +2676,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2697,7 +2692,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2714,7 +2708,6 @@ "cpu": [ 
"arm64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2731,7 +2724,6 @@ "cpu": [ "ppc64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2748,7 +2740,6 @@ "cpu": [ "riscv64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2765,7 +2756,6 @@ "cpu": [ "s390x" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2782,7 +2772,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2799,7 +2788,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2816,7 +2804,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "LGPL-3.0-or-later", "optional": true, "os": [ @@ -2833,7 +2820,6 @@ "cpu": [ "arm" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2856,7 +2842,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2879,7 +2864,6 @@ "cpu": [ "ppc64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2902,7 +2886,6 @@ "cpu": [ "riscv64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2925,7 +2908,6 @@ "cpu": [ "s390x" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2948,7 +2930,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2971,7 +2952,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -2994,7 +2974,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "Apache-2.0", "optional": true, "os": [ @@ -3017,7 +2996,6 @@ "cpu": [ "wasm32" ], - "dev": true, "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT", "optional": true, "dependencies": { @@ -3037,7 +3015,6 @@ "cpu": [ "arm64" ], - "dev": true, "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ @@ -3057,7 +3034,6 @@ "cpu": [ "ia32" ], - "dev": true, "license": "Apache-2.0 AND 
LGPL-3.0-or-later", "optional": true, "os": [ @@ -3077,7 +3053,6 @@ "cpu": [ "x64" ], - "dev": true, "license": "Apache-2.0 AND LGPL-3.0-or-later", "optional": true, "os": [ diff --git a/frontend/src/app/(pages)/account/models/page.tsx b/frontend/src/app/(pages)/account/models/page.tsx index cf3720ea..cbbd1fba 100644 --- a/frontend/src/app/(pages)/account/models/page.tsx +++ b/frontend/src/app/(pages)/account/models/page.tsx @@ -39,16 +39,20 @@ export default function ModelsAndApiKeysPage() { updateModelPreference("tabularModel", id) } /> +

+ LocalLLM models are configured by the server administrator and are available to all users. +

@@ -67,8 +71,8 @@ export default function ModelsAndApiKeysPage() {

Title generation automatically routes to the cheapest model - of whichever provider you’ve configured (Gemini Flash - Lite if a Gemini key is set, otherwise Claude Haiku). + available (LocalLLM Lite when the server has a local model + configured, otherwise Claude Haiku when only a Claude key is set).

void; - apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null }; + apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null }; }) { const [isOpen, setIsOpen] = useState(false); const selected = MODELS.find((m) => m.id === value); const selectedAvailable = isModelAvailable(value, apiKeys); - const groups: ("Anthropic" | "Google")[] = ["Anthropic", "Google"]; + const groups: ("LocalLLM" | "Anthropic" | "Google")[] = ["LocalLLM", "Anthropic", "Google"]; return ( @@ -147,23 +151,24 @@ function TabularModelDropdown({ m.id, apiKeys, ); + const tooltip = !available + ? provider === "openai" + ? "LocalLLM configured by server" + : `Add a ${provider === "claude" ? "Claude" : "Gemini"} API key to use this model` + : undefined; return ( onChange(m.id)} - title={ - !available - ? `Add a ${provider === "claude" ? "Claude" : "Gemini"} API key to use this model` - : undefined - } + title={tooltip} > {m.label} - {!available && ( + {!available && provider !== "openai" && ( )} {m.id === value && available && ( diff --git a/frontend/src/app/components/assistant/ChatInput.tsx b/frontend/src/app/components/assistant/ChatInput.tsx index 7f56192b..73c5f24a 100644 --- a/frontend/src/app/components/assistant/ChatInput.tsx +++ b/frontend/src/app/components/assistant/ChatInput.tsx @@ -70,6 +70,7 @@ export const ChatInput = forwardRef(function ChatInput( const apiKeys = { claudeApiKey: profile?.claudeApiKey ?? null, geminiApiKey: profile?.geminiApiKey ?? 
null, + openaiApiKey: process.env.NEXT_PUBLIC_VLLM_API_KEY || "configured", }; const textareaRef = useRef(null); const [docSelectorOpen, setDocSelectorOpen] = useState(false); diff --git a/frontend/src/app/components/assistant/ModelToggle.tsx b/frontend/src/app/components/assistant/ModelToggle.tsx index cc10d518..bdc31352 100644 --- a/frontend/src/app/components/assistant/ModelToggle.tsx +++ b/frontend/src/app/components/assistant/ModelToggle.tsx @@ -15,21 +15,23 @@ import { isModelAvailable } from "@/app/lib/modelAvailability"; export interface ModelOption { id: string; label: string; - group: "Anthropic" | "Google"; + group: "Anthropic" | "Google" | "LocalLLM"; } export const MODELS: ModelOption[] = [ + { id: "localllm-main", label: "LocalLLM Main", group: "LocalLLM" }, + { id: "localllm-lite", label: "LocalLLM Lite", group: "LocalLLM" }, { id: "claude-opus-4-7", label: "Claude Opus 4.7", group: "Anthropic" }, { id: "claude-sonnet-4-6", label: "Claude Sonnet 4.6", group: "Anthropic" }, { id: "gemini-3.1-pro-preview", label: "Gemini 3.1 Pro", group: "Google" }, { id: "gemini-3-flash-preview", label: "Gemini 3 Flash", group: "Google" }, ]; -export const DEFAULT_MODEL_ID = "gemini-3-flash-preview"; +export const DEFAULT_MODEL_ID = "localllm-main"; export const ALLOWED_MODEL_IDS = new Set(MODELS.map((m) => m.id)); -const GROUP_ORDER: ModelOption["group"][] = ["Anthropic", "Google"]; +const GROUP_ORDER: ModelOption["group"][] = ["LocalLLM", "Anthropic", "Google"]; interface Props { value: string; @@ -37,6 +39,7 @@ interface Props { apiKeys?: { claudeApiKey: string | null; geminiApiKey: string | null; + openaiApiKey: string | null; }; } diff --git a/frontend/src/app/components/tabular/TRChatPanel.tsx b/frontend/src/app/components/tabular/TRChatPanel.tsx index 3522df3a..ef915573 100644 --- a/frontend/src/app/components/tabular/TRChatPanel.tsx +++ b/frontend/src/app/components/tabular/TRChatPanel.tsx @@ -453,7 +453,7 @@ function TRChatInput({ onCancel: () => void; model: 
string; onModelChange: (id: string) => void; - apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null }; + apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null }; }) { const [value, setValue] = useState(""); const textareaRef = useRef(null); @@ -610,6 +610,7 @@ export function TRChatPanel({ const apiKeys = { claudeApiKey: profile?.claudeApiKey ?? null, geminiApiKey: profile?.geminiApiKey ?? null, + openaiApiKey: process.env.NEXT_PUBLIC_VLLM_API_KEY || "configured", }; const currentModel = profile?.tabularModel ?? "gemini-3-flash-preview"; const [apiKeyModalProvider, setApiKeyModalProvider] = diff --git a/frontend/src/app/components/tabular/TabularReviewView.tsx b/frontend/src/app/components/tabular/TabularReviewView.tsx index af875899..ff5bb591 100644 --- a/frontend/src/app/components/tabular/TabularReviewView.tsx +++ b/frontend/src/app/components/tabular/TabularReviewView.tsx @@ -90,6 +90,7 @@ export function TRView({ reviewId, projectId }: Props) { const apiKeys = { claudeApiKey: profile?.claudeApiKey ?? null, geminiApiKey: profile?.geminiApiKey ?? null, + openaiApiKey: process.env.NEXT_PUBLIC_VLLM_API_KEY || "configured", }; const tabularModel = profile?.tabularModel ?? "gemini-3-flash-preview"; diff --git a/frontend/src/app/lib/modelAvailability.ts b/frontend/src/app/lib/modelAvailability.ts index 933a8c2d..dd63bc03 100644 --- a/frontend/src/app/lib/modelAvailability.ts +++ b/frontend/src/app/lib/modelAvailability.ts @@ -1,19 +1,22 @@ import { MODELS, type ModelOption } from "../components/assistant/ModelToggle"; -export type ModelProvider = "claude" | "gemini"; +export type ModelProvider = "claude" | "gemini" | "openai"; export function getModelProvider(modelId: string): ModelProvider | null { const model = MODELS.find((m) => m.id === modelId); if (!model) return null; + if (model.group === "LocalLLM") return "openai"; return model.group === "Anthropic" ? 
"claude" : "gemini"; } export function isModelAvailable( modelId: string, - apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null }, + apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null }, ): boolean { const provider = getModelProvider(modelId); if (!provider) return false; + // LocalLLM is server-configured, always available + if (provider === "openai") return true; return provider === "claude" ? !!apiKeys.claudeApiKey?.trim() : !!apiKeys.geminiApiKey?.trim(); @@ -21,19 +24,22 @@ export function isModelAvailable( export function isProviderAvailable( provider: ModelProvider, - apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null }, + apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null }, ): boolean { + if (provider === "openai") return true; // LocalLLM is server-configured return provider === "claude" ? !!apiKeys.claudeApiKey?.trim() : !!apiKeys.geminiApiKey?.trim(); } export function providerLabel(provider: ModelProvider): string { + if (provider === "openai") return "LocalLLM (OpenAI Compatible)"; return provider === "claude" ? "Anthropic (Claude)" : "Google (Gemini)"; } export function modelGroupToProvider( group: ModelOption["group"], ): ModelProvider { + if (group === "LocalLLM") return "openai"; return group === "Anthropic" ? "claude" : "gemini"; }