diff --git a/README.md b/README.md
index 37cd7258..36e6d618 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,23 @@ Open `http://localhost:3000`.
- At least one supported model provider key, depending on which models you enable
- LibreOffice for DOC/DOCX to PDF conversion
+## LLM Configuration Options
+
+The backend supports multiple LLM providers via environment variables in `backend/.env`:
+
+### Provider Keys
+- `GEMINI_API_KEY` - Google Gemini models
+- `ANTHROPIC_API_KEY` - Anthropic Claude models
+- `OPENROUTER_API_KEY` - OpenRouter (aggregates multiple providers)
+- `RESEND_API_KEY` - Resend (email delivery — not an LLM provider; listed here for completeness)
+
+### Local LLM (vLLM) Configuration
+For self-hosted vLLM endpoints:
+- `VLLM_BASE_URL` - Base URL for your vLLM server (e.g., `https://your-vllm-endpoint.com/v1`)
+- `VLLM_API_KEY` - API key for vLLM authentication
+- `VLLM_MAIN_MODEL` - Primary model name for vLLM (e.g., `BredaAI`)
+- `VLLM_LIGHT_MODEL` - Lightweight model for faster responses (e.g., `your-light-model-name`)
+
## Checks
```bash
diff --git a/backend/.env.example b/backend/.env.example
index 1db370a9..0a4a3788 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -12,3 +12,9 @@ GEMINI_API_KEY=your-gemini-key
ANTHROPIC_API_KEY=your-anthropic-key
OPENROUTER_API_KEY=your-openrouter-key
RESEND_API_KEY=your-resend-key
+
+# vLLM Configuration (OpenAI-compatible endpoint)
+VLLM_BASE_URL=https://your-vllm-endpoint.com/v1
+VLLM_API_KEY=your-vllm-api-key
+VLLM_MAIN_MODEL=BredaAI
+VLLM_LIGHT_MODEL=your-light-model-name
diff --git a/backend/package-lock.json b/backend/package-lock.json
index 86f82382..f31e9ea1 100644
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -24,6 +24,7 @@
"libreoffice-convert": "^1.6.0",
"mammoth": "^1.9.0",
"multer": "^1.4.5-lts.2",
+ "openai": "^4.87.3",
"pdfjs-dist": "^4.10.38",
"resend": "^4.5.1"
},
@@ -2688,6 +2689,16 @@
"undici-types": "~6.21.0"
}
},
+ "node_modules/@types/node-fetch": {
+ "version": "2.6.13",
+ "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
+ "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
+ "license": "MIT",
+ "dependencies": {
+ "@types/node": "*",
+ "form-data": "^4.0.4"
+ }
+ },
"node_modules/@types/qs": {
"version": "6.15.0",
"resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.15.0.tgz",
@@ -2759,6 +2770,18 @@
"node": ">=10.0.0"
}
},
+ "node_modules/abort-controller": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
+ "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
+ "license": "MIT",
+ "dependencies": {
+ "event-target-shim": "^5.0.0"
+ },
+ "engines": {
+ "node": ">=6.5"
+ }
+ },
"node_modules/accepts": {
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
@@ -2781,6 +2804,18 @@
"node": ">= 14"
}
},
+ "node_modules/agentkeepalive": {
+ "version": "4.6.0",
+ "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",
+ "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==",
+ "license": "MIT",
+ "dependencies": {
+ "humanize-ms": "^1.2.1"
+ },
+ "engines": {
+ "node": ">= 8.0.0"
+ }
+ },
"node_modules/append-field": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz",
@@ -2808,6 +2843,12 @@
"integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==",
"license": "MIT"
},
+ "node_modules/asynckit": {
+ "version": "0.4.0",
+ "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
+ "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
+ "license": "MIT"
+ },
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
@@ -2934,6 +2975,18 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/combined-stream": {
+ "version": "1.0.8",
+ "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
+ "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+ "license": "MIT",
+ "dependencies": {
+ "delayed-stream": "~1.0.0"
+ },
+ "engines": {
+ "node": ">= 0.8"
+ }
+ },
"node_modules/concat-stream": {
"version": "1.6.2",
"resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
@@ -3035,6 +3088,15 @@
"node": ">=0.10.0"
}
},
+ "node_modules/delayed-stream": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
+ "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=0.4.0"
+ }
+ },
"node_modules/depd": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
@@ -3248,6 +3310,21 @@
"node": ">= 0.4"
}
},
+ "node_modules/es-set-tostringtag": {
+ "version": "2.1.0",
+ "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
+ "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
+ "license": "MIT",
+ "dependencies": {
+ "es-errors": "^1.3.0",
+ "get-intrinsic": "^1.2.6",
+ "has-tostringtag": "^1.0.2",
+ "hasown": "^2.0.2"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/esbuild": {
"version": "0.27.7",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz",
@@ -3305,6 +3382,15 @@
"node": ">= 0.6"
}
},
+ "node_modules/event-target-shim": {
+ "version": "5.0.1",
+ "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
+ "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=6"
+ }
+ },
"node_modules/express": {
"version": "4.22.1",
"resolved": "https://registry.npmjs.org/express/-/express-4.22.1.tgz",
@@ -3446,6 +3532,50 @@
"node": ">= 0.8"
}
},
+ "node_modules/form-data": {
+ "version": "4.0.5",
+ "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
+ "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
+ "license": "MIT",
+ "dependencies": {
+ "asynckit": "^0.4.0",
+ "combined-stream": "^1.0.8",
+ "es-set-tostringtag": "^2.1.0",
+ "hasown": "^2.0.2",
+ "mime-types": "^2.1.12"
+ },
+ "engines": {
+ "node": ">= 6"
+ }
+ },
+ "node_modules/form-data-encoder": {
+ "version": "1.7.2",
+ "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
+ "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==",
+ "license": "MIT"
+ },
+ "node_modules/formdata-node": {
+ "version": "4.4.1",
+ "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
+ "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
+ "license": "MIT",
+ "dependencies": {
+ "node-domexception": "1.0.0",
+ "web-streams-polyfill": "4.0.0-beta.3"
+ },
+ "engines": {
+ "node": ">= 12.20"
+ }
+ },
+ "node_modules/formdata-node/node_modules/web-streams-polyfill": {
+ "version": "4.0.0-beta.3",
+ "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
+ "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 14"
+ }
+ },
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
@@ -3628,6 +3758,21 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/has-tostringtag": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
+ "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
+ "license": "MIT",
+ "dependencies": {
+ "has-symbols": "^1.0.3"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/hash.js": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz",
@@ -3741,6 +3886,15 @@
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
"license": "MIT"
},
+ "node_modules/humanize-ms": {
+ "version": "1.2.1",
+ "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
+ "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
+ "license": "MIT",
+ "dependencies": {
+ "ms": "^2.0.0"
+ }
+ },
"node_modules/iceberg-js": {
"version": "0.8.1",
"resolved": "https://registry.npmjs.org/iceberg-js/-/iceberg-js-0.8.1.tgz",
@@ -4135,6 +4289,71 @@
"node": ">= 0.8"
}
},
+ "node_modules/openai": {
+ "version": "4.104.0",
+ "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz",
+ "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==",
+ "license": "Apache-2.0",
+ "dependencies": {
+ "@types/node": "^18.11.18",
+ "@types/node-fetch": "^2.6.4",
+ "abort-controller": "^3.0.0",
+ "agentkeepalive": "^4.2.1",
+ "form-data-encoder": "1.7.2",
+ "formdata-node": "^4.3.2",
+ "node-fetch": "^2.6.7"
+ },
+ "bin": {
+ "openai": "bin/cli"
+ },
+ "peerDependencies": {
+ "ws": "^8.18.0",
+ "zod": "^3.23.8"
+ },
+ "peerDependenciesMeta": {
+ "ws": {
+ "optional": true
+ },
+ "zod": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/openai/node_modules/@types/node": {
+ "version": "18.19.130",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
+ "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
+ "license": "MIT",
+ "dependencies": {
+ "undici-types": "~5.26.4"
+ }
+ },
+ "node_modules/openai/node_modules/node-fetch": {
+ "version": "2.7.0",
+ "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
+ "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
+ "license": "MIT",
+ "dependencies": {
+ "whatwg-url": "^5.0.0"
+ },
+ "engines": {
+ "node": "4.x || >=6.0.0"
+ },
+ "peerDependencies": {
+ "encoding": "^0.1.0"
+ },
+ "peerDependenciesMeta": {
+ "encoding": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/openai/node_modules/undici-types": {
+ "version": "5.26.5",
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
+ "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
+ "license": "MIT"
+ },
"node_modules/option": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz",
@@ -4665,6 +4884,12 @@
"node": ">=0.6"
}
},
+ "node_modules/tr46": {
+ "version": "0.0.3",
+ "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
+ "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
+ "license": "MIT"
+ },
"node_modules/ts-algebra": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
@@ -4784,6 +5009,22 @@
"node": ">= 8"
}
},
+ "node_modules/webidl-conversions": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
+ "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
+ "license": "BSD-2-Clause"
+ },
+ "node_modules/whatwg-url": {
+ "version": "5.0.0",
+ "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
+ "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
+ "license": "MIT",
+ "dependencies": {
+ "tr46": "~0.0.3",
+ "webidl-conversions": "^3.0.0"
+ }
+ },
"node_modules/ws": {
"version": "8.20.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
diff --git a/backend/package.json b/backend/package.json
index 50dfb585..86955768 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -24,6 +24,7 @@
"libreoffice-convert": "^1.6.0",
"mammoth": "^1.9.0",
"multer": "^1.4.5-lts.2",
+ "openai": "^4.87.3",
"pdfjs-dist": "^4.10.38",
"resend": "^4.5.1"
},
diff --git a/backend/src/lib/chatTools.ts b/backend/src/lib/chatTools.ts
index c3ab2439..a8bf1393 100644
--- a/backend/src/lib/chatTools.ts
+++ b/backend/src/lib/chatTools.ts
@@ -2121,7 +2121,8 @@ export async function runToolCalls(
}
} else if (tc.function.name === "generate_docx") {
- const title = args.title as string;
+ // Fallback to "document" if title is missing - model sometimes omits required fields
+ const title = (args.title as string) || "document";
const landscape = !!(args.landscape);
console.log(`[generate_docx] title="${title}" landscape=${landscape} args.landscape=${args.landscape}`);
const previewFilename = `${(title.replace(/[^a-zA-Z0-9 _-]/g, "").trim().slice(0, 64) || "document")}.docx`;
diff --git a/backend/src/lib/llm/index.ts b/backend/src/lib/llm/index.ts
index 518ddc01..4b5e9793 100644
--- a/backend/src/lib/llm/index.ts
+++ b/backend/src/lib/llm/index.ts
@@ -1,5 +1,6 @@
import { streamClaude, completeClaudeText } from "./claude";
import { streamGemini, completeGeminiText } from "./gemini";
+import { streamOpenAI, completeOpenAIText } from "./openai";
import { providerForModel } from "./models";
import type { StreamChatParams, StreamChatResult, UserApiKeys } from "./types";
@@ -11,6 +12,7 @@ export async function streamChatWithTools(
): Promise {
const provider = providerForModel(params.model);
if (provider === "claude") return streamClaude(params);
+ if (provider === "openai") return streamOpenAI(params);
return streamGemini(params);
}
@@ -23,5 +25,6 @@ export async function completeText(params: {
}): Promise {
const provider = providerForModel(params.model);
if (provider === "claude") return completeClaudeText(params);
+ if (provider === "openai") return completeOpenAIText(params);
return completeGeminiText(params);
}
diff --git a/backend/src/lib/llm/models.ts b/backend/src/lib/llm/models.ts
index 52314007..e977a087 100644
--- a/backend/src/lib/llm/models.ts
+++ b/backend/src/lib/llm/models.ts
@@ -9,27 +9,33 @@ export const GEMINI_MAIN_MODELS = [
"gemini-3.1-pro-preview",
"gemini-3-flash-preview",
] as const;
+export const LOCAL_LLM_MAIN_MODELS = ["localllm-main"] as const;
// Mid-tier (used for tabular review) — user picks one in account settings.
export const CLAUDE_MID_MODELS = ["claude-sonnet-4-6"] as const;
export const GEMINI_MID_MODELS = ["gemini-3-flash-preview"] as const;
+export const LOCAL_LLM_MID_MODELS = ["localllm-main"] as const;
// Low-tier (used for title generation, lightweight extractions) — user picks
// one in account settings.
export const CLAUDE_LOW_MODELS = ["claude-haiku-4-5"] as const;
export const GEMINI_LOW_MODELS = ["gemini-3.1-flash-lite-preview"] as const;
+export const LOCAL_LLM_LOW_MODELS = ["localllm-lite"] as const;
-export const DEFAULT_MAIN_MODEL = "gemini-3-flash-preview";
-export const DEFAULT_TITLE_MODEL = "gemini-3.1-flash-lite-preview";
-export const DEFAULT_TABULAR_MODEL = "gemini-3-flash-preview";
+export const DEFAULT_MAIN_MODEL = "localllm-main";
+export const DEFAULT_TITLE_MODEL = "localllm-lite";
+export const DEFAULT_TABULAR_MODEL = "localllm-main";
const ALL_MODELS = new Set([
...CLAUDE_MAIN_MODELS,
...GEMINI_MAIN_MODELS,
+ ...LOCAL_LLM_MAIN_MODELS,
...CLAUDE_MID_MODELS,
...GEMINI_MID_MODELS,
+ ...LOCAL_LLM_MID_MODELS,
...CLAUDE_LOW_MODELS,
...GEMINI_LOW_MODELS,
+ ...LOCAL_LLM_LOW_MODELS,
]);
// ---------------------------------------------------------------------------
@@ -37,6 +43,7 @@ const ALL_MODELS = new Set([
// ---------------------------------------------------------------------------
export function providerForModel(model: string): Provider {
+ if (model.startsWith("localllm")) return "openai";
if (model.startsWith("claude")) return "claude";
if (model.startsWith("gemini")) return "gemini";
throw new Error(`Unknown model id: ${model}`);
diff --git a/backend/src/lib/llm/openai.ts b/backend/src/lib/llm/openai.ts
new file mode 100644
index 00000000..1b7cc015
--- /dev/null
+++ b/backend/src/lib/llm/openai.ts
@@ -0,0 +1,218 @@
+import OpenAI from "openai";
+import type {
+  StreamChatParams,
+  StreamChatResult,
+  NormalizedToolCall,
+  NormalizedToolResult,
+} from "./types";
+
+// Chat message shape for the OpenAI-compatible endpoint. "tool" messages
+// answer a specific tool call (tool_call_id); assistant messages may carry
+// the tool calls they issued.
+type OpenAIMessage = {
+  role: "user" | "assistant" | "system" | "tool";
+  content: string;
+  tool_call_id?: string;
+  tool_calls?: OpenAIToolCall[];
+};
+
+type OpenAIToolCall = {
+  id: string;
+  type: "function";
+  function: {
+    name: string;
+    arguments: string;
+  };
+};
+
+// Hard cap on generation length for streamed chat turns.
+const MAX_TOKENS = 16384;
+
+function getClient(override?: string | null): OpenAI {
+  const apiKey = override?.trim() || process.env.VLLM_API_KEY || "";
+  const baseURL = process.env.VLLM_BASE_URL || "http://localhost:8000/v1";
+  // Log key *presence* only — never the key itself.
+  console.log("[localllm] Client init:", { baseURL, apiKeyPresent: !!apiKey });
+  return new OpenAI({ apiKey, baseURL });
+}
+
+// Map internal model ids onto whatever the vLLM server actually serves.
+function getActualModelName(model: string): string {
+  if (model === "localllm-main") {
+    return process.env.VLLM_MAIN_MODEL || "BredaAI";
+  }
+  if (model === "localllm-lite") {
+    return process.env.VLLM_LIGHT_MODEL || "unsloth/gemma-4-E2B-it-GGUF:Q5_K_S";
+  }
+  return model;
+}
+
+function toNativeMessages(
+  messages: StreamChatParams["messages"],
+): OpenAIMessage[] {
+  return messages.map((m) => ({
+    role: m.role === "assistant" ? "assistant" : "user",
+    content: m.content,
+  }));
+}
+
+export async function streamOpenAI(
+  params: StreamChatParams,
+): Promise<StreamChatResult> {
+  const {
+    model,
+    systemPrompt,
+    tools = [],
+    callbacks = {},
+    runTools,
+    apiKeys,
+  } = params;
+  const maxIter = params.maxIterations ?? 10;
+
+  const actualModel = getActualModelName(model);
+  console.log("[localllm] streaming request:", {
+    internalModel: model,
+    actualModel,
+    baseURL: process.env.VLLM_BASE_URL,
+  });
+
+  const client = getClient(apiKeys?.openai);
+  const messages: OpenAIMessage[] = toNativeMessages(params.messages);
+  let fullText = "";
+
+  try {
+    for (let iter = 0; iter < maxIter; iter++) {
+      const systemMessage = systemPrompt
+        ? [{ role: "system" as const, content: systemPrompt }]
+        : [];
+
+      const stream = await client.chat.completions.create({
+        model: actualModel,
+        max_tokens: MAX_TOKENS,
+        messages: [...systemMessage, ...messages] as any,
+        tools: tools.length
+          ? tools.map((t) => ({
+              type: "function" as const,
+              function: {
+                name: t.function.name,
+                description: t.function.description,
+                parameters: t.function.parameters,
+              },
+            }))
+          : undefined,
+        stream: true,
+      });
+
+      // Tool calls stream as fragments: the first fragment for an index
+      // carries id/name, later fragments append to the JSON `arguments`
+      // string. Accumulate per index and parse once the stream ends —
+      // parsing per-chunk would JSON.parse partial strings and throw.
+      const pending = new Map<
+        number,
+        { id: string; name: string; args: string }
+      >();
+      let currentText = "";
+
+      for await (const chunk of stream) {
+        const choice = chunk.choices?.[0];
+        if (!choice) continue;
+        const delta = choice.delta;
+
+        if (delta?.content) {
+          currentText += delta.content;
+          callbacks.onContentDelta?.(delta.content);
+        }
+
+        for (const tc of delta?.tool_calls ?? []) {
+          const idx = tc.index ?? 0;
+          const entry = pending.get(idx) ?? { id: "", name: "", args: "" };
+          if (tc.id) entry.id = tc.id;
+          if (tc.function?.name) entry.name += tc.function.name;
+          if (tc.function?.arguments) entry.args += tc.function.arguments;
+          pending.set(idx, entry);
+        }
+      }
+
+      fullText += currentText;
+
+      const toolCalls: NormalizedToolCall[] = [...pending.entries()]
+        .sort(([a], [b]) => a - b)
+        .map(([idx, e]) => {
+          let input: Record<string, unknown> = {};
+          try {
+            input = e.args ? JSON.parse(e.args) : {};
+          } catch {
+            console.error("[localllm] unparsable tool args:", e.name, e.args);
+          }
+          const call: NormalizedToolCall = {
+            id: e.id || `call-${idx}`,
+            name: e.name || "unknown",
+            input,
+          };
+          callbacks.onToolCallStart?.(call);
+          return call;
+        });
+
+      if (toolCalls.length > 0 && runTools) {
+        const results = await runTools(toolCalls);
+
+        // Echo the assistant turn including its tool calls, then answer
+        // each call with a role:"tool" message keyed by tool_call_id, as
+        // the OpenAI chat-completions protocol requires.
+        messages.push({
+          role: "assistant",
+          content: currentText,
+          tool_calls: toolCalls.map((c) => ({
+            id: c.id,
+            type: "function" as const,
+            function: { name: c.name, arguments: JSON.stringify(c.input) },
+          })),
+        });
+        results.forEach((r, i) => {
+          messages.push({
+            role: "tool",
+            tool_call_id: toolCalls[i]?.id ?? `call-${i}`,
+            content: r.content,
+          });
+        });
+      } else {
+        break;
+      }
+    }
+  } catch (error: any) {
+    // JSON.stringify(new Error()) yields "{}" — message/stack are
+    // non-enumerable — so log the stack instead.
+    console.error("[localllm] streaming error:", error?.stack || error);
+    throw error;
+  }
+
+  return { fullText };
+}
+
+export async function completeOpenAIText(params: {
+  model: string;
+  systemPrompt?: string;
+  user: string;
+  maxTokens?: number;
+  apiKeys?: { openai?: string | null };
+}): Promise<string> {
+  const client = getClient(params.apiKeys?.openai);
+  const actualModel = getActualModelName(params.model);
+
+  const messages: OpenAIMessage[] = [
+    ...(params.systemPrompt
+      ? [{ role: "system" as const, content: params.systemPrompt }]
+      : []),
+    { role: "user", content: params.user },
+  ];
+
+  const response = await client.chat.completions.create({
+    model: actualModel,
+    messages: messages as any,
+    max_tokens: params.maxTokens ?? 512,
+  });
+
+  return response.choices?.[0]?.message?.content || "";
+}
+
+export type { NormalizedToolResult };
diff --git a/backend/src/lib/llm/types.ts b/backend/src/lib/llm/types.ts
index 8cc411a7..a8409d80 100644
--- a/backend/src/lib/llm/types.ts
+++ b/backend/src/lib/llm/types.ts
@@ -2,7 +2,7 @@
// Callers always speak OpenAI-style tools + { role, content } messages; each
// provider translates internally.
-export type Provider = "claude" | "gemini";
+export type Provider = "claude" | "gemini" | "openai";
export type OpenAIToolSchema = {
type: "function";
@@ -39,6 +39,7 @@ export type StreamCallbacks = {
export type UserApiKeys = {
claude?: string | null;
gemini?: string | null;
+ openai?: string | null;
};
export type StreamChatParams = {
diff --git a/backend/src/lib/userSettings.ts b/backend/src/lib/userSettings.ts
index c798b636..e8eb23b1 100644
--- a/backend/src/lib/userSettings.ts
+++ b/backend/src/lib/userSettings.ts
@@ -13,10 +13,13 @@ export type UserModelSettings = {
};
// Title generation is a lightweight task — always routed to the cheapest model
-// of whichever provider the user has keys for: Gemini Flash Lite if Gemini is
-// available, otherwise Claude Haiku. With no user keys set, defaults to Gemini
-// (the dev-mode env fallback).
+// of whichever provider the user has keys for: LocalLLM lite if available,
+// otherwise Gemini Flash Lite, otherwise Claude Haiku. BUG(review): DEFAULT_TITLE_MODEL is now "localllm-lite", so the Gemini branch below no longer returns a Gemini model — it should return "gemini-3.1-flash-lite-preview" explicitly.
function resolveTitleModel(apiKeys: UserApiKeys): string {
+ // Check if LocalLLM is configured server-side
+ if (process.env.VLLM_BASE_URL?.trim()) {
+ return "localllm-lite";
+ }
if (apiKeys.gemini?.trim()) return DEFAULT_TITLE_MODEL;
if (apiKeys.claude?.trim()) return "claude-haiku-4-5";
return DEFAULT_TITLE_MODEL;
@@ -36,11 +39,12 @@ export async function getUserModelSettings(
const api_keys: UserApiKeys = {
claude: data?.claude_api_key ?? null,
gemini: data?.gemini_api_key ?? null,
+ openai: process.env.VLLM_API_KEY ?? null,
};
return {
title_model: resolveTitleModel(api_keys),
- tabular_model: resolveModel(data?.tabular_model, DEFAULT_TABULAR_MODEL),
+    tabular_model: resolveModel(data?.tabular_model, DEFAULT_TABULAR_MODEL), // DEFAULT_TABULAR_MODEL === "localllm-main"; keep one source of truth
api_keys,
};
}
@@ -58,5 +62,6 @@ export async function getUserApiKeys(
return {
claude: data?.claude_api_key ?? null,
gemini: data?.gemini_api_key ?? null,
+ openai: process.env.VLLM_API_KEY ?? null,
};
}
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 5782999f..d1544e19 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -1890,7 +1890,6 @@
"version": "1.9.2",
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz",
"integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==",
- "dev": true,
"license": "MIT",
"optional": true,
"dependencies": {
@@ -2617,7 +2616,6 @@
"cpu": [
"arm64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2640,7 +2638,6 @@
"cpu": [
"x64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2663,7 +2660,6 @@
"cpu": [
"arm64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2680,7 +2676,6 @@
"cpu": [
"x64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2697,7 +2692,6 @@
"cpu": [
"arm"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2714,7 +2708,6 @@
"cpu": [
"arm64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2731,7 +2724,6 @@
"cpu": [
"ppc64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2748,7 +2740,6 @@
"cpu": [
"riscv64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2765,7 +2756,6 @@
"cpu": [
"s390x"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2782,7 +2772,6 @@
"cpu": [
"x64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2799,7 +2788,6 @@
"cpu": [
"arm64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2816,7 +2804,6 @@
"cpu": [
"x64"
],
- "dev": true,
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -2833,7 +2820,6 @@
"cpu": [
"arm"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2856,7 +2842,6 @@
"cpu": [
"arm64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2879,7 +2864,6 @@
"cpu": [
"ppc64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2902,7 +2886,6 @@
"cpu": [
"riscv64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2925,7 +2908,6 @@
"cpu": [
"s390x"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2948,7 +2930,6 @@
"cpu": [
"x64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2971,7 +2952,6 @@
"cpu": [
"arm64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -2994,7 +2974,6 @@
"cpu": [
"x64"
],
- "dev": true,
"license": "Apache-2.0",
"optional": true,
"os": [
@@ -3017,7 +2996,6 @@
"cpu": [
"wasm32"
],
- "dev": true,
"license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
"optional": true,
"dependencies": {
@@ -3037,7 +3015,6 @@
"cpu": [
"arm64"
],
- "dev": true,
"license": "Apache-2.0 AND LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -3057,7 +3034,6 @@
"cpu": [
"ia32"
],
- "dev": true,
"license": "Apache-2.0 AND LGPL-3.0-or-later",
"optional": true,
"os": [
@@ -3077,7 +3053,6 @@
"cpu": [
"x64"
],
- "dev": true,
"license": "Apache-2.0 AND LGPL-3.0-or-later",
"optional": true,
"os": [
diff --git a/frontend/src/app/(pages)/account/models/page.tsx b/frontend/src/app/(pages)/account/models/page.tsx
index cf3720ea..cbbd1fba 100644
--- a/frontend/src/app/(pages)/account/models/page.tsx
+++ b/frontend/src/app/(pages)/account/models/page.tsx
@@ -39,16 +39,20 @@ export default function ModelsAndApiKeysPage() {
updateModelPreference("tabularModel", id)
}
/>
+
+ LocalLLM models are configured by the server administrator and are available to all users.
+
@@ -67,8 +71,8 @@ export default function ModelsAndApiKeysPage() {
Title generation automatically routes to the cheapest model
- of whichever provider you’ve configured (Gemini Flash
- Lite if a Gemini key is set, otherwise Claude Haiku).
+ of whichever provider you’ve configured (LocalLLM Lite if
+ available, otherwise Gemini Flash Lite, otherwise Claude Haiku).
void;
- apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null };
+ apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null };
}) {
const [isOpen, setIsOpen] = useState(false);
const selected = MODELS.find((m) => m.id === value);
const selectedAvailable = isModelAvailable(value, apiKeys);
- const groups: ("Anthropic" | "Google")[] = ["Anthropic", "Google"];
+ const groups: ("LocalLLM" | "Anthropic" | "Google")[] = ["LocalLLM", "Anthropic", "Google"];
return (
@@ -147,23 +151,24 @@ function TabularModelDropdown({
m.id,
apiKeys,
);
+ const tooltip = !available
+ ? provider === "openai"
+ ? "LocalLLM configured by server"
+ : `Add a ${provider === "claude" ? "Claude" : "Gemini"} API key to use this model`
+ : undefined;
return (
onChange(m.id)}
- title={
- !available
- ? `Add a ${provider === "claude" ? "Claude" : "Gemini"} API key to use this model`
- : undefined
- }
+ title={tooltip}
>
{m.label}
- {!available && (
+ {!available && provider !== "openai" && (
)}
{m.id === value && available && (
diff --git a/frontend/src/app/components/assistant/ChatInput.tsx b/frontend/src/app/components/assistant/ChatInput.tsx
index 7f56192b..73c5f24a 100644
--- a/frontend/src/app/components/assistant/ChatInput.tsx
+++ b/frontend/src/app/components/assistant/ChatInput.tsx
@@ -70,6 +70,7 @@ export const ChatInput = forwardRef(function ChatInput(
const apiKeys = {
claudeApiKey: profile?.claudeApiKey ?? null,
geminiApiKey: profile?.geminiApiKey ?? null,
+    openaiApiKey: "configured", // LocalLLM is server-side; never embed a NEXT_PUBLIC_* API key in the client bundle (value is unused by availability checks)
};
const textareaRef = useRef(null);
const [docSelectorOpen, setDocSelectorOpen] = useState(false);
diff --git a/frontend/src/app/components/assistant/ModelToggle.tsx b/frontend/src/app/components/assistant/ModelToggle.tsx
index cc10d518..bdc31352 100644
--- a/frontend/src/app/components/assistant/ModelToggle.tsx
+++ b/frontend/src/app/components/assistant/ModelToggle.tsx
@@ -15,21 +15,23 @@ import { isModelAvailable } from "@/app/lib/modelAvailability";
export interface ModelOption {
id: string;
label: string;
- group: "Anthropic" | "Google";
+ group: "Anthropic" | "Google" | "LocalLLM";
}
export const MODELS: ModelOption[] = [
+ { id: "localllm-main", label: "LocalLLM Main", group: "LocalLLM" },
+ { id: "localllm-lite", label: "LocalLLM Lite", group: "LocalLLM" },
{ id: "claude-opus-4-7", label: "Claude Opus 4.7", group: "Anthropic" },
{ id: "claude-sonnet-4-6", label: "Claude Sonnet 4.6", group: "Anthropic" },
{ id: "gemini-3.1-pro-preview", label: "Gemini 3.1 Pro", group: "Google" },
{ id: "gemini-3-flash-preview", label: "Gemini 3 Flash", group: "Google" },
];
-export const DEFAULT_MODEL_ID = "gemini-3-flash-preview";
+export const DEFAULT_MODEL_ID = "localllm-main";
export const ALLOWED_MODEL_IDS = new Set(MODELS.map((m) => m.id));
-const GROUP_ORDER: ModelOption["group"][] = ["Anthropic", "Google"];
+const GROUP_ORDER: ModelOption["group"][] = ["LocalLLM", "Anthropic", "Google"];
interface Props {
value: string;
@@ -37,6 +39,7 @@ interface Props {
apiKeys?: {
claudeApiKey: string | null;
geminiApiKey: string | null;
+ openaiApiKey: string | null;
};
}
diff --git a/frontend/src/app/components/tabular/TRChatPanel.tsx b/frontend/src/app/components/tabular/TRChatPanel.tsx
index 3522df3a..ef915573 100644
--- a/frontend/src/app/components/tabular/TRChatPanel.tsx
+++ b/frontend/src/app/components/tabular/TRChatPanel.tsx
@@ -453,7 +453,7 @@ function TRChatInput({
onCancel: () => void;
model: string;
onModelChange: (id: string) => void;
- apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null };
+ apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null };
}) {
const [value, setValue] = useState("");
const textareaRef = useRef(null);
@@ -610,6 +610,7 @@ export function TRChatPanel({
const apiKeys = {
claudeApiKey: profile?.claudeApiKey ?? null,
geminiApiKey: profile?.geminiApiKey ?? null,
+    openaiApiKey: "configured", // LocalLLM is server-side; never embed a NEXT_PUBLIC_* API key in the client bundle (value is unused by availability checks)
};
const currentModel = profile?.tabularModel ?? "gemini-3-flash-preview";
const [apiKeyModalProvider, setApiKeyModalProvider] =
diff --git a/frontend/src/app/components/tabular/TabularReviewView.tsx b/frontend/src/app/components/tabular/TabularReviewView.tsx
index af875899..ff5bb591 100644
--- a/frontend/src/app/components/tabular/TabularReviewView.tsx
+++ b/frontend/src/app/components/tabular/TabularReviewView.tsx
@@ -90,6 +90,7 @@ export function TRView({ reviewId, projectId }: Props) {
const apiKeys = {
claudeApiKey: profile?.claudeApiKey ?? null,
geminiApiKey: profile?.geminiApiKey ?? null,
+    openaiApiKey: "configured", // LocalLLM is server-side; never embed a NEXT_PUBLIC_* API key in the client bundle (value is unused by availability checks)
};
const tabularModel = profile?.tabularModel ?? "gemini-3-flash-preview";
diff --git a/frontend/src/app/lib/modelAvailability.ts b/frontend/src/app/lib/modelAvailability.ts
index 933a8c2d..dd63bc03 100644
--- a/frontend/src/app/lib/modelAvailability.ts
+++ b/frontend/src/app/lib/modelAvailability.ts
@@ -1,19 +1,22 @@
import { MODELS, type ModelOption } from "../components/assistant/ModelToggle";
-export type ModelProvider = "claude" | "gemini";
+export type ModelProvider = "claude" | "gemini" | "openai";
export function getModelProvider(modelId: string): ModelProvider | null {
const model = MODELS.find((m) => m.id === modelId);
if (!model) return null;
+ if (model.group === "LocalLLM") return "openai";
return model.group === "Anthropic" ? "claude" : "gemini";
}
export function isModelAvailable(
modelId: string,
- apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null },
+ apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null },
): boolean {
const provider = getModelProvider(modelId);
if (!provider) return false;
+ // LocalLLM is server-configured, always available
+ if (provider === "openai") return true;
return provider === "claude"
? !!apiKeys.claudeApiKey?.trim()
: !!apiKeys.geminiApiKey?.trim();
@@ -21,19 +24,22 @@ export function isModelAvailable(
export function isProviderAvailable(
provider: ModelProvider,
- apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null },
+ apiKeys: { claudeApiKey: string | null; geminiApiKey: string | null; openaiApiKey: string | null },
): boolean {
+ if (provider === "openai") return true; // LocalLLM is server-configured
return provider === "claude"
? !!apiKeys.claudeApiKey?.trim()
: !!apiKeys.geminiApiKey?.trim();
}
export function providerLabel(provider: ModelProvider): string {
+ if (provider === "openai") return "LocalLLM (OpenAI Compatible)";
return provider === "claude" ? "Anthropic (Claude)" : "Google (Gemini)";
}
export function modelGroupToProvider(
group: ModelOption["group"],
): ModelProvider {
+ if (group === "LocalLLM") return "openai";
return group === "Anthropic" ? "claude" : "gemini";
}