From f017c6bcd89ae6f250f59851b74d8a4bd8cdc575 Mon Sep 17 00:00:00 2001
From: yyh1102
Date: Wed, 25 Feb 2026 13:38:05 +0800
Subject: [PATCH 1/3] feat: add local DMind model management with private chat

- Add `minara private` command with subcommands: chat, install, remove, models, load, unload, status
- Support DMind-3 (21B), DMind-3-mini (4B), DMind-3-nano (270M) models
- Background vLLM server with load/unload for resource management
- Attached server mode for single chat sessions
- Model download via huggingface_hub with prerequisite auto-install
- Server state tracking in ~/.minara/vllm-server.json
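The `~/.minara/vllm-server.json` file mentioned above is written by `startServerDetached` in src/local-models.ts below. A sketch of its contents after loading the nano model (field set from the `ServerInfo` interface in this patch; values illustrative):

```ts
// Mirrors the ServerInfo interface introduced in src/local-models.ts.
interface ServerInfo {
  pid: number;        // detached vLLM process id
  modelId: string;    // registry id from AVAILABLE_MODELS
  hfRepo: string;     // Hugging Face repo (later patches store the resolved path here)
  startedAt: string;  // ISO timestamp from new Date().toISOString()
}

const example: ServerInfo = {
  pid: 48213,
  modelId: 'dmind-3-nano',
  hfRepo: 'DMindAI/DMind-3-nano',
  startedAt: '2026-02-25T05:38:05.000Z',
};
```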
---
 src/commands/install.ts | 153 ++++++++++++++++
 src/commands/private.ts | 380 ++++++++++++++++++++++++++++++++++++++++
 src/index.ts            |   6 +
 src/local-models.ts     | 274 +++++++++++++++++++++++++++++
 4 files changed, 813 insertions(+)
 create mode 100644 src/commands/install.ts
 create mode 100644 src/commands/private.ts
 create mode 100644 src/local-models.ts

diff --git a/src/commands/install.ts b/src/commands/install.ts
new file mode 100644
index 0000000..4f9e726
--- /dev/null
+++ b/src/commands/install.ts
@@ -0,0 +1,153 @@
+import { select, confirm } from '@inquirer/prompts';
+import chalk from 'chalk';
+import {
+  AVAILABLE_MODELS, getInstalledIds, getModelDef,
+  markInstalled, markUninstalled,
+  findPython, hasVllm, hasHfHub,
+  pipInstall, downloadModel, clearModelCache,
+} from '../local-models.js';
+import { error, info, success, warn, spinner } from '../utils.js';
+
+// ─── List ───────────────────────────────────────────────────────────────────
+
+export function listModels(): void {
+  const installed = getInstalledIds();
+  console.log('');
+  console.log(chalk.bold(' DMind Models'));
+  console.log(chalk.dim(' ─'.repeat(24)));
+  console.log('');
+  for (const m of AVAILABLE_MODELS) {
+    const status = installed.includes(m.id)
+      ? chalk.green(' [installed]')
+      : '';
+    const rec = m.recommended ? chalk.yellow(' ★ recommended') : '';
+    console.log(`  ${chalk.bold(m.name)} ${chalk.dim(`(${m.params})`)}${rec}${status}`);
+    console.log(chalk.dim(`    https://huggingface.co/${m.hfRepo}`));
+    console.log('');
+  }
+}
+
+// ─── Install ────────────────────────────────────────────────────────────────
+
+export async function installFlow(): Promise<void> {
+  const py = ensurePython();
+  if (!py) return;
+  if (!(await ensureDeps(py))) return;
+
+  const installed = getInstalledIds();
+  const candidates = AVAILABLE_MODELS.filter((m) => !installed.includes(m.id));
+  if (candidates.length === 0) {
+    info('All models are already installed.');
+    return;
+  }
+
+  const defaultModel = candidates.find((m) => m.recommended) ?? candidates[0];
+  const model = await select({
+    message: 'Select a model to install:',
+    choices: candidates.map((m) => ({
+      name: `${m.name} ${chalk.dim(`(${m.params})`)}${m.recommended ? chalk.yellow(' ★ recommended') : ''}`,
+      value: m,
+    })),
+    default: defaultModel,
+  });
+
+  console.log('');
+  info(`Downloading ${chalk.bold(model.name)} from Hugging Face…`);
+  console.log(chalk.dim(`  https://huggingface.co/${model.hfRepo}`));
+  console.log('');
+
+  if (!downloadModel(py, model.hfRepo)) {
+    error('Download failed. Check your network connection and try again.');
+    return;
+  }
+
+  markInstalled(model.id);
+  console.log('');
+  success(`${chalk.bold(model.name)} installed successfully!`);
+  info(`Start a private chat: ${chalk.cyan('minara private chat')}`);
+}
+
+// ─── Uninstall ──────────────────────────────────────────────────────────────
+
+export async function uninstallFlow(): Promise<void> {
+  const installed = getInstalledIds();
+  if (installed.length === 0) {
+    info('No models installed.');
+    return;
+  }
+
+  const choices = installed.map((id) => {
+    const def = getModelDef(id);
+    return { name: def ? `${def.name} (${def.params})` : id, value: id };
+  });
+
+  const modelId = await select({ message: 'Select model to uninstall:', choices });
+  const def = getModelDef(modelId);
+  const ok = await confirm({
+    message: `Uninstall ${def?.name ?? modelId}?`,
+    default: false,
+  });
+  if (!ok) return;
+
+  markUninstalled(modelId);
+
+  const py = findPython();
+  if (py && def) {
+    const spin = spinner('Removing cached model files…');
+    const cleared = clearModelCache(py, def.hfRepo);
+    spin.stop();
+    if (cleared) {
+      success(`${def.name} uninstalled and cache cleared.`);
+    } else {
+      success(`${def.name} uninstalled.`);
+      warn('Could not clear HuggingFace cache automatically.');
+      console.log(chalk.dim('  Run `huggingface-cli delete-cache` to free disk space.'));
+    }
+  } else {
+    success(`${def?.name ?? modelId} uninstalled.`);
+  }
+}
+
+// ─── Prerequisite helpers ───────────────────────────────────────────────────
+
+function ensurePython(): string | null {
+  const py = findPython();
+  if (!py) {
+    error('Python 3 is required. Please install Python 3.8+ first.');
+    console.log(chalk.dim('  https://www.python.org/downloads/'));
+    return null;
+  }
+  return py;
+}
+
+async function ensureDeps(py: string): Promise<boolean> {
+  if (!hasVllm(py)) {
+    warn('vLLM is not installed.');
+    const ok = await confirm({ message: 'Install vLLM now? (pip install vllm)', default: true });
+    if (!ok) {
+      info('Skipped. Install manually: pip install vllm');
+      return false;
+    }
+    if (!pipInstall(py, 'vllm')) {
+      error('Failed to install vLLM. Try manually: pip install vllm');
+      return false;
+    }
+    success('vLLM installed');
+  }
+
+  if (!hasHfHub(py)) {
+    warn('huggingface_hub is not installed.');
+    const ok = await confirm({ message: 'Install huggingface_hub now?', default: true });
+    if (!ok) {
+      info('Skipped. Install manually: pip install huggingface_hub');
+      return false;
+    }
+    if (!pipInstall(py, 'huggingface_hub')) {
+      error('Failed to install. Try: pip install huggingface_hub');
+      return false;
+    }
+    success('huggingface_hub installed');
+  }
+
+  return true;
+}
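Condensed view of how the helpers in this file compose on the happy path (a sketch assuming the imports above; the interactive prompts and error reporting are elided):

```ts
// Hypothetical driver: ensure prerequisites, then download the recommended model.
async function installHappyPath(): Promise<void> {
  const py = findPython();                                          // 'python3' | 'python' | null
  if (!py) return;
  if (!hasVllm(py) && !pipInstall(py, 'vllm')) return;              // prompted interactively above
  if (!hasHfHub(py) && !pipInstall(py, 'huggingface_hub')) return;
  const model = AVAILABLE_MODELS.find((m) => m.recommended) ?? AVAILABLE_MODELS[0];
  if (downloadModel(py, model.hfRepo)) markInstalled(model.id);
}
```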
diff --git a/src/commands/private.ts b/src/commands/private.ts
new file mode 100644
index 0000000..d2b4de1
--- /dev/null
+++ b/src/commands/private.ts
@@ -0,0 +1,380 @@
+import { Command } from 'commander';
+import { select } from '@inquirer/prompts';
+import chalk from 'chalk';
+import { createInterface } from 'node:readline';
+import type { ChildProcess } from 'node:child_process';
+import {
+  VLLM_BASE_URL, VLLM_PORT, VLLM_LOG,
+  getInstalledIds, getActiveId, getModelDef, setActiveModel,
+  findPython, isServerRunning, startServerAttached, startServerDetached,
+  stopServer, waitForServer, getServerInfo,
+} from '../local-models.js';
+import { installFlow, uninstallFlow, listModels } from './install.js';
+import { error, info, success, warn, spinner, wrapAction } from '../utils.js';
+
+// ─── Main command — interactive menu ────────────────────────────────────────
+
+export const privateCommand = new Command('private')
+  .description('Local DMind models — chat, install, load/unload (powered by vLLM)')
+  .action(wrapAction(async () => {
+    const running = await isServerRunning();
+    const srvInfo = getServerInfo();
+    const installed = getInstalledIds();
+
+    const serverLabel = running && srvInfo
+      ? chalk.green(`[ON] ${getModelDef(srvInfo.modelId)?.name ?? srvInfo.hfRepo}`)
+      : chalk.dim('[OFF]');
+
+    const action = await select({
+      message: 'Private AI — select an action:',
+      choices: [
+        { name: `Chat with local model ${serverLabel}`, value: 'chat' },
+        { name: 'Load model ' + chalk.dim('(start vLLM server)'), value: 'load' },
+        { name: 'Unload model ' + chalk.dim('(stop vLLM server)'), value: 'unload' },
+        { name: 'Status', value: 'status' },
+        { name: chalk.dim('────────────────────────'), value: '_sep', disabled: true },
+        { name: 'Install model', value: 'install' },
+        { name: 'Remove model', value: 'remove' },
+        { name: 'List models', value: 'models' },
+      ],
+    });
+
+    switch (action) {
+      case 'chat': await chatFlow(); break;
+      case 'load': await loadFlow(); break;
+      case 'unload': await unloadFlow(); break;
+      case 'status': await statusFlow(); break;
+      case 'install': await installFlow(); break;
+      case 'remove': await uninstallFlow(); break;
+      case 'models': listModels(); break;
+    }
+  }));
+
+// ─── Subcommands ────────────────────────────────────────────────────────────
+
+privateCommand
+  .command('chat')
+  .description('Chat with a locally loaded DMind model')
+  .argument('[message]', 'Send a single message and exit')
+  .action(wrapAction(async (messageArg?: string) => { await chatFlow(messageArg); }));
+
+privateCommand
+  .command('install')
+  .description('Download a DMind model from Hugging Face')
+  .action(wrapAction(async () => { await installFlow(); }));
+
+privateCommand
+  .command('remove')
+  .description('Uninstall a downloaded model')
+  .action(wrapAction(async () => { await uninstallFlow(); }));
+
+privateCommand
+  .command('models')
+  .description('List available and installed models')
+  .action(wrapAction(async () => { listModels(); }));
+
+privateCommand
+  .command('load')
+  .description('Load a model into memory (start vLLM server in background)')
+  .action(wrapAction(async () => { await loadFlow(); }));
+
+privateCommand
+  .command('unload')
+  .description('Unload model from memory (stop vLLM server)')
+  .action(wrapAction(async () => { await unloadFlow(); }));
+
+privateCommand
+  .command('status')
+  .description('Show current server and model status')
+  .action(wrapAction(async () => { await statusFlow(); }));
+
+// ─── Load / Unload / Status flows ──────────────────────────────────────────
+
+async function loadFlow(): Promise<void> {
+  const installed = getInstalledIds();
+  if (installed.length === 0) {
+    warn('No models installed.');
+    info(`Run ${chalk.cyan('minara private install')} first.`);
+    return;
+  }
+
+  if (await isServerRunning()) {
+    const srv = getServerInfo();
+    const name = srv ? getModelDef(srv.modelId)?.name ?? srv.hfRepo : 'unknown';
+    warn(`Server already running with ${chalk.bold(name)}.`);
+    info(`Run ${chalk.cyan('minara private unload')} first to switch models.`);
+    return;
+  }
+
+  const py = findPython();
+  if (!py) { error('Python 3 is required.'); return; }
+
+  let modelId = getActiveId() ?? installed[0];
+  if (installed.length > 1) {
+    modelId = await select({
+      message: 'Select model to load:',
+      choices: installed.map((id) => {
+        const def = getModelDef(id);
+        return { name: def ? `${def.name} ${chalk.dim(`(${def.params})`)}` : id, value: id };
+      }),
+      default: modelId,
+    });
+  }
+
+  const model = getModelDef(modelId);
+  if (!model) { error('Model not found.'); return; }
+  setActiveModel(modelId);
+
+  info(`Loading ${chalk.bold(model.name)} in background…`);
+  console.log(chalk.dim(`  Logs: ${VLLM_LOG}`));
+  startServerDetached(py, model.id, model.hfRepo);
+
+  const spin = spinner('Starting vLLM server…');
+  const ready = await waitForServer();
+  spin.stop();
+
+  if (ready) {
+    success(`${chalk.bold(model.name)} loaded and serving on port ${VLLM_PORT}`);
+    info(`Chat: ${chalk.cyan('minara private chat')}`);
+    info(`Stop: ${chalk.cyan('minara private unload')}`);
+  } else {
+    error('Server failed to start in time.');
+    console.log(chalk.dim(`  Check logs: ${VLLM_LOG}`));
+  }
+}
+
+async function unloadFlow(): Promise<void> {
+  if (!(await isServerRunning())) {
+    info('No model server is currently running.');
+    return;
+  }
+
+  const srv = getServerInfo();
+  const name = srv ? getModelDef(srv.modelId)?.name ?? srv.hfRepo : 'model';
+  stopServer();
+
+  await new Promise((r) => setTimeout(r, 500));
+  success(`${chalk.bold(name)} unloaded — server stopped.`);
+}
+
+async function statusFlow(): Promise<void> {
+  const running = await isServerRunning();
+  const srv = getServerInfo();
+  const installed = getInstalledIds();
+
+  console.log('');
+  console.log(chalk.bold(' Local Model Status'));
+  console.log(chalk.dim(' ─'.repeat(24)));
+  console.log('');
+  console.log(`  Server     ${running ? chalk.green.bold('Running') : chalk.dim('Stopped')}`);
+  if (running && srv) {
+    const def = getModelDef(srv.modelId);
+    console.log(`  Model      ${chalk.bold(def?.name ?? srv.hfRepo)} ${chalk.dim(`(${def?.params ?? '?'})`)}`);
+    console.log(`  Port       ${chalk.cyan(String(VLLM_PORT))}`);
+    console.log(`  PID        ${chalk.dim(String(srv.pid))}`);
+    console.log(`  Started    ${chalk.dim(srv.startedAt)}`);
+  }
+  console.log(`  Installed  ${installed.length === 0 ? chalk.dim('none') : installed.map((id) => getModelDef(id)?.name ?? id).join(', ')}`);
+  console.log(`  Logs       ${chalk.dim(VLLM_LOG)}`);
+  console.log('');
+}
+
+// ─── Chat flow ──────────────────────────────────────────────────────────────
+
+async function chatFlow(messageArg?: string): Promise<void> {
+  const installed = getInstalledIds();
+  if (installed.length === 0) {
+    warn('No local models installed.');
+    info(`Run ${chalk.cyan('minara private install')} to download a DMind model first.`);
+    return;
+  }
+
+  const running = await isServerRunning();
+  const srv = getServerInfo();
+
+  // Determine which model to use
+  let model = srv ? getModelDef(srv.modelId) : undefined;
+  if (!model) model = getModelDef(getActiveId() ?? installed[0]);
+  if (!model) { error('Model not found.'); return; }
+
+  // If server is not running, start an attached session
+  let attachedProc: ChildProcess | null = null;
+  if (!running) {
+    const py = findPython();
+    if (!py) { error('Python 3 is required to run local models.'); return; }
+
+    info(`No loaded model. Starting ${chalk.bold(model.name)} for this session…`);
+    console.log(chalk.dim('  Tip: use `minara private load` to keep the server running between chats.'));
+    console.log('');
+
+    attachedProc = startServerAttached(py, model.hfRepo);
+
+    let stderrBuf = '';
+    attachedProc.stderr?.on('data', (d: Buffer) => { stderrBuf += d.toString(); });
+    attachedProc.on('exit', (code) => {
+      if (code && code !== 0) {
+        console.log('');
+        error('vLLM server exited unexpectedly.');
+        if (stderrBuf) console.log(chalk.dim(stderrBuf.slice(-500)));
+      }
+    });
+
+    const spin = spinner('Loading model (this may take a moment)…');
+    const ready = await waitForServer();
+    spin.stop();
+
+    if (!ready) {
+      error('Server did not become ready in time.');
+      attachedProc.kill();
+      return;
+    }
+    success('Model loaded');
+  } else {
+    info(`Using loaded model: ${chalk.bold(model.name)} ${chalk.dim(`(${model.params})`)}`);
+  }
+
+  const cleanup = () => {
+    if (attachedProc && !attachedProc.killed) {
+      attachedProc.kill('SIGTERM');
+    }
+  };
+  process.on('SIGINT', cleanup);
+  process.on('SIGTERM', cleanup);
+
+  try {
+    // Single-shot mode
+    if (messageArg) {
+      await sendAndPrint(model.hfRepo, messageArg);
+      return;
+    }
+
+    // Interactive REPL
+    console.log('');
+    console.log(
+      chalk.green.bold('Private Chat') + chalk.dim(` · ${model.name} (${model.params})`),
+    );
+    console.log(chalk.dim('─'.repeat(50)));
+    console.log(chalk.dim('Your data stays local. Type a message, Ctrl+C to exit.'));
+    console.log('');
+
+    const rl = createInterface({ input: process.stdin, output: process.stdout });
+    const ask = (): Promise<string> =>
+      new Promise((resolve) => rl.question(chalk.blue.bold('>>> '), resolve));
+
+    const history: { role: string; content: string }[] = [];
+
+    rl.on('close', () => {
+      console.log(chalk.dim('\nGoodbye!'));
+      cleanup();
+      process.exit(0);
+    });
+
+    while (true) {
+      const userMsg = (await ask()).trim();
+      if (!userMsg) continue;
+      if (userMsg.toLowerCase() === 'exit' || userMsg.toLowerCase() === 'quit') {
+        console.log(chalk.dim('Goodbye!'));
+        rl.close();
+        break;
+      }
+
+      if (userMsg === '/new') {
+        history.length = 0;
+        info('Conversation cleared.');
+        continue;
+      }
+
+      if (userMsg === '/help') {
+        console.log('');
+        console.log(chalk.bold('  Commands:'));
+        console.log(chalk.dim('  /new   ') + 'Clear conversation history');
+        console.log(chalk.dim('  exit   ') + 'Quit the chat');
+        console.log('');
+        continue;
+      }
+
+      history.push({ role: 'user', content: userMsg });
+      rl.pause();
+      try {
+        const reply = await sendAndPrint(model.hfRepo, userMsg, history);
+        if (reply) history.push({ role: 'assistant', content: reply });
+      } finally {
+        rl.resume();
+        process.stdout.write('\n');
+      }
+    }
+  } finally {
+    cleanup();
+  }
+}
+
+// ─── Streaming chat with local vLLM ────────────────────────────────────────
+
+async function sendAndPrint(
+  modelName: string,
+  message: string,
+  history?: { role: string; content: string }[],
+): Promise<string> {
+  const messages = history
+    ? [...history]
+    : [{ role: 'user', content: message }];
+
+  process.stdout.write(chalk.green.bold('DMind') + chalk.dim(': '));
+
+  try {
+    const res = await fetch(`${VLLM_BASE_URL}/v1/chat/completions`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: modelName,
+        messages,
+        stream: true,
+        max_tokens: 2048,
+      }),
+    });
+
+    if (!res.ok) {
+      const body = await res.text();
+      console.log('');
+      error(`Local model error ${res.status}: ${body}`);
+      return '';
+    }
+
+    const reader = res.body?.getReader();
+    if (!reader) { console.log(chalk.dim('(no response)')); return ''; }
+
+    let fullReply = '';
+    const decoder = new TextDecoder();
+    let buffer = '';
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      buffer += decoder.decode(value, { stream: true });
+
+      const lines = buffer.split('\n');
+      buffer = lines.pop() ?? '';
+
+      for (const line of lines) {
+        if (!line.startsWith('data:')) continue;
+        const data = line.slice(5).trim();
+        if (data === '[DONE]') break;
+        try {
+          const parsed = JSON.parse(data);
+          const content = parsed?.choices?.[0]?.delta?.content;
+          if (content) {
+            process.stdout.write(content);
+            fullReply += content;
+          }
+        } catch { /* skip malformed chunks */ }
+      }
+    }
+
+    console.log('\n');
+    return fullReply;
+  } catch (err) {
+    console.log('');
+    error(err instanceof Error ? err.message : String(err));
+    return '';
+  }
+}
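For context on the loop above: vLLM's OpenAI-compatible endpoint streams Server-Sent-Events-style `data:` lines. A standalone sketch of the per-line parsing that `sendAndPrint` performs (payloads illustrative):

```ts
// Typical chunk sequence on the wire:
//   data: {"choices":[{"delta":{"content":"Hello"}}]}
//   data: {"choices":[{"delta":{"content":" world"}}]}
//   data: [DONE]
function extractDelta(line: string): string | null {
  if (!line.startsWith('data:')) return null;
  const data = line.slice(5).trim();
  if (data === '[DONE]') return null;
  try {
    const parsed = JSON.parse(data) as { choices?: { delta?: { content?: string } }[] };
    return parsed.choices?.[0]?.delta?.content ?? null;
  } catch {
    return null; // malformed chunks are skipped, as in sendAndPrint
  }
}
```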
diff --git a/src/index.ts b/src/index.ts
index 5e18fc7..5edc679 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -29,6 +29,9 @@ import { discoverCommand } from './commands/discover.js';
 import { premiumCommand } from './commands/premium.js';
 import { configCommand } from './commands/config.js';
 
+// Local Models
+import { privateCommand } from './commands/private.js';
+
 const require = createRequire(import.meta.url);
 const { version } = require('../package.json') as { version: string };
 
@@ -84,6 +87,9 @@ program.addCommand(premiumCommand);
 // ── Config ───────────────────────────────────────────────────────────────
 program.addCommand(configCommand);
 
+// ── Local Models ─────────────────────────────────────────────────────────
+program.addCommand(privateCommand);
+
 // Default: show help
 program.action(() => {
   program.outputHelp();
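The registry that the next file persists in `~/.minara/models.json` stays deliberately small; a sketch of its contents after installing the nano model (shape mirrors the `ModelsState` interface below; values illustrative):

```ts
// Mirrors ModelsState in src/local-models.ts.
const exampleModelsState = {
  installed: ['dmind-3-nano'],  // ids from AVAILABLE_MODELS
  active: 'dmind-3-nano',       // default model for `load` and `chat`
};
```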
diff --git a/src/local-models.ts b/src/local-models.ts
new file mode 100644
index 0000000..ba1a7e0
--- /dev/null
+++ b/src/local-models.ts
@@ -0,0 +1,274 @@
+import { join } from 'node:path';
+import { homedir } from 'node:os';
+import {
+  existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync,
+  openSync, closeSync,
+} from 'node:fs';
+import { execSync, spawn, spawnSync, type ChildProcess } from 'node:child_process';
+
+const MINARA_DIR = join(homedir(), '.minara');
+const MODELS_FILE = join(MINARA_DIR, 'models.json');
+const SERVER_FILE = join(MINARA_DIR, 'vllm-server.json');
+export const VLLM_LOG = join(MINARA_DIR, 'vllm.log');
+
+export const VLLM_PORT = 8321;
+export const VLLM_BASE_URL = `http://localhost:${VLLM_PORT}`;
+
+// ─── Model registry ─────────────────────────────────────────────────────────
+
+export interface ModelDef {
+  id: string;
+  name: string;
+  hfRepo: string;
+  params: string;
+  recommended?: boolean;
+}
+
+export const AVAILABLE_MODELS: ModelDef[] = [
+  {
+    id: 'dmind-3-nano',
+    name: 'DMind-3-nano',
+    hfRepo: 'DMindAI/DMind-3-nano',
+    params: '270M',
+    recommended: true,
+  },
+  {
+    id: 'dmind-3-mini',
+    name: 'DMind-3-mini',
+    hfRepo: 'DMindAI/DMind-3-mini',
+    params: '4B',
+  },
+  {
+    id: 'dmind-3',
+    name: 'DMind-3',
+    hfRepo: 'DMindAI/DMind-3',
+    params: '21B',
+  },
+];
+
+// ─── State persistence ──────────────────────────────────────────────────────
+
+interface ModelsState {
+  installed: string[];
+  active?: string;
+}
+
+function ensureDir(): void {
+  if (!existsSync(MINARA_DIR)) mkdirSync(MINARA_DIR, { recursive: true, mode: 0o700 });
+}
+
+function loadState(): ModelsState {
+  if (!existsSync(MODELS_FILE)) return { installed: [] };
+  try {
+    return JSON.parse(readFileSync(MODELS_FILE, 'utf-8')) as ModelsState;
+  } catch {
+    return { installed: [] };
+  }
+}
+
+function saveState(s: ModelsState): void {
+  ensureDir();
+  writeFileSync(MODELS_FILE, JSON.stringify(s, null, 2), { mode: 0o600 });
+}
+
+export function getInstalledIds(): string[] {
+  return loadState().installed;
+}
+
+export function isInstalled(id: string): boolean {
+  return loadState().installed.includes(id);
+}
+
+export function getActiveId(): string | undefined {
+  const s = loadState();
+  if (s.installed.length === 0) return undefined;
+  return s.active && s.installed.includes(s.active) ? s.active : s.installed[0];
+}
+
+export function getModelDef(id: string): ModelDef | undefined {
+  return AVAILABLE_MODELS.find((m) => m.id === id);
+}
+
+export function markInstalled(id: string): void {
+  const s = loadState();
+  if (!s.installed.includes(id)) s.installed.push(id);
+  if (!s.active) s.active = id;
+  saveState(s);
+}
+
+export function markUninstalled(id: string): void {
+  const s = loadState();
+  s.installed = s.installed.filter((m) => m !== id);
+  if (s.active === id) s.active = s.installed[0];
+  saveState(s);
+}
+
+export function setActiveModel(id: string): void {
+  const s = loadState();
+  if (s.installed.includes(id)) {
+    s.active = id;
+    saveState(s);
+  }
+}
+
+// ─── Prerequisites ──────────────────────────────────────────────────────────
+
+export function findPython(): string | null {
+  for (const cmd of ['python3', 'python']) {
+    try {
+      const v = execSync(`${cmd} --version`, {
+        encoding: 'utf-8',
+        stdio: ['pipe', 'pipe', 'pipe'],
+      }).trim();
+      if (v.includes('3.')) return cmd;
+    } catch { /* skip */ }
+  }
+  return null;
+}
+
+export function hasVllm(py: string): boolean {
+  try {
+    execSync(`${py} -c "import vllm"`, { stdio: ['pipe', 'pipe', 'pipe'] });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+export function hasHfHub(py: string): boolean {
+  try {
+    execSync(`${py} -c "import huggingface_hub"`, { stdio: ['pipe', 'pipe', 'pipe'] });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+export function pipInstall(py: string, pkg: string): boolean {
+  const r = spawnSync(py, ['-m', 'pip', 'install', pkg], { stdio: 'inherit' });
+  return r.status === 0;
+}
+
+// ─── Model download / cache ─────────────────────────────────────────────────
+
+export function downloadModel(py: string, hfRepo: string): boolean {
+  const r = spawnSync(py, [
+    '-c',
+    `from huggingface_hub import snapshot_download; snapshot_download('${hfRepo}')`,
+  ], { stdio: 'inherit' });
+  return r.status === 0;
+}
+
+export function clearModelCache(py: string, hfRepo: string): boolean {
+  try {
+    execSync(`${py} -c "\
+from huggingface_hub import scan_cache_dir;\
+c=scan_cache_dir();\
+h=[r.commit_hash for repo in c.repos if repo.repo_id=='${hfRepo}' for r in repo.revisions];\
+c.delete_revisions(*h).execute() if h else None\
+"`, { stdio: ['pipe', 'pipe', 'pipe'] });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+// ─── Server info ──────────────────────────────────────────────────────────
+
+export interface ServerInfo {
+  pid: number;
+  modelId: string;
+  hfRepo: string;
+  startedAt: string;
+}
+
+export function getServerInfo(): ServerInfo | null {
+  if (!existsSync(SERVER_FILE)) return null;
+  try {
+    return JSON.parse(readFileSync(SERVER_FILE, 'utf-8')) as ServerInfo;
+  } catch {
+    return null;
+  }
+}
+
+function saveServerInfo(info: ServerInfo): void {
+  ensureDir();
+  writeFileSync(SERVER_FILE, JSON.stringify(info, null, 2), { mode: 0o600 });
+}
+
+function clearServerInfo(): void {
+  try { unlinkSync(SERVER_FILE); } catch { /* ignore */ }
+}
+
+// ─── vLLM server lifecycle ──────────────────────────────────────────────────
+
+export async function isServerRunning(): Promise<boolean> {
+  try {
+    const res = await fetch(`${VLLM_BASE_URL}/v1/models`, {
+      signal: AbortSignal.timeout(2000),
+    });
+    return res.ok;
+  } catch {
+    return false;
+  }
+}
+
+/** Start vLLM attached to current process (auto-stops when parent exits). */
+export function startServerAttached(py: string, hfRepo: string): ChildProcess {
+  return spawn(py, [
+    '-m', 'vllm.entrypoints.openai.api_server',
+    '--model', hfRepo,
+    '--port', String(VLLM_PORT),
+    '--host', '0.0.0.0',
+    '--trust-remote-code',
+  ], {
+    stdio: ['ignore', 'pipe', 'pipe'],
+    detached: false,
+  });
+}
+
+/** Start vLLM as a detached background process (survives CLI exit). */
+export function startServerDetached(py: string, modelId: string, hfRepo: string): number | null {
+  ensureDir();
+  const out = openSync(VLLM_LOG, 'a');
+  const err = openSync(VLLM_LOG, 'a');
+
+  const child = spawn(py, [
+    '-m', 'vllm.entrypoints.openai.api_server',
+    '--model', hfRepo,
+    '--port', String(VLLM_PORT),
+    '--host', '0.0.0.0',
+    '--trust-remote-code',
+  ], {
+    stdio: ['ignore', out, err],
+    detached: true,
+  });
+
+  child.unref();
+  closeSync(out);
+  closeSync(err);
+
+  if (child.pid) {
+    saveServerInfo({ pid: child.pid, modelId, hfRepo, startedAt: new Date().toISOString() });
+  }
+
+  return child.pid ?? null;
+}
+
+/** Stop the background vLLM server. */
+export function stopServer(): void {
+  const info = getServerInfo();
+  if (info) {
+    try { process.kill(info.pid, 'SIGTERM'); } catch { /* already dead */ }
+  }
+  clearServerInfo();
+}
+
+export async function waitForServer(timeoutMs = 120_000): Promise<boolean> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    if (await isServerRunning()) return true;
+    await new Promise((r) => setTimeout(r, 2000));
+  }
+  return false;
+}
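End to end, the lifecycle primitives from this patch compose as in the sketch below (assumes the model is already downloaded; import path illustrative):

```ts
import {
  findPython, startServerDetached, waitForServer,
  isServerRunning, stopServer, VLLM_BASE_URL,
} from './src/local-models.js';

async function demo(): Promise<void> {
  const py = findPython();
  if (!py) throw new Error('Python 3 is required');
  startServerDetached(py, 'dmind-3-nano', 'DMindAI/DMind-3-nano');
  if (!(await waitForServer())) throw new Error('server did not come up in time');
  console.log(await isServerRunning());                   // true
  const res = await fetch(`${VLLM_BASE_URL}/v1/models`);  // OpenAI-compatible listing
  console.log(await res.json());
  stopServer();                                           // SIGTERM + clear state file
}
```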
From c35c6814bc3db61b93b0c3a2cbb0f853371c0e30 Mon Sep 17 00:00:00 2001
From: yyh1102
Date: Wed, 25 Feb 2026 16:32:59 +0800
Subject: [PATCH 2/3] fix: improve local model compatibility and stability

- Auto-detect and fix x86_64 native deps on Apple Silicon (arm64)
- Resolve model path for repos with non-root config (DMind-3-nano model/ subdir)
- Add system prompt to prevent function call tag generation in chat
- Add 60s response timeout and stop sequences for function call tags
- Display system architecture during prerequisite checks
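Background for the arm64 scan in this patch: importing an x86_64 wheel under an arm64 Python fails in dyld with an error of roughly the shape below, which is what the `incompatible architecture` / `x86_64` substring checks in `fixArchForImport` key on (wording approximate, from memory; treat as illustrative):

```ts
// Illustrative stderr matched by the heuristics in fixArchForImport:
const sampleStderr =
  "ImportError: dlopen(.../site-packages/charset_normalizer/md.cpython-311-darwin.so, 0x0002): " +
  "tried: '...' (mach-o file, but is an incompatible architecture " +
  "(have 'x86_64', need 'arm64'))";
```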
---
 src/commands/assets.ts  |   9 +++
 src/commands/install.ts |  13 +++++
 src/commands/private.ts |  22 ++++++--
 src/local-models.ts     | 122 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 155 insertions(+), 11 deletions(-)

diff --git a/src/commands/assets.ts b/src/commands/assets.ts
index 9afad3c..f313d7a 100644
--- a/src/commands/assets.ts
+++ b/src/commands/assets.ts
@@ -38,6 +38,9 @@ async function showSpotAssets(token: string): Promise<void> {
   let totalUnrealizedPnl = 0;
   let hasUnrealizedPnl = false;
 
+  const STABLECOINS = new Set(['USDC', 'USDT']);
+  let stablecoinBalance = 0;
+
   for (const t of all) {
     const bal = Number(t.balance ?? 0);
     const price = Number(t.marketPrice ?? 0);
@@ -53,6 +56,11 @@ async function showSpotAssets(token: string): Promise<void> {
       hasUnrealizedPnl = true;
     }
 
+    const sym = String(t.tokenSymbol ?? '').toUpperCase();
+    if (STABLECOINS.has(sym)) {
+      stablecoinBalance += bal;
+    }
+
     if (bal > 0 && value >= MIN_DISPLAY_VALUE) {
       holdings.push({ ...t, _value: value });
     }
@@ -67,6 +75,7 @@ async function showSpotAssets(token: string): Promise<void> {
 
   console.log('');
   console.log(chalk.bold('Spot Wallet:'));
+  console.log(`  Balance (USDC+USDT) : ${fmt(stablecoinBalance)}`);
   console.log(`  Portfolio Value     : ${fmt(totalValue)}`);
   console.log(`  Unrealized PnL      : ${pnlFmt(totalUnrealizedPnl)}`);
   console.log(`  Realized PnL        : ${pnlFmt(totalRealizedPnl)}`);

diff --git a/src/commands/install.ts b/src/commands/install.ts
index 4f9e726..adda940 100644
--- a/src/commands/install.ts
+++ b/src/commands/install.ts
@@ -5,6 +5,7 @@ import {
   markInstalled, markUninstalled,
   findPython, hasVllm, hasHfHub,
   pipInstall, downloadModel, clearModelCache,
+  isAppleSilicon, getArchLabel, fixNativeDeps,
 } from '../local-models.js';
 import { error, info, success, warn, spinner } from '../utils.js';
 
@@ -117,6 +118,7 @@ function ensurePython(): string | null {
     console.log(chalk.dim('  https://www.python.org/downloads/'));
     return null;
   }
+  info(`Python found · ${chalk.dim(getArchLabel())}`);
   return py;
 }
 
@@ -149,5 +151,16 @@ async function ensureDeps(py: string): Promise<boolean> {
     success('huggingface_hub installed');
   }
 
+  // On Apple Silicon, scan all native extensions and fix x86_64 mismatches
+  if (isAppleSilicon()) {
+    info('Scanning native extensions for arm64 compatibility…');
+    const fixed = fixNativeDeps(py);
+    if (fixed.length > 0) {
+      success(`Fixed ${fixed.length} package(s) for arm64: ${chalk.dim(fixed.join(', '))}`);
+    } else {
+      success('All native extensions are arm64 compatible');
+    }
+  }
+
   return true;
 }
+ */ +export function fixArchForImport(py: string, targetModule: string): string[] { + if (!isAppleSilicon()) return []; + + const fixed: string[] = []; + const seen = new Set(); + + for (let i = 0; i < 30; i++) { + try { + execSync(`${py} -c "import ${targetModule}"`, { + stdio: ['pipe', 'pipe', 'pipe'], + timeout: 30_000, + }); + break; + } catch (err: unknown) { + const stderr = (err as { stderr?: Buffer })?.stderr?.toString() ?? ''; + const isArchError = + stderr.includes('incompatible architecture') || + stderr.includes('x86_64') || + stderr.includes('has not been built correctly'); + + if (!isArchError) break; + + // Extract the module directory from the .so path or import traceback + const soMatch = + stderr.match(/site-packages\/([^/]+)\//) || + stderr.match(/that ([\w-]+) has not been built/); + if (!soMatch) break; + + const modDir = soMatch[1]; + if (seen.has(modDir)) break; + seen.add(modDir); + + const pipName = MOD_TO_PIP[modDir] ?? modDir.replace(/_/g, '-'); + const r = spawnSync(py, ['-m', 'pip', 'install', '--force-reinstall', '--no-cache-dir', pipName], { + stdio: 'inherit', + }); + if (r.status === 0) fixed.push(pipName); + else break; + } + } + return fixed; +} + +/** + * Fix native deps for all modules required by the local model stack. + */ +export function fixNativeDeps(py: string): string[] { + if (!isAppleSilicon()) return []; + const all: string[] = []; + for (const mod of ['vllm', 'huggingface_hub']) { + all.push(...fixArchForImport(py, mod)); + } + return all; +} + // ─── Model download / cache ───────────────────────────────────────────────── export function downloadModel(py: string, hfRepo: string): boolean { @@ -159,6 +251,24 @@ export function downloadModel(py: string, hfRepo: string): boolean { return r.status === 0; } +/** + * Resolve the local filesystem path for a downloaded model. + * For repos with a `subdir`, returns the path to that subdirectory. + */ +export function resolveModelPath(py: string, model: ModelDef): string | null { + const suffix = model.subdir ? `, '${model.subdir}'` : ''; + const script = `from huggingface_hub import snapshot_download; import os; p=snapshot_download('${model.hfRepo}'); print(os.path.join(p${suffix}))`; + try { + return execSync(`${py} -c "${script}"`, { + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + timeout: 30_000, + }).trim(); + } catch { + return null; + } +} + export function clearModelCache(py: string, hfRepo: string): boolean { try { execSync(`${py} -c "\ @@ -214,10 +324,10 @@ export async function isServerRunning(): Promise { } /** Start vLLM attached to current process (auto-stops when parent exits). */ -export function startServerAttached(py: string, hfRepo: string): ChildProcess { +export function startServerAttached(py: string, modelPath: string): ChildProcess { return spawn(py, [ '-m', 'vllm.entrypoints.openai.api_server', - '--model', hfRepo, + '--model', modelPath, '--port', String(VLLM_PORT), '--host', '0.0.0.0', '--trust-remote-code', @@ -228,14 +338,14 @@ export function startServerAttached(py: string, hfRepo: string): ChildProcess { } /** Start vLLM as a detached background process (survives CLI exit). 
diff --git a/src/local-models.ts b/src/local-models.ts
index ba1a7e0..7813dd1 100644
--- a/src/local-models.ts
+++ b/src/local-models.ts
@@ -1,5 +1,5 @@
 import { join } from 'node:path';
-import { homedir } from 'node:os';
+import { homedir, platform, arch } from 'node:os';
 import {
   existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync,
   openSync, closeSync,
@@ -21,6 +21,8 @@ export interface ModelDef {
   name: string;
   hfRepo: string;
   params: string;
+  /** Subdirectory inside the repo where config.json / model weights live */
+  subdir?: string;
   recommended?: boolean;
 }
 
@@ -30,6 +32,7 @@ export const AVAILABLE_MODELS: ModelDef[] = [
     name: 'DMind-3-nano',
     hfRepo: 'DMindAI/DMind-3-nano',
     params: '270M',
+    subdir: 'model',
     recommended: true,
   },
   {
@@ -149,6 +152,95 @@ export function pipInstall(py: string, pkg: string): boolean {
   return r.status === 0;
 }
 
+// ─── Architecture ───────────────────────────────────────────────────────────
+
+export function isAppleSilicon(): boolean {
+  return platform() === 'darwin' && arch() === 'arm64';
+}
+
+export function getArchLabel(): string {
+  const a = arch();
+  const p = platform();
+  if (p === 'darwin') return a === 'arm64' ? 'Apple Silicon (arm64)' : `macOS (${a})`;
+  if (p === 'linux') return `Linux (${a})`;
+  return `${p} (${a})`;
+}
+
+/** Map Python module directory names to pip package names for common cases */
+const MOD_TO_PIP: Record<string, string> = {
+  charset_normalizer: 'charset-normalizer',
+  PIL: 'pillow',
+  cv2: 'opencv-python-headless',
+  yaml: 'pyyaml',
+  zmq: 'pyzmq',
+  _cffi_backend: 'cffi',
+  grpc: 'grpcio',
+  sklearn: 'scikit-learn',
+  skimage: 'scikit-image',
+};
+
+/**
+ * On arm64 macOS, pre-existing x86_64 native extensions cause ImportError.
+ * Iteratively try to import `targetModule`, detect which dependency has
+ * an arch-mismatched or broken .so file, fix it, and retry — until the
+ * import succeeds or no more fixable errors are found.
+ * Only fixes packages actually needed by the target, not the entire env. */
+export function fixArchForImport(py: string, targetModule: string): string[] {
+  if (!isAppleSilicon()) return [];
+
+  const fixed: string[] = [];
+  const seen = new Set<string>();
+
+  for (let i = 0; i < 30; i++) {
+    try {
+      execSync(`${py} -c "import ${targetModule}"`, {
+        stdio: ['pipe', 'pipe', 'pipe'],
+        timeout: 30_000,
+      });
+      break;
+    } catch (err: unknown) {
+      const stderr = (err as { stderr?: Buffer })?.stderr?.toString() ?? '';
+      const isArchError =
+        stderr.includes('incompatible architecture') ||
+        stderr.includes('x86_64') ||
+        stderr.includes('has not been built correctly');
+
+      if (!isArchError) break;
+
+      // Extract the module directory from the .so path or import traceback
+      const soMatch =
+        stderr.match(/site-packages\/([^/]+)\//) ||
+        stderr.match(/that ([\w-]+) has not been built/);
+      if (!soMatch) break;
+
+      const modDir = soMatch[1];
+      if (seen.has(modDir)) break;
+      seen.add(modDir);
+
+      const pipName = MOD_TO_PIP[modDir] ?? modDir.replace(/_/g, '-');
+      const r = spawnSync(py, ['-m', 'pip', 'install', '--force-reinstall', '--no-cache-dir', pipName], {
+        stdio: 'inherit',
+      });
+      if (r.status === 0) fixed.push(pipName);
+      else break;
+    }
+  }
+  return fixed;
+}
+
+/**
+ * Fix native deps for all modules required by the local model stack.
+ */
+export function fixNativeDeps(py: string): string[] {
+  if (!isAppleSilicon()) return [];
+  const all: string[] = [];
+  for (const mod of ['vllm', 'huggingface_hub']) {
+    all.push(...fixArchForImport(py, mod));
+  }
+  return all;
+}
+
 // ─── Model download / cache ─────────────────────────────────────────────────
 
 export function downloadModel(py: string, hfRepo: string): boolean {
@@ -159,6 +251,24 @@ export function downloadModel(py: string, hfRepo: string): boolean {
   return r.status === 0;
 }
 
+/**
+ * Resolve the local filesystem path for a downloaded model.
+ * For repos with a `subdir`, returns the path to that subdirectory.
+ */
+export function resolveModelPath(py: string, model: ModelDef): string | null {
+  const suffix = model.subdir ? `, '${model.subdir}'` : '';
+  const script = `from huggingface_hub import snapshot_download; import os; p=snapshot_download('${model.hfRepo}'); print(os.path.join(p${suffix}))`;
+  try {
+    return execSync(`${py} -c "${script}"`, {
+      encoding: 'utf-8',
+      stdio: ['pipe', 'pipe', 'pipe'],
+      timeout: 30_000,
+    }).trim();
+  } catch {
+    return null;
+  }
+}
+
 export function clearModelCache(py: string, hfRepo: string): boolean {
   try {
     execSync(`${py} -c "\
@@ -214,10 +324,10 @@ export async function isServerRunning(): Promise<boolean> {
   }
 }
 
 /** Start vLLM attached to current process (auto-stops when parent exits). */
-export function startServerAttached(py: string, hfRepo: string): ChildProcess {
+export function startServerAttached(py: string, modelPath: string): ChildProcess {
   return spawn(py, [
     '-m', 'vllm.entrypoints.openai.api_server',
-    '--model', hfRepo,
+    '--model', modelPath,
     '--port', String(VLLM_PORT),
     '--host', '0.0.0.0',
     '--trust-remote-code',
@@ -228,14 +338,14 @@ export function startServerAttached(py: string, modelPath: string): ChildProcess {
   });
 }
 
 /** Start vLLM as a detached background process (survives CLI exit). */
-export function startServerDetached(py: string, modelId: string, hfRepo: string): number | null {
+export function startServerDetached(py: string, modelId: string, modelPath: string): number | null {
   ensureDir();
   const out = openSync(VLLM_LOG, 'a');
   const err = openSync(VLLM_LOG, 'a');
 
   const child = spawn(py, [
     '-m', 'vllm.entrypoints.openai.api_server',
-    '--model', hfRepo,
+    '--model', modelPath,
     '--port', String(VLLM_PORT),
     '--host', '0.0.0.0',
     '--trust-remote-code',
@@ -249,7 +359,7 @@ export function startServerDetached(py: string, modelId: string, hfRepo: string)
   closeSync(err);
 
   if (child.pid) {
-    saveServerInfo({ pid: child.pid, modelId, hfRepo, startedAt: new Date().toISOString() });
+    saveServerInfo({ pid: child.pid, modelId, hfRepo: modelPath, startedAt: new Date().toISOString() });
   }
 
   return child.pid ?? null;
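For reference, `resolveModelPath` above ends up handing vLLM a snapshot directory inside the Hugging Face cache; with DMind-3-nano's `subdir: 'model'` the result looks roughly like this (hash illustrative):

```ts
// Layout follows huggingface_hub's cache convention: models--<org>--<name>/snapshots/<sha>/
const examplePath =
  '~/.cache/huggingface/hub/models--DMindAI--DMind-3-nano/snapshots/0123abcd4567/model';
```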
From 0c003a504d6c1253b8d22f6f862ca60e09faaa7e Mon Sep 17 00:00:00 2001
From: yyh1102
Date: Thu, 26 Feb 2026 15:59:13 +0800
Subject: [PATCH 3/3] feat: add local model update check and explicit refresh flow

Add Hugging Face revision checks for installed local models with cached
lookups, introduce an explicit `minara private update` refresh flow, and
surface update notices in load/chat without auto-updating.
---
 README.md               |  28 +++++-
 src/commands/chat.ts    |  10 +++
 src/commands/private.ts | 161 +++++++++++++++++++++++++++++
 src/local-models.ts     | 186 ++++++++++++++++++++++++++++++++++++
 4 files changed, 383 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6186b3c..14584d5 100644
--- a/README.md
+++ b/README.md
@@ -214,6 +214,32 @@ Minara: Bitcoin is currently trading at $95,432...
 exit     Quit the chat
 ```
 
+### Local Models (DMind / Hugging Face)
+
+| Command                  | Description                                                    |
+| ------------------------ | -------------------------------------------------------------- |
+| `minara private`         | Open interactive local-model menu                              |
+| `minara private install` | Download a DMind model from Hugging Face                       |
+| `minara private models`  | List available/installed local models                          |
+| `minara private load`    | Load selected model into vLLM server                           |
+| `minara private chat`    | Chat with local model                                          |
+| `minara private check`   | Check whether installed local models have a newer HF revision  |
+| `minara private update`  | Explicitly update an installed local model                     |
+| `minara private unload`  | Stop local vLLM server                                         |
+| `minara private status`  | Show local model server status                                 |
+
+```bash
+minara private            # Interactive menu
+minara private install    # Install local model from Hugging Face
+minara private check      # Check model revisions (local vs remote)
+minara private update     # Explicitly update selected installed model
+minara private load       # Start local model server
+minara private chat       # Chat locally
+minara private unload     # Stop local model server
+```
+
+> **Update behavior:** `load` / `chat` only show an update notice when a newer model revision exists. Models are updated only when you explicitly run `minara private update`.
+
 ### Market Discovery
 
 | Command | Description |
@@ -361,4 +387,4 @@ npm run test:coverage   # With coverage report
 
 ## License
 
-[MIT](LICENSE)
+[MIT](LICENSE)
\ No newline at end of file
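Approximate output of `minara private check`, based on the `updatesFlow` added to src/commands/private.ts in this patch (hashes illustrative):

```text
 Model Update Check
 ────────────────────────

 DMind-3-nano (DMindAI/DMind-3-nano)
   Status : Update available
   Local  : 0123abcd4567
   Remote : 89efcdab0123

1 model(s) can be refreshed from Hugging Face.
  Update with: minara private update
```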
diff --git a/src/commands/chat.ts b/src/commands/chat.ts
index 3af2ccf..1695876 100644
--- a/src/commands/chat.ts
+++ b/src/commands/chat.ts
@@ -191,6 +191,13 @@ export const chatCommand = new Command('chat')
     console.log('');
 
     const rl = createInterface({ input: process.stdin, output: process.stdout });
+    let exiting = false;
+
+    const handleSigint = () => {
+      if (exiting) return;
+      exiting = true;
+      rl.close();
+    };
 
     async function sendAndPrintWithPause(msg: string) {
       rl.pause();
@@ -205,7 +212,10 @@ export const chatCommand = new Command('chat')
     const ask = (): Promise<string> =>
       new Promise((resolve) => rl.question(chalk.blue.bold('>>> '), resolve));
 
+    rl.on('SIGINT', handleSigint);
+    process.on('SIGINT', handleSigint);
     rl.on('close', () => {
+      process.off('SIGINT', handleSigint);
       console.log(chalk.dim('\nGoodbye!'));
       process.exit(0);
     });
diff --git a/src/commands/private.ts b/src/commands/private.ts
index 8aaf5fb..0a0402b 100644
--- a/src/commands/private.ts
+++ b/src/commands/private.ts
@@ -1,5 +1,5 @@
 import { Command } from 'commander';
-import { select } from '@inquirer/prompts';
+import { select, confirm } from '@inquirer/prompts';
 import chalk from 'chalk';
 import { createInterface } from 'node:readline';
 import type { ChildProcess } from 'node:child_process';
@@ -8,6 +8,8 @@ import {
   getInstalledIds, getActiveId, getModelDef, setActiveModel,
   findPython, isServerRunning, startServerAttached, startServerDetached,
   stopServer, waitForServer, getServerInfo, resolveModelPath,
+  checkInstalledModelUpdates, checkModelUpdate,
+  hasHfHub, pipInstall, clearModelCache, downloadModel,
 } from '../local-models.js';
 import { installFlow, uninstallFlow, listModels } from './install.js';
 import { error, info, success, warn, spinner, wrapAction } from '../utils.js';
@@ -36,6 +38,8 @@ export const privateCommand = new Command('private')
       { name: 'Install model', value: 'install' },
       { name: 'Remove model', value: 'remove' },
       { name: 'List models', value: 'models' },
+      { name: 'Check model updates', value: 'check' },
+      { name: 'Update model', value: 'update' },
     ],
   });
@@ -47,6 +51,8 @@ export const privateCommand = new Command('private')
     case 'install': await installFlow(); break;
     case 'remove': await uninstallFlow(); break;
     case 'models': listModels(); break;
+    case 'check': await updatesFlow(); break;
+    case 'update': await updateFlow(); break;
   }
 }));
@@ -88,6 +94,17 @@ privateCommand
   .description('Show current server and model status')
   .action(wrapAction(async () => { await statusFlow(); }));
 
+privateCommand
+  .command('check')
+  .alias('updates')
+  .description('Check installed local models for Hugging Face updates')
+  .action(wrapAction(async () => { await updatesFlow(); }));
+
+privateCommand
+  .command('update')
+  .description('Update an installed local model from Hugging Face')
+  .action(wrapAction(async () => { await updateFlow(); }));
+
 // ─── Load / Unload / Status flows ──────────────────────────────────────────
 
 async function loadFlow(): Promise<void> {
@@ -124,6 +141,7 @@ async function loadFlow(): Promise<void> {
   const model = getModelDef(modelId);
   if (!model) { error('Model not found.'); return; }
   setActiveModel(modelId);
+  await maybeWarnModelUpdate(model.id);
 
   const modelPath = resolveModelPath(py, model);
   if (!modelPath) { error('Could not resolve model path from HuggingFace cache.'); return; }
@@ -183,6 +201,146 @@ async function statusFlow(): Promise<void> {
   console.log('');
 }
 
+function shortSha(v?: string): string {
+  return v ? v.slice(0, 12) : '—';
+}
+
+async function updatesFlow(): Promise<void> {
+  const installed = getInstalledIds();
+  if (installed.length === 0) {
+    info('No local models installed.');
+    info(`Run ${chalk.cyan('minara private install')} first.`);
+    return;
+  }
+
+  const py = findPython();
+  if (!py) {
+    error('Python 3 is required to check local model revisions.');
+    return;
+  }
+
+  const spin = spinner('Checking Hugging Face for model updates…');
+  const results = await checkInstalledModelUpdates(py);
+  spin.stop();
+
+  if (results.length === 0) {
+    info('No installed models found in state.');
+    return;
+  }
+
+  const updatable = results.filter((r) => r.hasUpdate);
+
+  console.log('');
+  console.log(chalk.bold(' Model Update Check'));
+  console.log(chalk.dim(' ─'.repeat(24)));
+  console.log('');
+
+  for (const r of results) {
+    const status = r.hasUpdate
+      ? chalk.yellow.bold('Update available')
+      : r.error
+        ? chalk.red('Check failed')
+        : chalk.green('Up-to-date');
+
+    console.log(` ${chalk.bold(r.modelName)} ${chalk.dim(`(${r.hfRepo})`)}`);
+    console.log(`   Status : ${status}`);
+    console.log(`   Local  : ${chalk.dim(shortSha(r.localRevision))}`);
+    console.log(`   Remote : ${chalk.dim(shortSha(r.remoteRevision))}`);
+    if (r.error) {
+      console.log(`   Note   : ${chalk.dim(r.error)}`);
+    }
+    console.log('');
+  }
+
+  if (updatable.length > 0) {
+    info(`${updatable.length} model(s) can be refreshed from Hugging Face.`);
+    console.log(chalk.dim(`  Update with: ${chalk.cyan('minara private update')}`));
+  } else {
+    success('All installed local models are up-to-date.');
+  }
+}
+
+async function ensureHfHubReady(py: string): Promise<boolean> {
+  if (hasHfHub(py)) return true;
+  warn('huggingface_hub is not installed.');
+  const ok = await confirm({ message: 'Install huggingface_hub now?', default: true });
+  if (!ok) return false;
+  if (!pipInstall(py, 'huggingface_hub')) {
+    error('Failed to install huggingface_hub.');
+    return false;
+  }
+  success('huggingface_hub installed');
+  return true;
+}
+
+async function updateFlow(): Promise<void> {
+  const installed = getInstalledIds();
+  if (installed.length === 0) {
+    info('No local models installed.');
+    info(`Run ${chalk.cyan('minara private install')} first.`);
+    return;
+  }
+
+  const py = findPython();
+  if (!py) {
+    error('Python 3 is required.');
+    return;
+  }
+  if (!(await ensureHfHubReady(py))) return;
+
+  const checkSpin = spinner('Checking which models have updates…');
+  const results = await checkInstalledModelUpdates(py);
+  checkSpin.stop();
+
+  const candidates = results.filter((r) => r.hasUpdate);
+  if (candidates.length === 0) {
+    success('All installed local models are up-to-date.');
+    return;
+  }
+
+  const selected = await select({
+    message: 'Select model to update:',
+    choices: candidates.map((r) => ({
+      name: `${r.modelName} ${chalk.dim(`(${shortSha(r.localRevision)} -> ${shortSha(r.remoteRevision)})`)}`,
+      value: r.modelId,
+    })),
+  });
+
+  const model = getModelDef(selected);
+  if (!model) {
+    error('Model not found.');
+    return;
+  }
+
+  const ok = await confirm({
+    message: `Update ${model.name} now? This may take a while and consume bandwidth.`,
+    default: true,
+  });
+  if (!ok) return;
+
+  const spin = spinner(`Updating ${model.name} from Hugging Face…`);
+  clearModelCache(py, model.hfRepo);
+  const downloaded = downloadModel(py, model.hfRepo);
+  spin.stop();
+
+  if (!downloaded) {
+    error(`Failed to update ${model.name}.`);
+    return;
+  }
+
+  success(`${model.name} updated successfully.`);
+}
+
+async function maybeWarnModelUpdate(modelId: string): Promise<void> {
+  const py = findPython();
+  if (!py) return;
+  const infoRes = await checkModelUpdate(py, modelId);
+  if (infoRes?.hasUpdate) {
+    warn(`A newer Hugging Face revision is available for ${chalk.bold(infoRes.modelName)}.`);
+    info(`Run ${chalk.cyan('minara private update')} to update explicitly.`);
+  }
+}
+
 // ─── Chat flow ──────────────────────────────────────────────────────────────
 
 async function chatFlow(messageArg?: string): Promise<void> {
@@ -200,6 +358,7 @@ async function chatFlow(messageArg?: string): Promise<void> {
   let model = srv ? getModelDef(srv.modelId) : undefined;
   if (!model) model = getModelDef(getActiveId() ?? installed[0]);
   if (!model) { error('Model not found.'); return; }
+  await maybeWarnModelUpdate(model.id);
 
   // vLLM API requires the model name that was used to start the server
   // (which is the resolved local path when subdir is used)
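One design note on the revision cache used below: `checkModelUpdate` reuses a prior remote lookup for up to `maxAgeMs` (default four hours) as long as the local revision is unchanged, while `checkInstalledModelUpdates` passes `0` to force a fresh check. A usage sketch:

```ts
const py = findPython();
if (py) {
  // Cached: at most one Hugging Face API call per four hours per model.
  const quick = await checkModelUpdate(py, 'dmind-3-nano');
  // Fresh: bypasses the cache, as `minara private check` effectively does.
  const fresh = await checkModelUpdate(py, 'dmind-3-nano', 0);
  console.log(quick?.hasUpdate, fresh?.remoteRevision);
}
```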
diff --git a/src/local-models.ts b/src/local-models.ts
index 7813dd1..3e7c001 100644
--- a/src/local-models.ts
+++ b/src/local-models.ts
@@ -9,6 +9,7 @@ import { execSync, spawn, spawnSync, type ChildProcess } from 'node:child_process';
 const MINARA_DIR = join(homedir(), '.minara');
 const MODELS_FILE = join(MINARA_DIR, 'models.json');
 const SERVER_FILE = join(MINARA_DIR, 'vllm-server.json');
+const MODEL_UPDATE_CACHE_FILE = join(MINARA_DIR, 'model-update-check.json');
 export const VLLM_LOG = join(MINARA_DIR, 'vllm.log');
 
 export const VLLM_PORT = 8321;
@@ -49,6 +50,28 @@ export const AVAILABLE_MODELS: ModelDef[] = [
   },
 ];
 
+export interface ModelUpdateInfo {
+  modelId: string;
+  modelName: string;
+  hfRepo: string;
+  localRevision?: string;
+  remoteRevision?: string;
+  hasUpdate: boolean;
+  error?: string;
+}
+
+interface ModelUpdateCacheEntry {
+  checkedAt: number;
+  localRevision?: string;
+  remoteRevision?: string;
+  hasUpdate: boolean;
+  error?: string;
+}
+
+interface ModelUpdateCache {
+  byModelId: Record<string, ModelUpdateCacheEntry>;
+}
+
 // ─── State persistence ──────────────────────────────────────────────────────
 
 interface ModelsState {
@@ -283,6 +306,169 @@ c.delete_revisions(*h).execute() if h else None\
   }
 }
 
+function loadModelUpdateCache(): ModelUpdateCache {
+  if (!existsSync(MODEL_UPDATE_CACHE_FILE)) return { byModelId: {} };
+  try {
+    return JSON.parse(readFileSync(MODEL_UPDATE_CACHE_FILE, 'utf-8')) as ModelUpdateCache;
+  } catch {
+    return { byModelId: {} };
+  }
+}
+
+function saveModelUpdateCache(cache: ModelUpdateCache): void {
+  ensureDir();
+  writeFileSync(MODEL_UPDATE_CACHE_FILE, JSON.stringify(cache, null, 2), { mode: 0o600 });
+}
+
+function getLocalCachedRevision(py: string, hfRepo: string): string | null {
+  const script = `
+import sys
+from huggingface_hub import scan_cache_dir
+
+repo_id = sys.argv[1]
+cache = scan_cache_dir()
+repos = [r for r in cache.repos if r.repo_id == repo_id]
+if not repos:
+    print("")
+    raise SystemExit(0)
+
+revisions = []
+for repo in repos:
+    revisions.extend(repo.revisions)
+
+if not revisions:
+    print("")
+    raise SystemExit(0)
+
+revisions.sort(key=lambda r: r.last_modified or 0, reverse=True)
+print(revisions[0].commit_hash or "")
+`.trim();
+
+  const r = spawnSync(py, ['-c', script, hfRepo], {
+    encoding: 'utf-8',
+    stdio: ['pipe', 'pipe', 'pipe'],
+  });
+
+  if (r.status !== 0) return null;
+  const out = (r.stdout ?? '').trim();
+  return out || null;
+}
+
+async function getRemoteLatestRevision(hfRepo: string): Promise<string | null> {
+  const safeRepo = hfRepo.split('/').map((s) => encodeURIComponent(s)).join('/');
+  try {
+    const res = await fetch(`https://huggingface.co/api/models/${safeRepo}`, {
+      headers: { Accept: 'application/json' },
+      signal: AbortSignal.timeout(5000),
+    });
+    if (!res.ok) return null;
+    const data = (await res.json()) as { sha?: unknown };
+    return typeof data.sha === 'string' && data.sha ? data.sha : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Check one installed model for updates.
+ * Uses a local cache to avoid frequent Hugging Face API calls.
+ */
+export async function checkModelUpdate(
+  py: string,
+  modelId: string,
+  maxAgeMs = 4 * 60 * 60 * 1000,
+): Promise<ModelUpdateInfo | null> {
+  const model = getModelDef(modelId);
+  if (!model) return null;
+
+  const localRevision = getLocalCachedRevision(py, model.hfRepo) ?? undefined;
+  const cache = loadModelUpdateCache();
+  const cached = cache.byModelId[model.id];
+
+  if (
+    cached &&
+    Date.now() - cached.checkedAt < maxAgeMs &&
+    cached.localRevision === localRevision
+  ) {
+    return {
+      modelId: model.id,
+      modelName: model.name,
+      hfRepo: model.hfRepo,
+      localRevision,
+      remoteRevision: cached.remoteRevision,
+      hasUpdate: cached.hasUpdate,
+      error: cached.error,
+    };
+  }
+
+  const remoteRevision = await getRemoteLatestRevision(model.hfRepo) ?? undefined;
+
+  let result: ModelUpdateInfo;
+  if (!remoteRevision) {
+    result = {
+      modelId: model.id,
+      modelName: model.name,
+      hfRepo: model.hfRepo,
+      localRevision,
+      hasUpdate: false,
+      error: 'Could not fetch latest remote revision',
+    };
+  } else if (!localRevision) {
+    result = {
+      modelId: model.id,
+      modelName: model.name,
+      hfRepo: model.hfRepo,
+      remoteRevision,
+      hasUpdate: false,
+      error: 'No local cached revision found',
+    };
+  } else {
+    result = {
+      modelId: model.id,
+      modelName: model.name,
+      hfRepo: model.hfRepo,
+      localRevision,
+      remoteRevision,
+      hasUpdate: localRevision !== remoteRevision,
+    };
+  }
+
+  cache.byModelId[model.id] = {
+    checkedAt: Date.now(),
+    localRevision: result.localRevision,
+    remoteRevision: result.remoteRevision,
+    hasUpdate: result.hasUpdate,
+    error: result.error,
+  };
+  saveModelUpdateCache(cache);
+
+  return result;
+}
+
+/**
+ * Compare locally cached Hugging Face model revisions with remote latest sha.
+ * Returns one entry per installed model.
+ */
+export async function checkInstalledModelUpdates(py: string): Promise<ModelUpdateInfo[]> {
+  const installedIds = getInstalledIds();
+  const models = installedIds
+    .map((id) => getModelDef(id))
+    .filter((m): m is ModelDef => Boolean(m));
+
+  const checks = models.map(async (model): Promise<ModelUpdateInfo> => {
+    const info = await checkModelUpdate(py, model.id, 0);
+    return info ?? {
+      modelId: model.id,
+      modelName: model.name,
+      hfRepo: model.hfRepo,
+      hasUpdate: false,
+      error: 'Model not found',
+    };
+  });
+
+  return Promise.all(checks);
+}
+
 // ─── Server info ──────────────────────────────────────────────────────────
 
 export interface ServerInfo {
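Finally, the lookup cache these checks maintain lives in `~/.minara/model-update-check.json`, one entry per model id, mirroring `ModelUpdateCacheEntry` above (values illustrative):

```ts
const exampleUpdateCache = {
  byModelId: {
    'dmind-3-nano': {
      checkedAt: 1772090353000,          // Date.now() at check time
      localRevision: '0123abcd4567ef89', // newest commit hash in the local HF cache
      remoteRevision: 'fedcba9876543210', // sha from https://huggingface.co/api/models/...
      hasUpdate: true,
    },
  },
};
```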