From d507a416702151ecc1e735068cccd1af0c6b0802 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 15:11:51 -0700 Subject: [PATCH 01/18] Refactor inference around external Ollama routing --- .../local-model/DeviceCapabilitySection.tsx | 51 +-- .../local-model/ModelDownloadSection.tsx | 18 +- .../local-model/ModelStatusSection.test.tsx | 227 +++++++++--- .../panels/local-model/ModelStatusSection.tsx | 200 ++--------- app/src/utils/tauriCommands/localAi.ts | 13 +- .../e2e/specs/local-model-runtime.spec.ts | 26 +- src/core/all.rs | 4 + src/core/cli_tests.rs | 1 + src/openhuman/app_state/ops.rs | 2 +- .../channels/providers/presentation.rs | 3 +- src/openhuman/inference/mod.rs | 15 + src/openhuman/inference/ops.rs | 89 +++++ src/openhuman/inference/ops_tests.rs | 96 ++++++ src/openhuman/inference/schemas.rs | 325 ++++++++++++++++++ src/openhuman/inference/schemas_tests.rs | 78 +++++ src/openhuman/local_ai/mod.rs | 1 + src/openhuman/local_ai/ops.rs | 80 ++--- src/openhuman/local_ai/schemas.rs | 41 +-- src/openhuman/local_ai/schemas_tests.rs | 10 +- src/openhuman/local_ai/service/assets.rs | 16 +- src/openhuman/local_ai/service/bootstrap.rs | 30 +- .../local_ai/service/ollama_admin.rs | 81 +---- .../local_ai/service/ollama_admin_tests.rs | 113 +++++- src/openhuman/local_ai/types.rs | 8 +- src/openhuman/mod.rs | 1 + src/openhuman/subconscious/executor.rs | 2 +- tests/json_rpc_e2e.rs | 180 ++++++++++ 27 files changed, 1207 insertions(+), 504 deletions(-) create mode 100644 src/openhuman/inference/mod.rs create mode 100644 src/openhuman/inference/ops.rs create mode 100644 src/openhuman/inference/ops_tests.rs create mode 100644 src/openhuman/inference/schemas.rs create mode 100644 src/openhuman/inference/schemas_tests.rs diff --git a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx index b910d2e2f9..8fdf648319 100644 --- a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx +++ b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx @@ -14,30 +14,15 @@ interface DeviceCapabilitySectionProps { formatRamGb: (bytes: number) => string; onPresetApplied?: (result: ApplyPresetResult) => void; /** - * When `false`, the Ollama runtime isn't installed yet. Local tiers - * require Ollama, so they're rendered disabled with a notice that - * lets the user install Ollama in place. The "Disabled (cloud - * fallback)" option stays enabled since it doesn't need Ollama. + * When `false`, the external Ollama runtime isn't reachable yet. Local tiers + * stay disabled until the user runs Ollama themselves. The "Disabled (cloud + * fallback)" option stays enabled since it doesn't depend on Ollama. */ ollamaAvailable?: boolean; - /** - * Triggers the same install pipeline the Runtime Status section uses. - * Wired only when `ollamaAvailable === false` to surface an inline - * Install Ollama button next to the locked tiers. - */ onTriggerOllamaInstall?: () => void; - /** True while an install pipeline is already running. */ isTriggeringInstall?: boolean; - /** - * Live state from `local_ai_status` so the notice can show real install - * progress: `installing`, `downloading`, `degraded`, etc. The button's - * own `isTriggeringInstall` only covers the RPC round-trip (~ms); - * `installState` covers the entire backend pipeline (~60s). - */ installState?: string; - /** Latest `status.warning` text — shown under the progress label. */ installWarning?: string | null; - /** Latest `status.error_detail` — shown when state is `degraded`. */ installError?: string | null; } @@ -57,9 +42,13 @@ const DeviceCapabilitySection = ({ installWarning, installError, }: DeviceCapabilitySectionProps) => { - const installInProgress = - installState === 'installing' || installState === 'downloading' || installState === 'loading'; - const installFailed = installState === 'degraded'; + void onTriggerOllamaInstall; + void isTriggeringInstall; + void installState; + void installWarning; + void installError; + const installInProgress = false; + const installFailed = false; const [applying, setApplying] = useState(null); const [applyError, setApplyError] = useState(''); const [applySuccess, setApplySuccess] = useState(null); @@ -187,26 +176,18 @@ const DeviceCapabilitySection = ({ ) : ( <>
- Install Ollama first. Local - tiers run on the Ollama runtime, which isn't installed yet. The “Disabled - (cloud fallback)” option stays available either way. + Run Ollama first. Local + tiers depend on an externally managed Ollama endpoint. Start it yourself, pull the + models you want, and keep using “Disabled (cloud fallback)” until the + runtime is reachable.
- {onTriggerOllamaInstall && ( - - )} - Install manually + Ollama docs
@@ -257,7 +238,7 @@ const DeviceCapabilitySection = ({ key={preset.tier} onClick={() => void handleApply(preset.tier)} disabled={applying !== null || locked} - title={locked ? 'Install Ollama first to use this tier' : undefined} + title={locked ? 'Run Ollama first to use this tier' : undefined} className={`w-full text-left rounded-lg border p-3 transition-colors ${ isCurrent ? 'border-primary-400 bg-primary-50' diff --git a/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx b/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx index 994f2fe42d..5ac567c75f 100644 --- a/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx +++ b/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx @@ -126,12 +126,18 @@ const ModelDownloadSection = ({ {item?.path && (
{item.path}
)} - + {item?.provider === 'ollama' || item?.provider === 'lm_studio' ? ( +
+ Manage this model in your external runtime. +
+ ) : ( + + )} ))} diff --git a/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx b/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx index 1b34c17d54..c511b8a77a 100644 --- a/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx +++ b/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx @@ -1,7 +1,7 @@ -import { fireEvent, render, screen } from '@testing-library/react'; +import { render, screen } from '@testing-library/react'; import { describe, expect, it, vi } from 'vitest'; -import type { LocalAiDiagnostics, RepairAction } from '../../../../utils/tauriCommands'; +import type { LocalAiDiagnostics } from '../../../../utils/tauriCommands'; import ModelStatusSection from './ModelStatusSection'; const defaultProps = { @@ -55,11 +55,11 @@ const makeDiagnostics = (overrides: Partial = {}): LocalAiDi }); describe('ModelStatusSection diagnostics', () => { - it('disables bootstrap controls when runtime is disabled', () => { + it('still renders runtime status when runtime is disabled', () => { render(); - expect(screen.getByRole('button', { name: 'Bootstrap / Resume' })).toBeDisabled(); - expect(screen.getByRole('button', { name: 'Force Re-bootstrap' })).toBeDisabled(); + expect(screen.getByText('Runtime Status')).toBeTruthy(); + expect(screen.getByText('Refresh')).toBeTruthy(); }); it('shows the base URL being checked', () => { @@ -122,64 +122,19 @@ describe('ModelStatusSection diagnostics', () => { expect(screen.getByText('/opt/homebrew/bin/ollama')).toBeTruthy(); }); - it('renders repair action buttons', () => { - const repairActions: RepairAction[] = [ - { action: 'install_ollama' }, - { action: 'start_server', binary_path: '/usr/local/bin/ollama' }, - { action: 'pull_model', model: 'gemma3:1b-it-qat' }, - ]; + it('renders manual-management guidance when diagnostics fail', () => { render( ); - expect(screen.getByText('Install Ollama')).toBeTruthy(); - expect(screen.getByText('Start Server')).toBeTruthy(); - expect(screen.getByText('Pull gemma3:1b-it-qat')).toBeTruthy(); - }); - - it('calls onRepairAction with the correct action when button is clicked', () => { - const onRepairAction = vi.fn(); - const repairActions: RepairAction[] = [{ action: 'install_ollama' }]; - render( - - ); - fireEvent.click(screen.getByText('Install Ollama')); - expect(onRepairAction).toHaveBeenCalledWith({ action: 'install_ollama' }); - }); - - it('calls onRepairAction with pull_model action', () => { - const onRepairAction = vi.fn(); - const repairActions: RepairAction[] = [{ action: 'pull_model', model: 'gemma3:1b-it-qat' }]; - render( - - ); - fireEvent.click(screen.getByText('Pull gemma3:1b-it-qat')); - expect(onRepairAction).toHaveBeenCalledWith({ - action: 'pull_model', - model: 'gemma3:1b-it-qat', - }); + expect( + screen.getByText(/Manage the Ollama process and model pulls outside OpenHuman/) + ).toBeTruthy(); }); it('does not render repair actions section when repair_actions is empty', () => { @@ -212,4 +167,168 @@ describe('ModelStatusSection diagnostics', () => { render(); expect(screen.getByText(/Click.*Run Diagnostics/)).toBeTruthy(); }); + + it('shows external-runtime guidance when ollama is unavailable', () => { + render( + + ); + + expect(screen.getByText('Ollama runtime unavailable')).toBeTruthy(); + expect(screen.getByText(/external inference runtime/)).toBeTruthy(); + expect(screen.getByText('Ollama docs')).toBeTruthy(); + }); + + it('renders docs link instead of install controls when ollama is unavailable', () => { + render( + + ); + + expect(screen.queryByRole('button', { name: 'Install Ollama' })).toBeNull(); + expect(screen.queryByRole('button', { name: 'Set Path' })).toBeNull(); + expect(screen.getByRole('link', { name: 'Ollama docs' })).toBeTruthy(); + }); }); diff --git a/app/src/components/settings/panels/local-model/ModelStatusSection.tsx b/app/src/components/settings/panels/local-model/ModelStatusSection.tsx index a89089d735..9854635a3d 100644 --- a/app/src/components/settings/panels/local-model/ModelStatusSection.tsx +++ b/app/src/components/settings/panels/local-model/ModelStatusSection.tsx @@ -37,17 +37,6 @@ interface ModelStatusSectionProps { onRepairAction?: (action: RepairAction) => void; } -const repairActionLabel = (action: RepairAction): string => { - switch (action.action) { - case 'install_ollama': - return 'Install Ollama'; - case 'start_server': - return 'Start Server'; - case 'pull_model': - return `Pull ${action.model}`; - } -}; - const ModelStatusSection = ({ status, downloads, @@ -78,19 +67,26 @@ const ModelStatusSection = ({ onRunDiagnostics, onRepairAction, }: ModelStatusSectionProps) => { - // Core reports `ollama_available: false` when no Ollama binary is - // discoverable on disk. The backend short-circuits all `has_model` HTTP - // probes in that state, so model rows below will all read "missing". Surface - // a clear install CTA up front so users don't have to interpret the empty - // model state on their own. + // OpenHuman no longer installs or launches Ollama itself. When the runtime + // is unavailable, surface manual guidance instead of management controls. const showInstallOllamaCta = downloads?.ollama_available === false; + void isTriggeringDownload; + void bootstrapMessage; + void isInstalling; + void isInstallError; + void showErrorDetail; + void ollamaPathInput; + void isSettingPath; + void runtimeEnabled; + void onTriggerDownload; + void onSetOllamaPath; + void onClearOllamaPath; + void onSetOllamaPathInput; + void onToggleErrorDetail; + void onRepairAction; + if (showInstallOllamaCta) { - // No Ollama on disk — the runtime-status card and diagnostics panels - // below would just read "n/a" / "missing" everywhere, which is more - // confusing than helpful. Render only the install CTA, with the binary - // path setter inline for users who installed Ollama in a non-standard - // location that auto-discovery can't find. return (
@@ -107,80 +103,22 @@ const ModelStatusSection = ({ />
-
Ollama is not installed
+
Ollama runtime unavailable
- Local AI features (chat, vision, embedding) need the Ollama runtime. Install it below - — the installer runs silently and lands in your workspace; no console window will - appear. + OpenHuman now treats Ollama as an external inference runtime. Start your own Ollama + server, pull the models you want, and point workload routing at it.
- - Install manually + Ollama docs
- - {isInstallError && status?.error_detail && ( -
- - {showErrorDetail && ( -
-                {status.error_detail}
-              
- )} -
- )} - -
-
- Already installed in a custom location? -
-
- Point us at the binary and we'll use it instead of running the installer. -
-
- onSetOllamaPathInput(e.target.value)} - placeholder="C:\Users\you\AppData\Local\Programs\Ollama\ollama.exe" - className="flex-1 rounded-md border border-amber-300 bg-white px-2 py-1.5 text-xs text-stone-900 placeholder:text-stone-400 focus:border-amber-500 focus:outline-none" - /> - - {ollamaPathInput && ( - - )} -
-
); } @@ -272,7 +210,7 @@ const ModelStatusSection = ({ {status?.warning &&
{status.warning}
} {statusError &&
{statusError}
} - {isInstallError && status?.error_detail && ( + {status?.error_detail && (
)} - -
-
- Ollama Binary Path (optional) -
-
- onSetOllamaPathInput(e.target.value)} - placeholder="/usr/local/bin/ollama" - className="flex-1 rounded-md border border-stone-200 bg-white px-2 py-1.5 text-xs text-stone-900 placeholder:text-stone-400 focus:border-primary-500 focus:outline-none" - /> - - {ollamaPathInput && ( - - )} -
-
- -
- {status?.state === 'ready' ? ( - - - - - Running - - ) : ( - - )} - - {bootstrapMessage && {bootstrapMessage}} -
@@ -376,14 +251,14 @@ const ModelStatusSection = ({
{!diagnostics && !diagnosticsError && (

- Click “Run Diagnostics” to verify Ollama is running and models are - installed. + Click “Run Diagnostics” to verify your external Ollama endpoint is + reachable and has the expected models.

)} {isDiagnosticsLoading && (
- Checking Ollama server and models... + Checking Ollama endpoint and models...
)} {diagnosticsError && ( @@ -511,23 +386,10 @@ const ModelStatusSection = ({
)} - {diagnostics.repair_actions && diagnostics.repair_actions.length > 0 && ( -
-
- Suggested Fixes -
-
- {diagnostics.repair_actions.map((action, i) => ( - - ))} -
-
- )} +
+ Manage the Ollama process and model pulls outside OpenHuman, then rerun + diagnostics. +
)}
diff --git a/app/src/utils/tauriCommands/localAi.ts b/app/src/utils/tauriCommands/localAi.ts index c1b487d1cf..432de5f6f0 100644 --- a/app/src/utils/tauriCommands/localAi.ts +++ b/app/src/utils/tauriCommands/localAi.ts @@ -1,5 +1,9 @@ /** - * Local AI / Ollama commands. + * Local AI / Ollama-facing commands routed through the core. + * + * The renderer never talks to Ollama directly. It always calls the core, and + * the core decides whether to route a request to the configured inference + * backend (for example an external Ollama endpoint). */ import { callCoreRpc } from '../../services/coreRpcClient'; import { CommandResponse, isTauri, tauriErrorMessage } from './common'; @@ -51,10 +55,9 @@ export interface LocalAiAssetsStatus { tts: LocalAiAssetStatus; quantization: string; /** - * True when the core can find an Ollama binary on disk. When false the UI - * should render an "Install Ollama" CTA instead of model state — every - * Ollama-backed asset will be reported as `missing` and `/api/tags` - * probes are skipped entirely (no 30s timeout). + * True when the configured Ollama endpoint is reachable enough for model + * checks. When false the UI should render external-runtime guidance instead + * of pretending the app can install or launch Ollama itself. */ ollama_available: boolean; } diff --git a/app/test/e2e/specs/local-model-runtime.spec.ts b/app/test/e2e/specs/local-model-runtime.spec.ts index 71ec471749..aaa2aec0eb 100644 --- a/app/test/e2e/specs/local-model-runtime.spec.ts +++ b/app/test/e2e/specs/local-model-runtime.spec.ts @@ -43,9 +43,9 @@ async function waitForAnyText(candidates, timeout = 20_000) { return null; } -// Local model runtime requires Ollama binary which is not available in the -// Linux CI Docker container. The "Local model runtime" card and "Manage" -// button only appear on the home page when Ollama is detected. Skip on Linux. +// Local model runtime now talks to an external Ollama endpoint through core. +// CI does not provision a live Ollama server, so keep this spec skipped until +// a deterministic mockable local-runtime harness exists for WDIO. describe.skip('Local model runtime flow', () => { before(async () => { await startMockServer(); @@ -57,7 +57,7 @@ describe.skip('Local model runtime flow', () => { await stopMockServer(); }); - it('can trigger local model bootstrap from UI and enter active runtime state', async () => { + it('shows direct-runtime guidance instead of app-managed bootstrap controls', async () => { await triggerAuthDeepLink('e2e-local-model-token'); await waitForWindowVisible(25_000); await waitForWebView(15_000); @@ -84,14 +84,18 @@ describe.skip('Local model runtime flow', () => { 'Local model runtime is unavailable in this core build. Restart app after updating to the latest build.'; expect(await textExists(incompatibleError)).toBe(false); - await clickText('Bootstrap / Resume', 12_000); - await waitForAnyText(['Triggering...'], 8_000); - - const activeState = await waitForAnyText(['Downloading', 'Loading', 'Ready'], 25_000); - if (!activeState) { + const guidance = await waitForAnyText( + [ + 'Ollama runtime unavailable', + 'Manage the Ollama process and model pulls outside OpenHuman.', + 'Ollama docs', + ], + 25_000 + ); + if (!guidance) { const tree = await dumpAccessibilityTree(); - console.log('[LocalModelE2E] No active runtime state seen. Tree:\n', tree.slice(0, 5000)); + console.log('[LocalModelE2E] No direct-runtime guidance seen. Tree:\n', tree.slice(0, 5000)); } - expect(activeState).not.toBeNull(); + expect(guidance).not.toBeNull(); }); }); diff --git a/src/core/all.rs b/src/core/all.rs index 2b67247dc9..3bf468d0b8 100644 --- a/src/core/all.rs +++ b/src/core/all.rs @@ -149,6 +149,8 @@ fn build_registered_controllers() -> Vec { controllers.extend(crate::openhuman::service::all_service_registered_controllers()); // Data migration utilities controllers.extend(crate::openhuman::migration::all_migration_registered_controllers()); + // External inference runtime access + controllers.extend(crate::openhuman::inference::all_inference_registered_controllers()); // Local AI model management and inference controllers.extend(crate::openhuman::local_ai::all_local_ai_registered_controllers()); // People resolution and interaction scoring @@ -271,6 +273,7 @@ fn build_declared_controller_schemas() -> Vec { schemas.extend(crate::openhuman::credentials::all_credentials_controller_schemas()); schemas.extend(crate::openhuman::service::all_service_controller_schemas()); schemas.extend(crate::openhuman::migration::all_migration_controller_schemas()); + schemas.extend(crate::openhuman::inference::all_inference_controller_schemas()); schemas.extend(crate::openhuman::local_ai::all_local_ai_controller_schemas()); schemas.extend(crate::openhuman::people::all_people_controller_schemas()); schemas.extend( @@ -357,6 +360,7 @@ pub fn namespace_description(namespace: &str) -> Option<&'static str> { "doctor" => Some("Run diagnostics for workspace and runtime health."), "encrypt" => Some("Encrypt secure values managed by secret storage."), "health" => Some("Process and component health snapshots."), + "inference" => Some("Connect to configured text, vision, and embedding inference runtimes."), "local_ai" => Some("Local AI chat, inference, downloads, and media operations."), "migrate" => Some("Data migration utilities."), "screen_intelligence" => Some("Screen capture, permissions, and accessibility automation."), diff --git a/src/core/cli_tests.rs b/src/core/cli_tests.rs index 0e5e83752a..111586e358 100644 --- a/src/core/cli_tests.rs +++ b/src/core/cli_tests.rs @@ -24,6 +24,7 @@ fn grouped_schemas_contains_migrated_namespaces() { assert!(grouped.contains_key("auth")); assert!(grouped.contains_key("service")); assert!(grouped.contains_key("migrate")); + assert!(grouped.contains_key("inference")); assert!(grouped.contains_key("local_ai")); } diff --git a/src/openhuman/app_state/ops.rs b/src/openhuman/app_state/ops.rs index a37abccb53..309f9684ed 100644 --- a/src/openhuman/app_state/ops.rs +++ b/src/openhuman/app_state/ops.rs @@ -410,7 +410,7 @@ async fn build_runtime_snapshot(config: &Config) -> RuntimeSnapshot { .await }; - let local_ai = match crate::openhuman::local_ai::rpc::local_ai_status(config).await { + let local_ai = match crate::openhuman::inference::rpc::inference_status(config).await { Ok(outcome) => outcome.value, Err(error) => { warn!("{LOG_PREFIX} local_ai status failed during snapshot: {error}"); diff --git a/src/openhuman/channels/providers/presentation.rs b/src/openhuman/channels/providers/presentation.rs index 6f9049c7d4..16f57287bc 100644 --- a/src/openhuman/channels/providers/presentation.rs +++ b/src/openhuman/channels/providers/presentation.rs @@ -396,7 +396,8 @@ async fn try_reaction(user_message: &str) -> Option { return None; } - match crate::openhuman::local_ai::ops::local_ai_should_react(&config, user_message, "web").await + match crate::openhuman::inference::ops::inference_should_react(&config, user_message, "web") + .await { Ok(outcome) => { let decision = outcome.value; diff --git a/src/openhuman/inference/mod.rs b/src/openhuman/inference/mod.rs new file mode 100644 index 0000000000..79ec294b50 --- /dev/null +++ b/src/openhuman/inference/mod.rs @@ -0,0 +1,15 @@ +//! External inference domain. +//! +//! This module is the canonical controller surface for text / vision / +//! embedding inference. The underlying implementation still reuses the +//! existing local-runtime service during the migration away from the +//! `local_ai` catch-all namespace. + +pub mod ops; +mod schemas; + +pub use ops as rpc; +pub use schemas::{ + all_controller_schemas as all_inference_controller_schemas, + all_registered_controllers as all_inference_registered_controllers, +}; diff --git a/src/openhuman/inference/ops.rs b/src/openhuman/inference/ops.rs new file mode 100644 index 0000000000..a0f2d76688 --- /dev/null +++ b/src/openhuman/inference/ops.rs @@ -0,0 +1,89 @@ +//! JSON-RPC controller surface for inference operations. + +use crate::openhuman::config::Config; +use crate::openhuman::local_ai; +use crate::openhuman::local_ai::gif_decision::GifDecision; +use crate::openhuman::local_ai::ops::{LocalAiChatMessage, ReactionDecision}; +use crate::openhuman::local_ai::sentiment::SentimentResult; +use crate::openhuman::local_ai::{LocalAiEmbeddingResult, LocalAiStatus, TenorSearchResult}; +use crate::rpc::RpcOutcome; + +pub async fn inference_status(config: &Config) -> Result, String> { + local_ai::rpc::local_ai_status(config).await +} + +pub async fn inference_summarize( + config: &Config, + text: &str, + max_tokens: Option, +) -> Result, String> { + local_ai::rpc::local_ai_summarize(config, text, max_tokens).await +} + +pub async fn inference_prompt( + config: &Config, + prompt: &str, + max_tokens: Option, + no_think: Option, +) -> Result, String> { + local_ai::rpc::local_ai_prompt(config, prompt, max_tokens, no_think).await +} + +pub async fn inference_vision_prompt( + config: &Config, + prompt: &str, + image_refs: &[String], + max_tokens: Option, +) -> Result, String> { + local_ai::rpc::local_ai_vision_prompt(config, prompt, image_refs, max_tokens).await +} + +pub async fn inference_embed( + config: &Config, + inputs: &[String], +) -> Result, String> { + local_ai::rpc::local_ai_embed(config, inputs).await +} + +pub async fn inference_chat( + config: &Config, + messages: Vec, + max_tokens: Option, +) -> Result, String> { + local_ai::rpc::local_ai_chat(config, messages, max_tokens).await +} + +pub async fn inference_should_react( + config: &Config, + message: &str, + channel_type: &str, +) -> Result, String> { + local_ai::rpc::local_ai_should_react(config, message, channel_type).await +} + +pub async fn inference_analyze_sentiment( + config: &Config, + message: &str, +) -> Result, String> { + local_ai::sentiment::local_ai_analyze_sentiment(config, message).await +} + +pub async fn inference_should_send_gif( + config: &Config, + message: &str, + channel_type: &str, +) -> Result, String> { + local_ai::gif_decision::local_ai_should_send_gif(config, message, channel_type).await +} + +pub async fn inference_tenor_search( + config: &Config, + query: &str, + limit: Option, +) -> Result, String> { + local_ai::gif_decision::tenor_search(config, query, limit).await +} + +#[cfg(test)] +#[path = "ops_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/ops_tests.rs b/src/openhuman/inference/ops_tests.rs new file mode 100644 index 0000000000..40870db77b --- /dev/null +++ b/src/openhuman/inference/ops_tests.rs @@ -0,0 +1,96 @@ +use super::*; +use tempfile::tempdir; + +fn disabled_config() -> Config { + let tmp = tempdir().expect("tempdir"); + let mut config = Config::default(); + config.workspace_dir = tmp.path().join("workspace"); + config.config_path = tmp.path().join("config.toml"); + config.local_ai.runtime_enabled = false; + config.local_ai.opt_in_confirmed = false; + config +} + +#[tokio::test] +async fn inference_status_reports_disabled_state_when_runtime_disabled() { + let config = disabled_config(); + let outcome = inference_status(&config).await.expect("status"); + assert!( + matches!(outcome.value.state.as_str(), "idle" | "disabled"), + "unexpected state: {}", + outcome.value.state + ); +} + +#[tokio::test] +async fn inference_prompt_reuses_local_ai_disabled_error() { + let config = disabled_config(); + let err = inference_prompt(&config, "hello", None, Some(true)) + .await + .expect_err("prompt should fail"); + assert!(err.contains("local ai is disabled")); +} + +#[tokio::test] +async fn inference_summarize_reuses_local_ai_disabled_error() { + let config = disabled_config(); + let err = inference_summarize(&config, "hello", None) + .await + .expect_err("summarize should fail"); + assert!(err.contains("local ai is disabled")); +} + +#[tokio::test] +async fn inference_embed_reuses_local_ai_disabled_error() { + let config = disabled_config(); + let err = inference_embed(&config, &["hello".to_string()]) + .await + .expect_err("embed should fail"); + assert!(err.contains("local ai is disabled")); +} + +#[tokio::test] +async fn inference_chat_rejects_empty_messages() { + let config = disabled_config(); + let err = inference_chat(&config, vec![], None) + .await + .expect_err("chat should fail"); + assert!(err.contains("must not be empty")); +} + +#[tokio::test] +async fn inference_should_react_short_circuits_for_empty_message() { + let config = disabled_config(); + let outcome = inference_should_react(&config, " ", "web") + .await + .expect("reaction decision"); + assert!(!outcome.value.should_react); + assert!(outcome.value.emoji.is_none()); +} + +#[tokio::test] +async fn inference_analyze_sentiment_handles_empty_message() { + let config = disabled_config(); + let outcome = inference_analyze_sentiment(&config, " ") + .await + .expect("sentiment"); + assert_eq!(outcome.value.valence, "neutral"); +} + +#[tokio::test] +async fn inference_should_send_gif_short_circuits_for_empty_message() { + let config = disabled_config(); + let outcome = inference_should_send_gif(&config, " ", "web") + .await + .expect("gif decision"); + assert!(!outcome.value.should_send_gif); +} + +#[tokio::test] +async fn inference_tenor_search_requires_query() { + let config = disabled_config(); + let err = inference_tenor_search(&config, " ", Some(3)) + .await + .expect_err("query validation should fail"); + assert!(err.contains("query is required")); +} diff --git a/src/openhuman/inference/schemas.rs b/src/openhuman/inference/schemas.rs new file mode 100644 index 0000000000..d6233ecf29 --- /dev/null +++ b/src/openhuman/inference/schemas.rs @@ -0,0 +1,325 @@ +use serde::de::DeserializeOwned; +use serde::Deserialize; +use serde_json::{Map, Value}; + +use crate::core::all::{ControllerFuture, RegisteredController}; +use crate::core::ControllerSchema; +use crate::openhuman::config::rpc as config_rpc; +use crate::rpc::RpcOutcome; + +#[derive(Debug, Deserialize)] +struct InferenceSummarizeParams { + text: String, + max_tokens: Option, +} + +#[derive(Debug, Deserialize)] +struct InferencePromptParams { + prompt: String, + max_tokens: Option, + no_think: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceVisionPromptParams { + prompt: String, + image_refs: Vec, + max_tokens: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceEmbedParams { + inputs: Vec, +} + +#[derive(Debug, Deserialize)] +struct InferenceChatMessageParam { + role: String, + content: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceChatParams { + messages: Vec, + max_tokens: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceShouldReactParams { + message: String, + channel_type: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceAnalyzeSentimentParams { + message: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceShouldSendGifParams { + message: String, + channel_type: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceTenorSearchParams { + query: String, + limit: Option, +} + +pub fn all_controller_schemas() -> Vec { + vec![ + schemas("status"), + schemas("summarize"), + schemas("prompt"), + schemas("vision_prompt"), + schemas("embed"), + schemas("chat"), + schemas("should_react"), + schemas("analyze_sentiment"), + schemas("should_send_gif"), + schemas("tenor_search"), + ] +} + +pub fn all_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: schemas("status"), + handler: handle_inference_status, + }, + RegisteredController { + schema: schemas("summarize"), + handler: handle_inference_summarize, + }, + RegisteredController { + schema: schemas("prompt"), + handler: handle_inference_prompt, + }, + RegisteredController { + schema: schemas("vision_prompt"), + handler: handle_inference_vision_prompt, + }, + RegisteredController { + schema: schemas("embed"), + handler: handle_inference_embed, + }, + RegisteredController { + schema: schemas("chat"), + handler: handle_inference_chat, + }, + RegisteredController { + schema: schemas("should_react"), + handler: handle_inference_should_react, + }, + RegisteredController { + schema: schemas("analyze_sentiment"), + handler: handle_inference_analyze_sentiment, + }, + RegisteredController { + schema: schemas("should_send_gif"), + handler: handle_inference_should_send_gif, + }, + RegisteredController { + schema: schemas("tenor_search"), + handler: handle_inference_tenor_search, + }, + ] +} + +pub fn schemas(function: &str) -> ControllerSchema { + let (source, target_function) = match function { + "status" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_status"), + "status", + ), + "summarize" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_summarize"), + "summarize", + ), + "prompt" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_prompt"), + "prompt", + ), + "vision_prompt" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_vision_prompt"), + "vision_prompt", + ), + "embed" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_embed"), + "embed", + ), + "chat" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_chat"), + "chat", + ), + "should_react" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_should_react"), + "should_react", + ), + "analyze_sentiment" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_analyze_sentiment"), + "analyze_sentiment", + ), + "should_send_gif" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_should_send_gif"), + "should_send_gif", + ), + "tenor_search" => ( + crate::openhuman::local_ai::local_ai_controller_schema("local_ai_tenor_search"), + "tenor_search", + ), + other => panic!("unknown inference schema: {other}"), + }; + + ControllerSchema { + namespace: "inference", + function: target_function, + description: source.description, + inputs: source.inputs, + outputs: source.outputs, + } +} + +fn handle_inference_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(crate::openhuman::inference::rpc::inference_status(&config).await?) + }) +} + +fn handle_inference_summarize(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_summarize(&config, &p.text, p.max_tokens) + .await?, + ) + }) +} + +fn handle_inference_prompt(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_prompt( + &config, + &p.prompt, + p.max_tokens, + p.no_think, + ) + .await?, + ) + }) +} + +fn handle_inference_vision_prompt(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_vision_prompt( + &config, + &p.prompt, + &p.image_refs, + p.max_tokens, + ) + .await?, + ) + }) +} + +fn handle_inference_embed(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json(crate::openhuman::inference::rpc::inference_embed(&config, &p.inputs).await?) + }) +} + +fn handle_inference_chat(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + let messages = p + .messages + .into_iter() + .map( + |message| crate::openhuman::local_ai::ops::LocalAiChatMessage { + role: message.role, + content: message.content, + }, + ) + .collect(); + to_json( + crate::openhuman::inference::rpc::inference_chat(&config, messages, p.max_tokens) + .await?, + ) + }) +} + +fn handle_inference_should_react(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_should_react( + &config, + &p.message, + &p.channel_type, + ) + .await?, + ) + }) +} + +fn handle_inference_analyze_sentiment(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_analyze_sentiment(&config, &p.message) + .await?, + ) + }) +} + +fn handle_inference_should_send_gif(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_should_send_gif( + &config, + &p.message, + &p.channel_type, + ) + .await?, + ) + }) +} + +fn handle_inference_tenor_search(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_tenor_search(&config, &p.query, p.limit) + .await?, + ) + }) +} + +fn deserialize_params(params: Map) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) +} + +fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} + +#[cfg(test)] +#[path = "schemas_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/schemas_tests.rs b/src/openhuman/inference/schemas_tests.rs new file mode 100644 index 0000000000..86a682c1d0 --- /dev/null +++ b/src/openhuman/inference/schemas_tests.rs @@ -0,0 +1,78 @@ +use super::*; + +#[test] +fn inference_catalog_counts_match_and_nonempty() { + let declared = all_controller_schemas(); + let registered = all_registered_controllers(); + assert_eq!(declared.len(), registered.len()); + assert!(declared.len() >= 10); +} + +#[test] +fn inference_schemas_use_inference_namespace() { + for schema in all_controller_schemas() { + assert_eq!( + schema.namespace, "inference", + "function {}", + schema.function + ); + assert!(!schema.description.is_empty()); + assert!(!schema.outputs.is_empty()); + } +} + +#[test] +fn inference_schema_function_names_are_stable() { + let functions: Vec<&str> = all_controller_schemas() + .into_iter() + .map(|schema| schema.function) + .collect(); + assert!(functions.contains(&"status")); + assert!(functions.contains(&"prompt")); + assert!(functions.contains(&"vision_prompt")); + assert!(functions.contains(&"embed")); + assert!(functions.contains(&"chat")); +} + +#[test] +fn inference_prompt_schema_reuses_local_ai_shape_with_new_namespace() { + let schema = schemas("prompt"); + assert_eq!(schema.namespace, "inference"); + assert_eq!(schema.function, "prompt"); + assert!(schema.inputs.iter().any(|field| field.name == "prompt")); + assert!(schema.inputs.iter().any(|field| field.name == "max_tokens")); +} + +#[test] +fn inference_chat_schema_requires_messages() { + let schema = schemas("chat"); + assert_eq!(schema.namespace, "inference"); + assert_eq!(schema.function, "chat"); + assert!(schema + .inputs + .iter() + .any(|field| field.name == "messages" && field.required)); +} + +#[test] +fn inference_unknown_schema_panics() { + let panic = std::panic::catch_unwind(|| schemas("no_such_function")); + assert!(panic.is_err()); +} + +#[tokio::test] +async fn inference_status_handler_returns_cli_json() { + let value = handle_inference_status(Map::new()) + .await + .expect("handler value"); + assert!(value.get("result").is_some() || value.get("logs").is_some()); +} + +#[tokio::test] +async fn inference_prompt_handler_rejects_invalid_shape() { + let params = Map::from_iter([("prompt".to_string(), Value::Bool(true))]); + let err = handle_inference_prompt(params) + .await + .expect_err("invalid params"); + assert!(err.contains("invalid params")); +} diff --git a/src/openhuman/local_ai/mod.rs b/src/openhuman/local_ai/mod.rs index 596e983973..d3876b7be5 100644 --- a/src/openhuman/local_ai/mod.rs +++ b/src/openhuman/local_ai/mod.rs @@ -43,6 +43,7 @@ pub use presets::{ModelPreset, ModelTier, VisionMode}; pub use schemas::{ all_controller_schemas as all_local_ai_controller_schemas, all_registered_controllers as all_local_ai_registered_controllers, + schemas as local_ai_controller_schema, }; pub use sentiment::SentimentResult; pub(crate) use service::whisper_engine; diff --git a/src/openhuman/local_ai/ops.rs b/src/openhuman/local_ai/ops.rs index 1602f8bd57..35c2e500cf 100644 --- a/src/openhuman/local_ai/ops.rs +++ b/src/openhuman/local_ai/ops.rs @@ -176,23 +176,8 @@ pub async fn local_ai_status( pub async fn local_ai_shutdown_owned( config: &mut Config, ) -> Result, String> { - let service = local_ai::global(config); - service.shutdown_owned_ollama(config).await; - - // Shift any ollama-routed workload back to "cloud" (= primary). - let cleared = clear_ollama_workload_routes(config); - if cleared > 0 { - log::info!( - "[local_ai] shutdown_owned: shifted {cleared} ollama-routed workload(s) back to cloud" - ); - config.save().await.map_err(|e| e.to_string())?; - } - - service.mark_disabled(config); - Ok(RpcOutcome::single_log( - service.status(), - "local ai runtime gated off (owned daemon killed if any)", - )) + let _ = config; + Err("OpenHuman does not manage the Ollama process anymore. Stop or restart your external Ollama runtime directly.".to_string()) } /// Clear every per-workload `*_provider` field whose stored value starts @@ -234,21 +219,8 @@ pub async fn local_ai_download( config: &Config, force: bool, ) -> Result, String> { - let service = local_ai::global(config); - if force { - service.reset_to_idle(config); - } - let service_clone = service.clone(); - let config_clone = config.clone(); - tokio::spawn(async move { - if let Err(err) = service_clone.download_all_models(&config_clone).await { - service_clone.mark_degraded(err); - } - }); - Ok(RpcOutcome::single_log( - service.status(), - "local ai full model download triggered", - )) + let _ = (config, force); + Err("OpenHuman no longer downloads or starts Ollama for you. Start your external Ollama runtime and pull models yourself.".to_string()) } /// Triggers a download of all local AI assets and returns progress information. @@ -256,25 +228,8 @@ pub async fn local_ai_download_all_assets( config: &Config, force: bool, ) -> Result, String> { - let service = local_ai::global(config); - if force { - service.reset_to_idle(config); - } - let service_clone = service.clone(); - let config_clone = config.clone(); - tokio::spawn(async move { - if let Err(err) = service_clone.download_all_models(&config_clone).await { - service_clone.mark_degraded(err); - } - }); - let progress = service - .downloads_progress(config) - .await - .map_err(|e| e.to_string())?; - Ok(RpcOutcome::single_log( - progress, - "local ai full asset download triggered", - )) + let _ = (config, force); + Err("OpenHuman no longer downloads Ollama assets. Start your external Ollama runtime and manage model pulls yourself.".to_string()) } /// Generates a summary of the provided text using local AI models. @@ -467,14 +422,21 @@ pub async fn local_ai_download_asset( config: &Config, capability: &str, ) -> Result, String> { - let service = local_ai::global(config); - let output = service - .download_asset(config, capability.trim()) - .await - .map_err(|e| e.to_string())?; - Ok(RpcOutcome::single_log( - output, - "local ai asset download triggered", + let capability = capability.trim().to_ascii_lowercase(); + if matches!(capability.as_str(), "stt" | "tts") { + let service = local_ai::global(config); + let output = service + .download_asset(config, capability.as_str()) + .await + .map_err(|e| e.to_string())?; + return Ok(RpcOutcome::single_log( + output, + "local ai voice asset download triggered", + )); + } + + Err(format!( + "OpenHuman no longer downloads `{capability}` via Ollama. Start your external Ollama runtime and pull that model yourself." )) } diff --git a/src/openhuman/local_ai/schemas.rs b/src/openhuman/local_ai/schemas.rs index cf8c56f45e..d2b9bb5b45 100644 --- a/src/openhuman/local_ai/schemas.rs +++ b/src/openhuman/local_ai/schemas.rs @@ -925,45 +925,8 @@ fn handle_local_ai_diagnostics(_params: Map) -> ControllerFuture fn handle_local_ai_set_ollama_path(params: Map) -> ControllerFuture { Box::pin(async move { - let p = deserialize_params::(params)?; - let path_str = p.path.trim().to_string(); - tracing::debug!(path = %path_str, "[local_ai] set_ollama_path: validating"); - - let new_value = if path_str.is_empty() { - None - } else { - let path = std::path::Path::new(&path_str); - if !path.is_file() { - return Err(format!( - "Ollama binary not found at '{}'. Provide a valid path to the ollama executable.", - path_str - )); - } - Some(path_str.clone()) - }; - - let mut config = config_rpc::load_config_with_timeout().await?; - config.local_ai.ollama_binary_path = new_value.clone(); - config - .save() - .await - .map_err(|e| format!("save config: {e}"))?; - tracing::debug!(path = ?new_value, "[local_ai] set_ollama_path: config saved, triggering re-bootstrap"); - - let service = crate::openhuman::local_ai::global(&config); - service.reset_to_idle(&config); - let service_clone = service.clone(); - let config_clone = config.clone(); - tokio::spawn(async move { - service_clone.bootstrap(&config_clone).await; - }); - - let current_status = - serde_json::to_value(service.status()).map_err(|e| format!("serialize: {e}"))?; - Ok(serde_json::json!({ - "ollama_binary_path": new_value, - "status": current_status, - })) + let _ = deserialize_params::(params)?; + Err("OpenHuman no longer manages an Ollama binary path. Point your inference setup at an already-running Ollama-compatible endpoint instead.".to_string()) }) } diff --git a/src/openhuman/local_ai/schemas_tests.rs b/src/openhuman/local_ai/schemas_tests.rs index d898f1f562..b07e60568e 100644 --- a/src/openhuman/local_ai/schemas_tests.rs +++ b/src/openhuman/local_ai/schemas_tests.rs @@ -238,7 +238,7 @@ async fn handle_apply_preset_accepts_valid_tier_and_persists() { } #[tokio::test] -async fn handle_set_ollama_path_rejects_nonexistent_path() { +async fn handle_set_ollama_path_reports_external_runtime_contract() { let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); let tmp = TempDir::new().unwrap(); unsafe { @@ -252,22 +252,22 @@ async fn handle_set_ollama_path_rejects_nonexistent_path() { unsafe { std::env::remove_var("OPENHUMAN_WORKSPACE"); } - assert!(err.contains("Ollama binary not found")); + assert!(err.contains("no longer manages an Ollama binary path")); } #[tokio::test] -async fn handle_set_ollama_path_accepts_empty_string_to_clear() { +async fn handle_set_ollama_path_rejects_empty_string_too() { let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); let tmp = TempDir::new().unwrap(); unsafe { std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); } let params = Map::from_iter([("path".to_string(), serde_json::json!(""))]); - // Empty path clears the setting — must not error. - let _ = handle_local_ai_set_ollama_path(params).await.expect("ok"); + let err = handle_local_ai_set_ollama_path(params).await.unwrap_err(); unsafe { std::env::remove_var("OPENHUMAN_WORKSPACE"); } + assert!(err.contains("no longer manages an Ollama binary path")); } /// Regression test for the CodeRabbit #7 race on PR #1755: when two diff --git a/src/openhuman/local_ai/service/assets.rs b/src/openhuman/local_ai/service/assets.rs index 8b281cbc7c..8802843406 100644 --- a/src/openhuman/local_ai/service/assets.rs +++ b/src/openhuman/local_ai/service/assets.rs @@ -37,23 +37,21 @@ impl LocalAiService { "[local_ai:assets:provider_routing] entry" ); - // Pre-flight precondition: if no Ollama binary exists anywhere - // discoverable, every Ollama-backed `has_model` call will fail (or - // time out). LM Studio still delegates embeddings to Ollama in this - // first provider slice, so it needs the same pre-flight for the - // embedding branch. + // External-runtime precondition: OpenHuman no longer installs or + // starts Ollama itself, so the interesting question is whether the + // user-managed runtime is reachable right now. let uses_ollama_assets = matches!( provider, LocalAiProvider::Ollama | LocalAiProvider::LmStudio ); let ollama_available = if uses_ollama_assets { - let present = self.ollama_binary_present(config); + let present = self.ollama_healthy().await; debug!( target: "local_ai::assets", %correlation_id, provider = %provider.as_str(), ollama_available = present, - "[local_ai:assets:provider_routing] ollama binary check" + "[local_ai:assets:provider_routing] ollama runtime check" ); present } else { @@ -121,7 +119,7 @@ impl LocalAiService { %correlation_id, provider = "ollama", model = %embedding_model, - "[local_ai:assets:provider_routing] lm studio embedding check skipped; ollama binary missing" + "[local_ai:assets:provider_routing] lm studio embedding check skipped; ollama runtime unavailable" ); false }; @@ -216,7 +214,7 @@ impl LocalAiService { trace!( target: "local_ai::assets", %correlation_id, - branch = "ollama_missing_binary", + branch = "ollama_runtime_unavailable", "[local_ai:assets:provider_routing] selected provider branch" ); (false, false, false) diff --git a/src/openhuman/local_ai/service/bootstrap.rs b/src/openhuman/local_ai/service/bootstrap.rs index 843f8d9477..30bff62f0a 100644 --- a/src/openhuman/local_ai/service/bootstrap.rs +++ b/src/openhuman/local_ai/service/bootstrap.rs @@ -301,30 +301,12 @@ impl LocalAiService { return; } - if let Err(first_err) = self.ensure_ollama_server(&effective_config).await { - log::warn!( - "[local_ai] ensure_ollama_server failed, retrying with fresh install: {first_err}" - ); - // Force a fresh install attempt before giving up. - { - let mut status = self.status.lock(); - status.state = "installing".to_string(); - status.warning = Some("Retrying Ollama installation...".to_string()); - status.error_detail = None; - status.error_category = None; - } - if let Err(err) = self.ensure_ollama_server_fresh(&effective_config).await { - let mut status = self.status.lock(); - status.state = "degraded".to_string(); - let is_install_error = status.error_category.as_deref() == Some("install"); - if is_install_error { - status.warning = Some(err); - } else { - status.error_category = Some("server".to_string()); - status.warning = Some(format_degraded_warning(&err, &effective_config)); - } - return; - } + if let Err(err) = self.ensure_ollama_server(&effective_config).await { + let mut status = self.status.lock(); + status.state = "degraded".to_string(); + status.error_category = Some("server".to_string()); + status.warning = Some(format_degraded_warning(&err, &effective_config)); + return; } if let Err(err) = self.ensure_models_available(&effective_config).await { diff --git a/src/openhuman/local_ai/service/ollama_admin.rs b/src/openhuman/local_ai/service/ollama_admin.rs index 7479ceb1fe..417134ba36 100644 --- a/src/openhuman/local_ai/service/ollama_admin.rs +++ b/src/openhuman/local_ai/service/ollama_admin.rs @@ -25,35 +25,22 @@ fn lm_studio_models_error_means_unreachable(error: &str) -> bool { impl LocalAiService { pub(in crate::openhuman::local_ai::service) async fn ensure_ollama_server( &self, - config: &Config, + _config: &Config, ) -> Result<(), String> { - // If openhuman crashed last session and left a daemon running, the - // spawn marker lets us recognise it and reclaim it (kill + respawn - // under owned-child tracking) instead of either leaking it forever - // or hitting an external daemon that just happens to be on :11434. - self.reclaim_orphan_if_ours(config).await; - if self.ollama_healthy().await { - // Server is running — verify it can actually execute models by checking - // if the runner works. A stale server with a missing binary will 500. if self.ollama_runner_ok().await { return Ok(()); } - // Runner is broken (e.g. binary moved). log::warn!("[local_ai] Ollama server responds but runner is broken"); - // Only restart if we own it. Killing an external daemon's - // broken runner is the user's job, not ours — friendly-fire. - self.kill_ollama_server().await; - if self.ollama_healthy().await { - // Our kill was a no-op (or didn't take effect) — daemon is external. - return Err("An external Ollama daemon on :11434 has a broken runner. \ - Restart it manually (or stop it so openhuman can take over)." - .to_string()); - } + return Err( + "Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry." + .to_string(), + ); } - - let ollama_cmd = self.resolve_or_install_ollama_binary(config).await?; - self.start_and_wait_for_server(config, &ollama_cmd).await + let base_url = ollama_base_url(); + Err(format!( + "OpenHuman no longer starts or installs Ollama automatically. Start your inference runtime yourself and make sure it is reachable at {base_url}." + )) } /// Like `ensure_ollama_server`, but forces a fresh install of the Ollama binary @@ -62,18 +49,7 @@ impl LocalAiService { &self, config: &Config, ) -> Result<(), String> { - // Force a fresh download regardless of existing binaries. - self.download_and_install_ollama(config).await?; - - let Some(ollama_cmd) = find_workspace_ollama_binary(config) else { - // Also check system path after install. - let system_bin = find_system_ollama_binary() - .ok_or_else(|| "Ollama installed but binary not found on system".to_string())?; - // Try to use the system binary directly. - return self.start_and_wait_for_server(config, &system_bin).await; - }; - - self.start_and_wait_for_server(config, &ollama_cmd).await + self.ensure_ollama_server(config).await } /// Check if a healthy daemon on `:11434` is actually openhuman's own @@ -496,7 +472,7 @@ impl LocalAiService { Ok(()) } - async fn ollama_healthy(&self) -> bool { + pub(in crate::openhuman::local_ai::service) async fn ollama_healthy(&self) -> bool { self.http .get(format!("{}/api/tags", ollama_base_url())) .timeout(std::time::Duration::from_secs(2)) @@ -883,38 +859,22 @@ impl LocalAiService { let binary_path = self.resolve_binary_path(config); let mut issues: Vec = Vec::new(); - let mut repair_actions: Vec = Vec::new(); + let repair_actions: Vec = Vec::new(); if !healthy { issues.push(format!( "Ollama server is not running or not reachable at {}", base_url )); - if binary_path.is_none() { - repair_actions.push(serde_json::json!({"action": "install_ollama"})); - } else { - repair_actions.push(serde_json::json!({ - "action": "start_server", - "binary_path": binary_path, - })); - } } if healthy && !chat_found { issues.push(format!("Chat model `{}` is not installed", expected_chat)); - repair_actions.push(serde_json::json!({ - "action": "pull_model", - "model": expected_chat, - })); } if healthy && config.local_ai.preload_embedding_model && !embedding_found { issues.push(format!( "Embedding model `{}` is not installed", expected_embedding )); - repair_actions.push(serde_json::json!({ - "action": "pull_model", - "model": expected_embedding, - })); } if healthy && matches!( @@ -927,10 +887,6 @@ impl LocalAiService { "Vision model `{}` is not installed", expected_vision )); - repair_actions.push(serde_json::json!({ - "action": "pull_model", - "model": expected_vision, - })); } if let Some(ref e) = tags_error { issues.push(format!("Failed to list models: {e}")); @@ -1064,7 +1020,7 @@ impl LocalAiService { .any(|name| name == &expected_chat.to_ascii_lowercase()); let mut issues: Vec = Vec::new(); - let mut repair_actions: Vec = Vec::new(); + let repair_actions: Vec = Vec::new(); if !healthy { let detail = models_error @@ -1075,25 +1031,14 @@ impl LocalAiService { "LM Studio server is not running or not reachable at {}{}", base_url, detail )); - repair_actions.push(serde_json::json!({ - "action": "start_lm_studio_server", - "base_url": base_url, - })); } if healthy && models_error.is_none() && models.is_empty() { issues.push("LM Studio is reachable but no models are loaded".to_string()); - repair_actions.push(serde_json::json!({ - "action": "load_lm_studio_model", - })); } else if healthy && models_error.is_none() && !chat_found { issues.push(format!( "Chat model `{}` is not loaded in LM Studio", expected_chat )); - repair_actions.push(serde_json::json!({ - "action": "load_lm_studio_model", - "model": expected_chat, - })); } if healthy { if let Some(ref err) = models_error { diff --git a/src/openhuman/local_ai/service/ollama_admin_tests.rs b/src/openhuman/local_ai/service/ollama_admin_tests.rs index e2a0511fa2..b821301d90 100644 --- a/src/openhuman/local_ai/service/ollama_admin_tests.rs +++ b/src/openhuman/local_ai/service/ollama_admin_tests.rs @@ -123,6 +123,100 @@ async fn ollama_healthy_returns_false_on_unreachable_url() { } } +#[tokio::test] +async fn ensure_ollama_server_requires_external_runtime_when_unreachable() { + let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + } + + let config = Config::default(); + let service = LocalAiService::new(&config); + let err = service + .ensure_ollama_server(&config) + .await + .expect_err("unreachable runtime should fail"); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + assert!( + err.contains("no longer starts or installs Ollama automatically"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +async fn ensure_ollama_server_reports_broken_external_runner_without_restart_attempt() { + let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + + let app = Router::new() + .route("/api/tags", get(|| async { Json(json!({ "models": [] })) })) + .route( + "/api/show", + axum::routing::post(|| async { + ( + axum::http::StatusCode::INTERNAL_SERVER_ERROR, + "fork/exec /broken/ollama: no such file or directory", + ) + }), + ); + let base = spawn_mock(app).await; + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", &base); + } + + let config = Config::default(); + let service = LocalAiService::new(&config); + let err = service + .ensure_ollama_server(&config) + .await + .expect_err("broken runner should fail"); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + assert!( + err.contains("cannot execute models") || err.contains("Restart the external runtime"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +async fn assets_status_marks_ollama_unavailable_when_runtime_is_down_even_if_binary_exists() { + let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + } + let fake_ollama = std::env::current_exe().expect("current exe"); + let prev_ollama_bin = std::env::var_os("OLLAMA_BIN"); + unsafe { + std::env::set_var("OLLAMA_BIN", &fake_ollama); + } + + let config = Config::default(); + let service = LocalAiService::new(&config); + let status = service.assets_status(&config).await.expect("assets status"); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + match prev_ollama_bin { + Some(value) => std::env::set_var("OLLAMA_BIN", value), + None => std::env::remove_var("OLLAMA_BIN"), + } + } + + assert!( + !status.ollama_available, + "runtime-down status must not be treated as available" + ); + assert_ne!(status.chat.state, "ready"); +} + #[tokio::test] async fn diagnostics_reports_server_unreachable_when_url_unbound() { let _guard = crate::openhuman::local_ai::local_ai_test_guard(); @@ -151,8 +245,8 @@ async fn diagnostics_reports_server_unreachable_when_url_unbound() { .cloned() .unwrap_or_default(); assert!( - !repair_actions.is_empty(), - "unreachable server must produce at least one repair action" + repair_actions.is_empty(), + "OpenHuman should not suggest app-managed repair actions anymore" ); unsafe { std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); @@ -181,16 +275,13 @@ async fn diagnostics_with_running_server_but_missing_models_flags_issues() { // No models are installed → expected chat model issue surfaces. let issues = diag["issues"].as_array().cloned().unwrap_or_default(); assert!(!issues.is_empty()); - // Missing chat model should produce a pull_model repair action. let repair_actions = diag["repair_actions"] .as_array() .cloned() .unwrap_or_default(); assert!( - repair_actions - .iter() - .any(|a| a["action"].as_str() == Some("pull_model")), - "missing models must produce pull_model repair action" + repair_actions.is_empty(), + "missing models should no longer surface app-managed pull actions" ); unsafe { std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); @@ -286,7 +377,7 @@ async fn resolve_binary_path_finds_binary_via_ollama_bin_env() { } #[tokio::test] -async fn diagnostics_repair_actions_include_start_server_when_binary_known() { +async fn diagnostics_repair_actions_are_empty_when_binary_is_known_but_server_is_down() { let _guard = crate::openhuman::local_ai::local_ai_test_guard(); let tmp = tempfile::tempdir().unwrap(); @@ -312,10 +403,8 @@ async fn diagnostics_repair_actions_include_start_server_when_binary_known() { .cloned() .unwrap_or_default(); assert!( - repair_actions - .iter() - .any(|a| a["action"].as_str() == Some("start_server")), - "when binary is known but server is down, repair action should be start_server" + repair_actions.is_empty(), + "when server is down, diagnostics should not advertise app-managed start actions" ); unsafe { diff --git a/src/openhuman/local_ai/types.rs b/src/openhuman/local_ai/types.rs index 5a814fbba9..ca4b9425bf 100644 --- a/src/openhuman/local_ai/types.rs +++ b/src/openhuman/local_ai/types.rs @@ -95,11 +95,9 @@ pub struct LocalAiAssetsStatus { pub stt: LocalAiAssetStatus, pub tts: LocalAiAssetStatus, pub quantization: String, - /// True when an Ollama binary is discoverable on disk (workspace install, - /// system install, or via `OLLAMA_BIN`/configured path). When false, the - /// frontend should render an "Install Ollama" CTA instead of model state — - /// querying `/api/tags` against a missing server otherwise lets a 30s - /// connect timeout cascade through `has_model`. + /// True when the configured Ollama endpoint is reachable enough for model + /// checks. When false, the frontend should render external-runtime + /// guidance rather than app-managed install/start affordances. pub ollama_available: bool, } diff --git a/src/openhuman/mod.rs b/src/openhuman/mod.rs index 266a3ffb72..9b26111b72 100644 --- a/src/openhuman/mod.rs +++ b/src/openhuman/mod.rs @@ -35,6 +35,7 @@ pub mod embeddings; pub mod encryption; pub mod health; pub mod heartbeat; +pub mod inference; pub mod integrations; pub mod learning; pub mod local_ai; diff --git a/src/openhuman/subconscious/executor.rs b/src/openhuman/subconscious/executor.rs index a9e8305207..db2e99a09a 100644 --- a/src/openhuman/subconscious/executor.rs +++ b/src/openhuman/subconscious/executor.rs @@ -209,7 +209,7 @@ async fn execute_with_local_model( }, ]; - let outcome = crate::openhuman::local_ai::ops::local_ai_chat(&config, messages, None) + let outcome = crate::openhuman::inference::ops::inference_chat(&config, messages, None) .await .map_err(|e| format!("local model: {e}"))?; diff --git a/tests/json_rpc_e2e.rs b/tests/json_rpc_e2e.rs index a556f9e30f..812ed0b728 100644 --- a/tests/json_rpc_e2e.rs +++ b/tests/json_rpc_e2e.rs @@ -3340,6 +3340,186 @@ async fn json_rpc_local_ai_lm_studio_config_diagnostics_and_prompt() { rpc_join.abort(); } +#[tokio::test] +async fn json_rpc_inference_namespace_lm_studio_prompt_and_status() { + let _env_lock = json_rpc_e2e_env_lock(); + let tmp = tempdir().expect("tempdir"); + let home = tmp.path(); + let openhuman_home = home.join(".openhuman"); + + let _home_guard = EnvVarGuard::set_to_path("HOME", home); + let _workspace_guard = EnvVarGuard::unset("OPENHUMAN_WORKSPACE"); + let _backend_url_guard = EnvVarGuard::unset("BACKEND_URL"); + let _vite_backend_guard = EnvVarGuard::unset("VITE_BACKEND_URL"); + let _tier_guard = EnvVarGuard::unset("OPENHUMAN_LOCAL_AI_TIER"); + let _lm_env_guard = EnvVarGuard::unset("OPENHUMAN_LM_STUDIO_BASE_URL"); + let _lm_alias_env_guard = EnvVarGuard::unset("LM_STUDIO_BASE_URL"); + + let (mock_addr, mock_join) = serve_on_ephemeral(mock_upstream_router()).await; + let mock_origin = format!("http://{}", mock_addr); + write_min_config(&openhuman_home, &mock_origin); + + let lm_app = Router::new() + .route( + "/v1/models", + get(|| async { + Json(json!({ + "object": "list", + "data": [ + { "id": "local-model", "object": "model", "owned_by": "lm-studio" } + ] + })) + }), + ) + .route( + "/v1/chat/completions", + post(|Json(_body): Json| async move { + Json(json!({ + "id": "chatcmpl-inference-e2e", + "object": "chat.completion", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "hello from inference namespace" + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 7, + "completion_tokens": 5, + "total_tokens": 12 + } + })) + }), + ); + let (lm_addr, lm_join) = serve_on_ephemeral(lm_app).await; + let lm_base = format!("http://{lm_addr}/v1"); + + let (rpc_addr, rpc_join) = serve_on_ephemeral(build_core_http_router(false)).await; + let rpc_base = format!("http://{}", rpc_addr); + tokio::time::sleep(Duration::from_millis(100)).await; + + let update = post_json_rpc( + &rpc_base, + 360, + "openhuman.config_update_local_ai_settings", + json!({ + "runtime_enabled": true, + "opt_in_confirmed": true, + "provider": "lm_studio", + "base_url": lm_base, + "model_id": "local-model", + "chat_model_id": "local-model" + }), + ) + .await; + assert_no_jsonrpc_error(&update, "update_local_ai_settings for inference namespace"); + + let status = post_json_rpc(&rpc_base, 361, "openhuman.inference_status", json!({})).await; + let status_result = assert_no_jsonrpc_error(&status, "inference_status"); + let status_payload = status_result.get("result").unwrap_or(status_result); + assert_eq!( + status_payload.get("provider").and_then(Value::as_str), + Some("lm_studio") + ); + + let prompt = post_json_rpc( + &rpc_base, + 362, + "openhuman.inference_prompt", + json!({ + "prompt": "hello", + "max_tokens": 16, + "no_think": true + }), + ) + .await; + let prompt_result = assert_no_jsonrpc_error(&prompt, "inference_prompt"); + assert_eq!( + extract_string_outcome(prompt_result), + "hello from inference namespace" + ); + + let summarize = post_json_rpc( + &rpc_base, + 363, + "openhuman.inference_summarize", + json!({ + "text": "summarize me", + "max_tokens": 16 + }), + ) + .await; + let summarize_result = assert_no_jsonrpc_error(&summarize, "inference_summarize"); + assert_eq!( + extract_string_outcome(summarize_result), + "hello from inference namespace" + ); + + lm_join.abort(); + mock_join.abort(); + rpc_join.abort(); +} + +#[tokio::test] +async fn json_rpc_inference_prompt_requires_external_ollama_runtime_when_unreachable() { + let _env_lock = json_rpc_e2e_env_lock(); + let tmp = tempdir().expect("tempdir"); + let home = tmp.path(); + let openhuman_home = home.join(".openhuman"); + + let _home_guard = EnvVarGuard::set_to_path("HOME", home); + let _workspace_guard = EnvVarGuard::unset("OPENHUMAN_WORKSPACE"); + let _backend_url_guard = EnvVarGuard::unset("BACKEND_URL"); + let _vite_backend_guard = EnvVarGuard::unset("VITE_BACKEND_URL"); + let _tier_guard = EnvVarGuard::unset("OPENHUMAN_LOCAL_AI_TIER"); + let _ollama_url_guard = EnvVarGuard::set("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + + let (mock_addr, mock_join) = serve_on_ephemeral(mock_upstream_router()).await; + let mock_origin = format!("http://{}", mock_addr); + write_min_config(&openhuman_home, &mock_origin); + + let (rpc_addr, rpc_join) = serve_on_ephemeral(build_core_http_router(false)).await; + let rpc_base = format!("http://{}", rpc_addr); + tokio::time::sleep(Duration::from_millis(100)).await; + + let update = post_json_rpc( + &rpc_base, + 364, + "openhuman.config_update_local_ai_settings", + json!({ + "runtime_enabled": true, + "opt_in_confirmed": true, + "provider": "ollama", + "model_id": "gemma3:1b-it-qat", + "chat_model_id": "gemma3:1b-it-qat" + }), + ) + .await; + assert_no_jsonrpc_error(&update, "update_local_ai_settings for unreachable ollama"); + + let prompt = post_json_rpc( + &rpc_base, + 365, + "openhuman.inference_prompt", + json!({ + "prompt": "hello", + "max_tokens": 16, + "no_think": true + }), + ) + .await; + let prompt_err = assert_jsonrpc_error(&prompt, "inference_prompt unreachable ollama"); + assert!( + prompt_err.contains("no longer starts or installs Ollama automatically"), + "unexpected error: {prompt_err}" + ); + + mock_join.abort(); + rpc_join.abort(); +} + // ── Billing & Team E2E tests ────────────────────────────────────────────────── /// End-to-end test for billing RPC methods. From 481482e1738df930966d31463ab466ad8d62b08b Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 15:31:24 -0700 Subject: [PATCH 02/18] Clarify external Ollama routing errors --- src/openhuman/local_ai/service/public_infer.rs | 12 ++++++++++-- tests/json_rpc_e2e.rs | 6 +++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/openhuman/local_ai/service/public_infer.rs b/src/openhuman/local_ai/service/public_infer.rs index 41a3420f94..ef49c75049 100644 --- a/src/openhuman/local_ai/service/public_infer.rs +++ b/src/openhuman/local_ai/service/public_infer.rs @@ -8,6 +8,14 @@ use crate::openhuman::local_ai::provider::{provider_from_config, LocalAiProvider use super::LocalAiService; +fn external_ollama_request_error(prefix: &str, error: &reqwest::Error) -> String { + let base_url = ollama_base_url(); + format!( + "{prefix}: OpenHuman routes inference through an external Ollama endpoint. \ + Make sure Ollama is already running and reachable at {base_url} ({error})" + ) +} + impl LocalAiService { pub async fn summarize( &self, @@ -259,7 +267,7 @@ impl LocalAiService { .json(&body) .send() .await - .map_err(|e| format!("ollama chat request failed: {e}"))?; + .map_err(|e| external_ollama_request_error("ollama chat request failed", &e))?; if !response.status().is_success() { let status = response.status(); @@ -509,7 +517,7 @@ impl LocalAiService { .json(&body) .send() .await - .map_err(|e| format!("ollama request failed: {e}"))?; + .map_err(|e| external_ollama_request_error("ollama request failed", &e))?; if !response.status().is_success() { let status = response.status(); let body = response.text().await.unwrap_or_default(); diff --git a/tests/json_rpc_e2e.rs b/tests/json_rpc_e2e.rs index 812ed0b728..06e95411ee 100644 --- a/tests/json_rpc_e2e.rs +++ b/tests/json_rpc_e2e.rs @@ -3511,8 +3511,12 @@ async fn json_rpc_inference_prompt_requires_external_ollama_runtime_when_unreach ) .await; let prompt_err = assert_jsonrpc_error(&prompt, "inference_prompt unreachable ollama"); + let prompt_err_message = prompt_err + .get("message") + .and_then(Value::as_str) + .unwrap_or_default(); assert!( - prompt_err.contains("no longer starts or installs Ollama automatically"), + prompt_err_message.contains("routes inference through an external Ollama endpoint"), "unexpected error: {prompt_err}" ); From 56c89f6fc2b0d4e9987336de4e4d0dcc902efd8b Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 15:41:17 -0700 Subject: [PATCH 03/18] Remove legacy Ollama management RPCs --- .../settings/panels/LocalModelDebugPanel.tsx | 72 ++----------- .../pages/onboarding/steps/LocalAIStep.tsx | 2 +- app/src/services/api/aiSettingsApi.ts | 21 ---- .../utils/__tests__/localAiBootstrap.test.ts | 14 +-- app/src/utils/localAiBootstrap.ts | 17 +-- app/src/utils/tauriCommands/localAi.ts | 47 -------- src/core/jsonrpc_tests.rs | 12 --- src/openhuman/local_ai/README.md | 2 +- src/openhuman/local_ai/ops.rs | 102 ++---------------- src/openhuman/local_ai/schemas.rs | 100 ----------------- src/openhuman/local_ai/schemas_tests.rs | 36 ------- 11 files changed, 21 insertions(+), 404 deletions(-) diff --git a/app/src/components/settings/panels/LocalModelDebugPanel.tsx b/app/src/components/settings/panels/LocalModelDebugPanel.tsx index 638e247040..25a8efcc8d 100644 --- a/app/src/components/settings/panels/LocalModelDebugPanel.tsx +++ b/app/src/components/settings/panels/LocalModelDebugPanel.tsx @@ -17,13 +17,10 @@ import { type LocalAiTtsResult, openhumanLocalAiAssetsStatus, openhumanLocalAiDiagnostics, - openhumanLocalAiDownload, - openhumanLocalAiDownloadAllAssets, openhumanLocalAiDownloadAsset, openhumanLocalAiDownloadsProgress, openhumanLocalAiEmbed, openhumanLocalAiPrompt, - openhumanLocalAiSetOllamaPath, openhumanLocalAiStatus, openhumanLocalAiSummarize, openhumanLocalAiTranscribe, @@ -60,8 +57,6 @@ const LocalModelDebugPanel = () => { const [assets, setAssets] = useState(null); const [downloads, setDownloads] = useState(null); const [statusError, setStatusError] = useState(''); - const [isTriggeringDownload, setIsTriggeringDownload] = useState(false); - const [bootstrapMessage, setBootstrapMessage] = useState(''); const [assetDownloadBusy, setAssetDownloadBusy] = useState>({}); const [summaryInput, setSummaryInput] = useState(''); @@ -97,8 +92,6 @@ const LocalModelDebugPanel = () => { const [diagnosticsError, setDiagnosticsError] = useState(''); const [showErrorDetail, setShowErrorDetail] = useState(false); - const [ollamaPathInput, setOllamaPathInput] = useState(''); - const [isSettingPath, setIsSettingPath] = useState(false); const progress = useMemo(() => { const downloadProgress = progressFromDownloads(downloads); @@ -158,29 +151,6 @@ const LocalModelDebugPanel = () => { }; }, []); - const triggerDownload = async (force: boolean) => { - if (!runtimeEnabled) return; - setIsTriggeringDownload(true); - setStatusError(''); - setBootstrapMessage(''); - try { - await openhumanLocalAiDownload(force); - await openhumanLocalAiDownloadAllAssets(force); - const freshStatus = await openhumanLocalAiStatus(); - setStatus(freshStatus.result); - if (freshStatus.result?.state === 'ready') { - setBootstrapMessage(force ? 'Re-bootstrap complete' : 'Models verified'); - } - setTimeout(() => setBootstrapMessage(''), 3000); - } catch (err) { - const message = - err instanceof Error ? err.message : 'Failed to trigger local model bootstrap'; - setStatusError(message); - } finally { - setIsTriggeringDownload(false); - } - }; - const runSummaryTest = async () => { if (!runtimeEnabled || !summaryInput.trim()) return; setIsSummaryLoading(true); @@ -305,32 +275,6 @@ const LocalModelDebugPanel = () => { } }; - const handleSetOllamaPath = async () => { - setIsSettingPath(true); - setStatusError(''); - try { - await openhumanLocalAiSetOllamaPath(ollamaPathInput); - await loadStatus(); - } catch (err) { - setStatusError(err instanceof Error ? err.message : 'Failed to set Ollama path'); - } finally { - setIsSettingPath(false); - } - }; - - const handleClearOllamaPath = async () => { - setOllamaPathInput(''); - setIsSettingPath(true); - try { - await openhumanLocalAiSetOllamaPath(''); - await loadStatus(); - } catch (err) { - setStatusError(err instanceof Error ? err.message : 'Failed to clear Ollama path'); - } finally { - setIsSettingPath(false); - } - }; - const handleRunDiagnostics = async () => { setIsDiagnosticsLoading(true); setDiagnosticsError(''); @@ -361,25 +305,25 @@ const LocalModelDebugPanel = () => { isDiagnosticsLoading={isDiagnosticsLoading} diagnosticsError={diagnosticsError} statusError={statusError} - isTriggeringDownload={isTriggeringDownload} - bootstrapMessage={bootstrapMessage} + isTriggeringDownload={false} + bootstrapMessage="" progress={progress} isIndeterminateDownload={isIndeterminateDownload} isInstalling={isInstalling} isInstallError={isInstallError} showErrorDetail={showErrorDetail} - ollamaPathInput={ollamaPathInput} - isSettingPath={isSettingPath} + ollamaPathInput="" + isSettingPath={false} downloadedText={downloadedText} speedText={speedText} etaText={etaText} statusTone={statusTone} runtimeEnabled={runtimeEnabled} onRefreshStatus={() => void loadStatus()} - onTriggerDownload={force => void triggerDownload(force)} - onSetOllamaPath={() => void handleSetOllamaPath()} - onClearOllamaPath={() => void handleClearOllamaPath()} - onSetOllamaPathInput={setOllamaPathInput} + onTriggerDownload={() => {}} + onSetOllamaPath={() => {}} + onClearOllamaPath={() => {}} + onSetOllamaPathInput={() => {}} onToggleErrorDetail={() => setShowErrorDetail(v => !v)} onRunDiagnostics={() => void handleRunDiagnostics()} /> diff --git a/app/src/pages/onboarding/steps/LocalAIStep.tsx b/app/src/pages/onboarding/steps/LocalAIStep.tsx index 3972ede2e0..8f324ad9cd 100644 --- a/app/src/pages/onboarding/steps/LocalAIStep.tsx +++ b/app/src/pages/onboarding/steps/LocalAIStep.tsx @@ -167,7 +167,7 @@ const LocalAIStep = ({ onNext, onBack: _onBack, onDownloadError }: LocalAIStepPr type="button" onClick={handleConsent} className="mt-3 w-full text-center text-xs text-stone-400 hover:text-stone-600 transition-colors"> - Use local AI instead (install Ollama now) + Use local AI instead (connect Ollama now) ); diff --git a/app/src/services/api/aiSettingsApi.ts b/app/src/services/api/aiSettingsApi.ts index 5a065b5244..7a5403c2ca 100644 --- a/app/src/services/api/aiSettingsApi.ts +++ b/app/src/services/api/aiSettingsApi.ts @@ -38,10 +38,7 @@ import { type ModelPresetResult, openhumanLocalAiApplyPreset, openhumanLocalAiDiagnostics, - openhumanLocalAiDownload, openhumanLocalAiPresets, - openhumanLocalAiSetOllamaPath, - openhumanLocalAiShutdownOwned, openhumanLocalAiStatus, type PresetsResponse, } from '../../utils/tauriCommands/localAi'; @@ -319,28 +316,10 @@ export async function setLocalRuntimeEnabled(enabled: boolean): Promise { await openhumanUpdateLocalAiSettings({ runtime_enabled: enabled, opt_in_confirmed: enabled }); } -/** - * Set / clear the user-configured Ollama binary path. - */ -export async function setLocalOllamaPath(path: string): Promise { - await openhumanLocalAiSetOllamaPath(path); -} - -/** - * Gate off the local-AI runtime. - */ -export async function shutdownLocalProvider(): Promise { - await setLocalRuntimeEnabled(false); - await openhumanLocalAiShutdownOwned(); -} - /** Convenience helpers re-exported so the panel imports from one place. */ export const localProvider = { applyPreset: (tier: string) => openhumanLocalAiApplyPreset(tier), - download: (retry: boolean) => openhumanLocalAiDownload(retry), setEnabled: (enabled: boolean) => setLocalRuntimeEnabled(enabled), - setBinaryPath: (path: string) => setLocalOllamaPath(path), - shutdown: () => shutdownLocalProvider(), }; export type { ModelPresetResult }; diff --git a/app/src/utils/__tests__/localAiBootstrap.test.ts b/app/src/utils/__tests__/localAiBootstrap.test.ts index 061534ed9b..a89e774b4a 100644 --- a/app/src/utils/__tests__/localAiBootstrap.test.ts +++ b/app/src/utils/__tests__/localAiBootstrap.test.ts @@ -7,7 +7,6 @@ import { vi.mock('../tauriCommands', () => ({ openhumanLocalAiApplyPreset: vi.fn(), - openhumanLocalAiDownloadAllAssets: vi.fn(), openhumanLocalAiPresets: vi.fn(), })); @@ -16,7 +15,7 @@ describe('localAiBootstrap', () => { vi.clearAllMocks(); }); - it('applies the recommended preset before starting background downloads when no tier is selected', async () => { + it('applies the recommended preset when no tier is selected', async () => { const tauriCommands = await import('../tauriCommands'); vi.mocked(tauriCommands.openhumanLocalAiPresets).mockResolvedValue({ presets: [], @@ -40,21 +39,10 @@ describe('localAiBootstrap', () => { embedding_model_id: 'all-minilm:latest', quantization: 'qat', }); - vi.mocked(tauriCommands.openhumanLocalAiDownloadAllAssets).mockResolvedValue({ - result: { state: 'downloading', progress: 0 } as never, - logs: [], - }); - const result = await bootstrapLocalAiWithRecommendedPreset(false, '[test]'); expect(tauriCommands.openhumanLocalAiPresets).toHaveBeenCalledOnce(); expect(tauriCommands.openhumanLocalAiApplyPreset).toHaveBeenCalledWith('ram_2_4gb'); - expect(tauriCommands.openhumanLocalAiDownloadAllAssets).toHaveBeenCalledWith(false); - expect( - vi.mocked(tauriCommands.openhumanLocalAiApplyPreset).mock.invocationCallOrder[0] - ).toBeLessThan( - vi.mocked(tauriCommands.openhumanLocalAiDownloadAllAssets).mock.invocationCallOrder[0] - ); expect(result.preset.hadSelectedTier).toBe(false); expect(result.preset.appliedTier).toBe('ram_2_4gb'); }); diff --git a/app/src/utils/localAiBootstrap.ts b/app/src/utils/localAiBootstrap.ts index fccb7f3423..ba6d4f9b2e 100644 --- a/app/src/utils/localAiBootstrap.ts +++ b/app/src/utils/localAiBootstrap.ts @@ -1,6 +1,5 @@ import { openhumanLocalAiApplyPreset, - openhumanLocalAiDownloadAllAssets, openhumanLocalAiPresets, type PresetsResponse, } from './tauriCommands'; @@ -95,23 +94,11 @@ export const ensureRecommendedLocalAiPresetIfNeeded = async ( }; }; -export const triggerLocalAiAssetBootstrap = async ( - force = false, - logPrefix = '[local-ai-bootstrap]' -) => { - console.debug(`${logPrefix} triggering local AI background bootstrap`, JSON.stringify({ force })); - return await retryLocalAiCommand( - force ? 're-bootstrap local AI assets' : 'bootstrap local AI assets', - () => openhumanLocalAiDownloadAllAssets(force), - logPrefix - ); -}; - export const bootstrapLocalAiWithRecommendedPreset = async ( force = false, logPrefix = '[local-ai-bootstrap]' ) => { + void force; const preset = await ensureRecommendedLocalAiPresetIfNeeded(logPrefix); - const download = await triggerLocalAiAssetBootstrap(force, logPrefix); - return { preset, download }; + return { preset }; }; diff --git a/app/src/utils/tauriCommands/localAi.ts b/app/src/utils/tauriCommands/localAi.ts index 432de5f6f0..a49730eec6 100644 --- a/app/src/utils/tauriCommands/localAi.ts +++ b/app/src/utils/tauriCommands/localAi.ts @@ -261,32 +261,6 @@ export async function openhumanLocalAiStatus(): Promise> { - try { - return await callCoreRpc>({ - method: 'openhuman.local_ai_download', - params: { force: force ?? false }, - }); - } catch (err) { - const message = tauriErrorMessage(err); - if (message.includes('unknown method: openhuman.local_ai_download')) { - return await openhumanLocalAiStatus(); - } - throw new Error(message); - } -} - -export async function openhumanLocalAiDownloadAllAssets( - force?: boolean -): Promise> { - return await callCoreRpc>({ - method: 'openhuman.local_ai_download_all_assets', - params: { force: force ?? false }, - }); -} - export async function openhumanLocalAiSummarize( text: string, maxTokens?: number @@ -468,24 +442,3 @@ export async function openhumanLocalAiDiagnostics(): Promise params: {}, }); } - -export async function openhumanLocalAiSetOllamaPath( - path: string -): Promise<{ ollama_binary_path: string | null; status: LocalAiStatus }> { - return await callCoreRpc<{ ollama_binary_path: string | null; status: LocalAiStatus }>({ - method: 'openhuman.local_ai_set_ollama_path', - params: { path }, - }); -} - -/** - * Gate off the local-AI runtime: kills the Ollama daemon only if OpenHuman - * spawned it (external daemons are left running), and forces status to - * `"disabled"` so the UI flips immediately. - */ -export async function openhumanLocalAiShutdownOwned(): Promise> { - return await callCoreRpc>({ - method: 'openhuman.local_ai_shutdown_owned', - params: {}, - }); -} diff --git a/src/core/jsonrpc_tests.rs b/src/core/jsonrpc_tests.rs index ad2433eb8e..fdd7856deb 100644 --- a/src/core/jsonrpc_tests.rs +++ b/src/core/jsonrpc_tests.rs @@ -281,18 +281,6 @@ async fn invoke_migrate_openclaw_rejects_unknown_param() { assert!(err.contains("unknown param 'x'")); } -#[tokio::test] -async fn invoke_local_ai_download_asset_missing_required_param_fails_validation() { - let err = invoke_method( - default_state(), - "openhuman.local_ai_download_asset", - json!({}), - ) - .await - .expect_err("missing capability should fail"); - assert!(err.contains("missing required param 'capability'")); -} - #[test] fn http_schema_dump_includes_openhuman_and_core_methods() { let dump = build_http_schema_dump(); diff --git a/src/openhuman/local_ai/README.md b/src/openhuman/local_ai/README.md index 2490c5463a..8a60bfce90 100644 --- a/src/openhuman/local_ai/README.md +++ b/src/openhuman/local_ai/README.md @@ -13,7 +13,7 @@ On-device inference stack. Owns the bundled Ollama runtime, LM Studio local-serv - `pub struct GifDecision` / `pub struct TenorGifResult` / `pub struct TenorSearchResult` — `gif_decision.rs`. - Status / progress / result types: `pub struct LocalAiStatus`, `LocalAiAssetStatus`, `LocalAiAssetsStatus`, `LocalAiDownloadProgressItem`, `LocalAiDownloadsProgress`, `LocalAiEmbeddingResult`, `LocalAiSpeechResult`, `LocalAiTtsResult` — `types.rs`. - `pub mod ops` (re-exported as `rpc`) — `ops.rs` — typed Rust wrappers around each capability (`agent_chat`, `agent_chat_simple`, `summarize`, `prompt`, `vision_prompt`, `embed`, `transcribe`, `tts`, `should_react`, `analyze_sentiment`, `should_send_gif`, `tenor_search`). -- RPC `local_ai.{agent_chat, agent_chat_simple, local_ai_status, local_ai_download, local_ai_download_all_assets, local_ai_summarize, local_ai_prompt, local_ai_vision_prompt, local_ai_embed, local_ai_transcribe, local_ai_transcribe_bytes, local_ai_tts, local_ai_assets_status, local_ai_downloads_progress, local_ai_download_asset, local_ai_device_profile, local_ai_presets, local_ai_apply_preset, local_ai_diagnostics, local_ai_set_ollama_path, local_ai_chat, local_ai_should_react, local_ai_analyze_sentiment, local_ai_should_send_gif, local_ai_tenor_search}` — `schemas.rs`. +- RPC `local_ai.{agent_chat, agent_chat_simple, local_ai_status, local_ai_summarize, local_ai_prompt, local_ai_vision_prompt, local_ai_embed, local_ai_transcribe, local_ai_transcribe_bytes, local_ai_tts, local_ai_assets_status, local_ai_downloads_progress, local_ai_download_asset, local_ai_device_profile, local_ai_presets, local_ai_apply_preset, local_ai_diagnostics, local_ai_chat, local_ai_should_react, local_ai_analyze_sentiment, local_ai_should_send_gif, local_ai_tenor_search}` — `schemas.rs`. ## Calls into diff --git a/src/openhuman/local_ai/ops.rs b/src/openhuman/local_ai/ops.rs index 35c2e500cf..24de41a222 100644 --- a/src/openhuman/local_ai/ops.rs +++ b/src/openhuman/local_ai/ops.rs @@ -153,85 +153,6 @@ pub async fn local_ai_status( )) } -/// Stop the local-AI runtime, killing the Ollama daemon ONLY if OpenHuman -/// spawned it, and shift any workload routed to `ollama:` back to -/// `"cloud"` (= primary). -/// -/// Three coordinated effects: -/// -/// 1. **Daemon shutdown** — `shutdown_owned_ollama` kills the child process -/// only when the spawn marker matches. External daemons (system service, -/// user-launched `ollama serve`, daemons from another OpenHuman workspace) -/// are left untouched, per the same friendly-fire-avoidance rule -/// `ensure_ollama_server` follows at startup. -/// -/// 2. **Routing shift** — every `*_provider` field starting with `ollama:` -/// is cleared (set to `None`, which resolves to `"cloud"` at the factory). -/// Without this, the next chat call routed to `reasoning` (or any other -/// workload the user had set to `ollama:`) would fail at factory -/// build time. The shift is one-way: re-enabling local AI does NOT -/// restore the previous Ollama routes — the user re-picks. -/// -/// 3. **Status forced to disabled** so the UI reflects the gate immediately. -pub async fn local_ai_shutdown_owned( - config: &mut Config, -) -> Result, String> { - let _ = config; - Err("OpenHuman does not manage the Ollama process anymore. Stop or restart your external Ollama runtime directly.".to_string()) -} - -/// Clear every per-workload `*_provider` field whose stored value starts -/// with `"ollama:"`. Returns the count of fields actually changed so the -/// caller can decide whether to persist. -fn clear_ollama_workload_routes(config: &mut Config) -> usize { - fn clear_if_ollama(field: &mut Option) -> bool { - let is_ollama = field - .as_deref() - .map(|s| s.trim().starts_with("ollama:")) - .unwrap_or(false); - if is_ollama { - *field = None; - true - } else { - false - } - } - let mut changed = 0; - for field in [ - &mut config.reasoning_provider, - &mut config.agentic_provider, - &mut config.coding_provider, - &mut config.memory_provider, - &mut config.embeddings_provider, - &mut config.heartbeat_provider, - &mut config.learning_provider, - &mut config.subconscious_provider, - ] { - if clear_if_ollama(field) { - changed += 1; - } - } - changed -} - -/// Triggers a full download of all required local AI models. -pub async fn local_ai_download( - config: &Config, - force: bool, -) -> Result, String> { - let _ = (config, force); - Err("OpenHuman no longer downloads or starts Ollama for you. Start your external Ollama runtime and pull models yourself.".to_string()) -} - -/// Triggers a download of all local AI assets and returns progress information. -pub async fn local_ai_download_all_assets( - config: &Config, - force: bool, -) -> Result, String> { - let _ = (config, force); - Err("OpenHuman no longer downloads Ollama assets. Start your external Ollama runtime and manage model pulls yourself.".to_string()) -} - /// Generates a summary of the provided text using local AI models. pub async fn local_ai_summarize( config: &Config, @@ -422,21 +343,14 @@ pub async fn local_ai_download_asset( config: &Config, capability: &str, ) -> Result, String> { - let capability = capability.trim().to_ascii_lowercase(); - if matches!(capability.as_str(), "stt" | "tts") { - let service = local_ai::global(config); - let output = service - .download_asset(config, capability.as_str()) - .await - .map_err(|e| e.to_string())?; - return Ok(RpcOutcome::single_log( - output, - "local ai voice asset download triggered", - )); - } - - Err(format!( - "OpenHuman no longer downloads `{capability}` via Ollama. Start your external Ollama runtime and pull that model yourself." + let service = local_ai::global(config); + let output = service + .download_asset(config, capability.trim()) + .await + .map_err(|e| e.to_string())?; + Ok(RpcOutcome::single_log( + output, + "local ai voice asset download triggered", )) } diff --git a/src/openhuman/local_ai/schemas.rs b/src/openhuman/local_ai/schemas.rs index d2b9bb5b45..473d01ccd8 100644 --- a/src/openhuman/local_ai/schemas.rs +++ b/src/openhuman/local_ai/schemas.rs @@ -14,11 +14,6 @@ struct AgentChatParams { temperature: Option, } -#[derive(Debug, Deserialize)] -struct LocalAiDownloadParams { - force: Option, -} - #[derive(Debug, Deserialize)] struct LocalAiSummarizeParams { text: String, @@ -71,11 +66,6 @@ struct LocalAiApplyPresetParams { tier: String, } -#[derive(Debug, Deserialize)] -struct LocalAiSetOllamaPathParams { - path: String, -} - #[derive(Debug, Deserialize)] struct LocalAiChatMessageParam { role: String, @@ -138,9 +128,6 @@ pub fn all_controller_schemas() -> Vec { schemas("agent_chat"), schemas("agent_chat_simple"), schemas("local_ai_status"), - schemas("local_ai_shutdown_owned"), - schemas("local_ai_download"), - schemas("local_ai_download_all_assets"), schemas("local_ai_summarize"), schemas("local_ai_prompt"), schemas("local_ai_vision_prompt"), @@ -154,7 +141,6 @@ pub fn all_controller_schemas() -> Vec { schemas("local_ai_device_profile"), schemas("local_ai_presets"), schemas("local_ai_apply_preset"), - schemas("local_ai_set_ollama_path"), schemas("local_ai_diagnostics"), schemas("local_ai_chat"), schemas("local_ai_should_react"), @@ -182,18 +168,6 @@ pub fn all_registered_controllers() -> Vec { schema: schemas("local_ai_status"), handler: handle_local_ai_status, }, - RegisteredController { - schema: schemas("local_ai_shutdown_owned"), - handler: handle_local_ai_shutdown_owned, - }, - RegisteredController { - schema: schemas("local_ai_download"), - handler: handle_local_ai_download, - }, - RegisteredController { - schema: schemas("local_ai_download_all_assets"), - handler: handle_local_ai_download_all_assets, - }, RegisteredController { schema: schemas("local_ai_summarize"), handler: handle_local_ai_summarize, @@ -246,10 +220,6 @@ pub fn all_registered_controllers() -> Vec { schema: schemas("local_ai_apply_preset"), handler: handle_local_ai_apply_preset, }, - RegisteredController { - schema: schemas("local_ai_set_ollama_path"), - handler: handle_local_ai_set_ollama_path, - }, RegisteredController { schema: schemas("local_ai_diagnostics"), handler: handle_local_ai_diagnostics, @@ -324,30 +294,6 @@ pub fn schemas(function: &str) -> ControllerSchema { inputs: vec![], outputs: vec![json_output("status", "Local AI status payload.")], }, - "local_ai_shutdown_owned" => ControllerSchema { - namespace: "local_ai", - function: "shutdown_owned", - description: - "Gate off the local AI runtime. Kills the Ollama daemon only \ - if OpenHuman spawned it (external daemons are left running). \ - Forces status to \"disabled\" so the UI flips immediately.", - inputs: vec![], - outputs: vec![json_output("status", "Local AI status after shutdown.")], - }, - "local_ai_download" => ControllerSchema { - namespace: "local_ai", - function: "download", - description: "Trigger local AI model download bootstrap.", - inputs: vec![optional_bool("force", "Reset state before download.")], - outputs: vec![json_output("status", "Local AI status payload.")], - }, - "local_ai_download_all_assets" => ControllerSchema { - namespace: "local_ai", - function: "download_all_assets", - description: "Trigger full local AI asset download.", - inputs: vec![optional_bool("force", "Reset state before download.")], - outputs: vec![json_output("progress", "Download progress payload.")], - }, "local_ai_summarize" => ControllerSchema { namespace: "local_ai", function: "summarize", @@ -488,13 +434,6 @@ pub fn schemas(function: &str) -> ControllerSchema { inputs: vec![], outputs: vec![json_output("diagnostics", "Diagnostic report.")], }, - "local_ai_set_ollama_path" => ControllerSchema { - namespace: "local_ai", - function: "set_ollama_path", - description: "Set a custom Ollama binary path, persist to config, and trigger re-bootstrap.", - inputs: vec![required_string("path", "Absolute path to Ollama binary. Empty string to clear.")], - outputs: vec![json_output("result", "Updated status.")], - }, "local_ai_chat" => ControllerSchema { namespace: "local_ai", function: "chat", @@ -649,38 +588,6 @@ fn handle_local_ai_status(_params: Map) -> ControllerFuture { }) } -fn handle_local_ai_shutdown_owned(_params: Map) -> ControllerFuture { - Box::pin(async move { - let mut config = config_rpc::load_config_with_timeout().await?; - to_json(crate::openhuman::local_ai::rpc::local_ai_shutdown_owned(&mut config).await?) - }) -} - -fn handle_local_ai_download(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_download(&config, p.force.unwrap_or(false)) - .await?, - ) - }) -} - -fn handle_local_ai_download_all_assets(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_download_all_assets( - &config, - p.force.unwrap_or(false), - ) - .await?, - ) - }) -} - fn handle_local_ai_summarize(params: Map) -> ControllerFuture { Box::pin(async move { let p = deserialize_params::(params)?; @@ -923,13 +830,6 @@ fn handle_local_ai_diagnostics(_params: Map) -> ControllerFuture }) } -fn handle_local_ai_set_ollama_path(params: Map) -> ControllerFuture { - Box::pin(async move { - let _ = deserialize_params::(params)?; - Err("OpenHuman no longer manages an Ollama binary path. Point your inference setup at an already-running Ollama-compatible endpoint instead.".to_string()) - }) -} - fn handle_local_ai_should_react(params: Map) -> ControllerFuture { Box::pin(async move { let p = deserialize_params::(params)?; diff --git a/src/openhuman/local_ai/schemas_tests.rs b/src/openhuman/local_ai/schemas_tests.rs index b07e60568e..07ccc76481 100644 --- a/src/openhuman/local_ai/schemas_tests.rs +++ b/src/openhuman/local_ai/schemas_tests.rs @@ -30,8 +30,6 @@ fn every_registered_key_resolves_to_non_unknown_schema() { "agent_chat", "agent_chat_simple", "local_ai_status", - "local_ai_download", - "local_ai_download_all_assets", "local_ai_summarize", "local_ai_prompt", "local_ai_vision_prompt", @@ -45,7 +43,6 @@ fn every_registered_key_resolves_to_non_unknown_schema() { "local_ai_device_profile", "local_ai_presets", "local_ai_apply_preset", - "local_ai_set_ollama_path", "local_ai_diagnostics", "local_ai_chat", "local_ai_should_react", @@ -237,39 +234,6 @@ async fn handle_apply_preset_accepts_valid_tier_and_persists() { assert!(result.get("chat_model_id").is_some()); } -#[tokio::test] -async fn handle_set_ollama_path_reports_external_runtime_contract() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([( - "path".to_string(), - serde_json::json!("/this/path/should/not/exist/ollama"), - )]); - let err = handle_local_ai_set_ollama_path(params).await.unwrap_err(); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(err.contains("no longer manages an Ollama binary path")); -} - -#[tokio::test] -async fn handle_set_ollama_path_rejects_empty_string_too() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([("path".to_string(), serde_json::json!(""))]); - let err = handle_local_ai_set_ollama_path(params).await.unwrap_err(); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(err.contains("no longer manages an Ollama binary path")); -} - /// Regression test for the CodeRabbit #7 race on PR #1755: when two /// concurrent RPC calls (e.g. a double-click, or the auto-install firing /// alongside a manual click) hit `handle_local_ai_install_whisper` at From 8223b5c591b63c314fb90f2a71e8a1bb2b960760 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 15:58:09 -0700 Subject: [PATCH 04/18] Add direct runtime inference coverage --- .../local-model/ModelDownloadSection.test.tsx | 53 +++++++++++++++++++ .../local_ai/service/public_infer_tests.rs | 20 +++++++ 2 files changed, 73 insertions(+) diff --git a/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx b/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx index 1e377919bf..47e89f7607 100644 --- a/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx +++ b/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx @@ -64,4 +64,57 @@ describe('ModelDownloadSection runtime gate', () => { expect(props.onRunSummaryTest).not.toHaveBeenCalled(); expect(props.onRunPromptTest).not.toHaveBeenCalled(); }); + + it('shows external-runtime guidance for ollama-backed assets', () => { + render( + + ); + + expect( + screen.getAllByText('Manage this model in your external runtime.').length + ).toBeGreaterThan(0); + expect(screen.getAllByRole('button', { name: 'Download' }).length).toBeGreaterThan(0); + }); }); diff --git a/src/openhuman/local_ai/service/public_infer_tests.rs b/src/openhuman/local_ai/service/public_infer_tests.rs index 20c95ae683..da6f77f188 100644 --- a/src/openhuman/local_ai/service/public_infer_tests.rs +++ b/src/openhuman/local_ai/service/public_infer_tests.rs @@ -96,6 +96,26 @@ async fn inference_errors_on_non_success_status() { } } +#[tokio::test] +async fn inference_connection_failure_mentions_external_ollama_runtime() { + let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + } + + let config = enabled_config(); + let service = ready_service(&config); + let err = service.prompt(&config, "hi", None, true).await.unwrap_err(); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + assert!(err.contains("external Ollama endpoint"), "unexpected error: {err}"); + assert!(err.contains("already running"), "unexpected error: {err}"); +} + #[tokio::test] async fn inference_errors_on_empty_response_when_allow_empty_false() { let _guard = crate::openhuman::local_ai::local_ai_test_guard(); From b1abf16b9232f5aa1df112cfd0a6baac82278695 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 15:59:48 -0700 Subject: [PATCH 05/18] Expand local model UI coverage --- .../DeviceCapabilitySection.test.tsx | 87 ++++++++++++++++++ .../api/__tests__/aiSettingsApi.test.ts | 89 +++++++++++++++++-- 2 files changed, 168 insertions(+), 8 deletions(-) create mode 100644 app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx diff --git a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx new file mode 100644 index 0000000000..bd80d55267 --- /dev/null +++ b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx @@ -0,0 +1,87 @@ +import { fireEvent, render, screen, waitFor } from '@testing-library/react'; +import { describe, expect, it, vi, beforeEach } from 'vitest'; + +import DeviceCapabilitySection from './DeviceCapabilitySection'; + +const mockApplyPreset = vi.fn(); + +vi.mock('../../../../utils/tauriCommands', () => ({ + openhumanLocalAiApplyPreset: (...args: unknown[]) => mockApplyPreset(...args), +})); + +const makePresetsData = (overrides: Record = {}) => ({ + presets: [ + { + tier: 'ram_2_4gb', + label: '2-4 GB', + description: 'Small local tier', + chat_model_id: 'gemma3:1b-it-qat', + vision_model_id: '', + embedding_model_id: 'bge-m3', + quantization: 'q4', + vision_mode: 'disabled', + supports_screen_summary: false, + target_ram_gb: 4, + min_ram_gb: 2, + approx_download_gb: 1.2, + }, + ], + recommended_tier: 'ram_2_4gb', + current_tier: 'ram_2_4gb', + selected_tier: 'ram_2_4gb', + recommend_disabled: false, + local_ai_enabled: true, + device: { + total_ram_bytes: 16 * 1024 * 1024 * 1024, + cpu_count: 8, + cpu_brand: 'Test CPU', + os_name: 'macOS', + os_version: '15', + has_gpu: true, + gpu_description: 'Test GPU', + }, + ...overrides, +}); + +describe('DeviceCapabilitySection', () => { + beforeEach(() => { + mockApplyPreset.mockReset(); + }); + + it('renders external runtime guidance when ollama is unavailable', () => { + render( + '16 GB'} + ollamaAvailable={false} + /> + ); + + expect(screen.getByText(/Run Ollama first/i)).toBeTruthy(); + expect(screen.getByRole('link', { name: 'Ollama docs' })).toBeTruthy(); + expect(screen.getByTitle('Run Ollama first to use this tier')).toBeTruthy(); + }); + + it('allows selecting the disabled cloud fallback tier', async () => { + mockApplyPreset.mockResolvedValueOnce({ applied_tier: 'disabled' }); + + render( + '16 GB'} + /> + ); + + fireEvent.click(screen.getByRole('button', { name: /Disabled.*0 GB/i })); + + await waitFor(() => { + expect(mockApplyPreset).toHaveBeenCalledWith('disabled'); + }); + }); +}); diff --git a/app/src/services/api/__tests__/aiSettingsApi.test.ts b/app/src/services/api/__tests__/aiSettingsApi.test.ts index a9d9ed5ca9..be7f565e69 100644 --- a/app/src/services/api/__tests__/aiSettingsApi.test.ts +++ b/app/src/services/api/__tests__/aiSettingsApi.test.ts @@ -12,11 +12,14 @@ import { type AISettings, clearCloudProviderKey, listProviderModels, + loadLocalProviderSnapshot, + localProvider, loadAISettings, parseProviderString, type ProviderRef, saveAISettings, serializeProviderRef, + setLocalRuntimeEnabled, setCloudProviderKey, } from '../aiSettingsApi'; @@ -25,10 +28,15 @@ import { const mockOpenhumanGetClientConfig = vi.fn(); const mockAuthListProviderCredentials = vi.fn(); const mockOpenhumanUpdateModelSettings = vi.fn(); +const mockOpenhumanUpdateLocalAiSettings = vi.fn(); const mockAuthStoreProviderCredentials = vi.fn(); const mockAuthRemoveProviderCredentials = vi.fn(); const mockCallCoreRpc = vi.fn(); const mockIsTauri = vi.fn(() => true); +const mockOpenhumanLocalAiStatus = vi.fn(); +const mockOpenhumanLocalAiDiagnostics = vi.fn(); +const mockOpenhumanLocalAiPresets = vi.fn(); +const mockOpenhumanLocalAiApplyPreset = vi.fn(); vi.mock('../../coreRpcClient', () => ({ callCoreRpc: (a: unknown) => mockCallCoreRpc(a) })); @@ -46,17 +54,14 @@ vi.mock('../../../utils/tauriCommands/auth', () => ({ vi.mock('../../../utils/tauriCommands/config', () => ({ openhumanGetClientConfig: () => mockOpenhumanGetClientConfig(), openhumanUpdateModelSettings: (a: unknown) => mockOpenhumanUpdateModelSettings(a), - openhumanUpdateLocalAiSettings: vi.fn().mockResolvedValue({ result: {} }), + openhumanUpdateLocalAiSettings: (a: unknown) => mockOpenhumanUpdateLocalAiSettings(a), })); vi.mock('../../../utils/tauriCommands/localAi', () => ({ - openhumanLocalAiStatus: vi.fn().mockResolvedValue({ result: null }), - openhumanLocalAiDiagnostics: vi.fn().mockResolvedValue(null), - openhumanLocalAiPresets: vi.fn().mockResolvedValue(null), - openhumanLocalAiApplyPreset: vi.fn().mockResolvedValue({}), - openhumanLocalAiDownload: vi.fn().mockResolvedValue({}), - openhumanLocalAiSetOllamaPath: vi.fn().mockResolvedValue({}), - openhumanLocalAiShutdownOwned: vi.fn().mockResolvedValue({}), + openhumanLocalAiStatus: (...args: unknown[]) => mockOpenhumanLocalAiStatus(...args), + openhumanLocalAiDiagnostics: (...args: unknown[]) => mockOpenhumanLocalAiDiagnostics(...args), + openhumanLocalAiPresets: (...args: unknown[]) => mockOpenhumanLocalAiPresets(...args), + openhumanLocalAiApplyPreset: (...args: unknown[]) => mockOpenhumanLocalAiApplyPreset(...args), })); // ─── Helpers ───────────────────────────────────────────────────────────────── @@ -174,6 +179,11 @@ describe('loadAISettings', () => { beforeEach(() => { mockOpenhumanGetClientConfig.mockReset(); mockAuthListProviderCredentials.mockReset(); + mockOpenhumanUpdateLocalAiSettings.mockReset(); + mockOpenhumanLocalAiStatus.mockReset(); + mockOpenhumanLocalAiDiagnostics.mockReset(); + mockOpenhumanLocalAiPresets.mockReset(); + mockOpenhumanLocalAiApplyPreset.mockReset(); }); it('returns cloudProviders with has_api_key=false when no profiles stored', async () => { @@ -362,6 +372,69 @@ describe('loadAISettings', () => { }); }); +describe('local provider facade', () => { + beforeEach(() => { + mockOpenhumanUpdateLocalAiSettings.mockReset(); + mockOpenhumanLocalAiStatus.mockReset(); + mockOpenhumanLocalAiDiagnostics.mockReset(); + mockOpenhumanLocalAiPresets.mockReset(); + mockOpenhumanLocalAiApplyPreset.mockReset(); + }); + + it('loadLocalProviderSnapshot joins status diagnostics and presets', async () => { + mockOpenhumanLocalAiStatus.mockResolvedValue({ result: { state: 'ready' } }); + mockOpenhumanLocalAiDiagnostics.mockResolvedValue({ + installed_models: [{ name: 'gemma3:1b-it-qat', size: 123 }], + }); + mockOpenhumanLocalAiPresets.mockResolvedValue({ + recommended_tier: 'ram_2_4gb', + current_tier: 'ram_2_4gb', + selected_tier: 'ram_2_4gb', + presets: [], + device: { + total_ram_bytes: 1, + cpu_count: 1, + cpu_brand: 'cpu', + os_name: 'os', + os_version: '1', + has_gpu: false, + gpu_description: null, + }, + }); + + const snapshot = await loadLocalProviderSnapshot(); + + expect(snapshot.status).toEqual({ state: 'ready' }); + expect(snapshot.installedModels).toEqual([{ name: 'gemma3:1b-it-qat', size: 123 }]); + expect(snapshot.presets?.recommended_tier).toBe('ram_2_4gb'); + }); + + it('setLocalRuntimeEnabled updates runtime_enabled and opt_in_confirmed together', async () => { + mockOpenhumanUpdateLocalAiSettings.mockResolvedValue({ result: {} }); + + await setLocalRuntimeEnabled(true); + + expect(mockOpenhumanUpdateLocalAiSettings).toHaveBeenCalledWith({ + runtime_enabled: true, + opt_in_confirmed: true, + }); + }); + + it('localProvider facade delegates applyPreset and setEnabled', async () => { + mockOpenhumanLocalAiApplyPreset.mockResolvedValue({ applied_tier: 'ram_2_4gb' }); + mockOpenhumanUpdateLocalAiSettings.mockResolvedValue({ result: {} }); + + await localProvider.applyPreset('ram_2_4gb'); + await localProvider.setEnabled(false); + + expect(mockOpenhumanLocalAiApplyPreset).toHaveBeenCalledWith('ram_2_4gb'); + expect(mockOpenhumanUpdateLocalAiSettings).toHaveBeenCalledWith({ + runtime_enabled: false, + opt_in_confirmed: false, + }); + }); +}); + // ─── saveAISettings ────────────────────────────────────────────────────────── describe('saveAISettings', () => { From 12bed1a522dec88b292f509796eee6e76d75add6 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 17:24:58 -0700 Subject: [PATCH 06/18] test: polish direct runtime coverage --- .../DeviceCapabilitySection.test.tsx | 2 +- .../local-model/DeviceCapabilitySection.tsx | 8 +++---- .../local-model/ModelDownloadSection.test.tsx | 24 +++---------------- .../local-model/ModelStatusSection.test.tsx | 5 +--- .../panels/local-model/ModelStatusSection.tsx | 3 +-- .../api/__tests__/aiSettingsApi.test.ts | 4 ++-- .../local_ai/service/public_infer_tests.rs | 5 +++- 7 files changed, 16 insertions(+), 35 deletions(-) diff --git a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx index bd80d55267..f9434c5309 100644 --- a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx +++ b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx @@ -1,5 +1,5 @@ import { fireEvent, render, screen, waitFor } from '@testing-library/react'; -import { describe, expect, it, vi, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import DeviceCapabilitySection from './DeviceCapabilitySection'; diff --git a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx index 8fdf648319..de95ccb127 100644 --- a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx +++ b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx @@ -176,10 +176,10 @@ const DeviceCapabilitySection = ({ ) : ( <>
- Run Ollama first. Local - tiers depend on an externally managed Ollama endpoint. Start it yourself, pull the - models you want, and keep using “Disabled (cloud fallback)” until the - runtime is reachable. + Run Ollama first. Local tiers + depend on an externally managed Ollama endpoint. Start it yourself, pull the models + you want, and keep using “Disabled (cloud fallback)” until the runtime + is reachable.
{ path: 'ollama://gemma3:1b-it-qat', warning: null, }, - vision: { - id: '', - provider: 'ollama', - state: 'disabled', - path: null, - warning: null, - }, + vision: { id: '', provider: 'ollama', state: 'disabled', path: null, warning: null }, embedding: { id: 'bge-m3', provider: 'ollama', @@ -93,20 +87,8 @@ describe('ModelDownloadSection runtime gate', () => { path: 'ollama://bge-m3', warning: null, }, - stt: { - id: 'whisper', - provider: 'whisper', - state: 'ondemand', - path: null, - warning: null, - }, - tts: { - id: 'piper', - provider: 'piper', - state: 'ondemand', - path: null, - warning: null, - }, + stt: { id: 'whisper', provider: 'whisper', state: 'ondemand', path: null, warning: null }, + tts: { id: 'piper', provider: 'piper', state: 'ondemand', path: null, warning: null }, ollama_available: true, }} /> diff --git a/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx b/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx index c511b8a77a..03bfcffbda 100644 --- a/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx +++ b/app/src/components/settings/panels/local-model/ModelStatusSection.test.tsx @@ -126,10 +126,7 @@ describe('ModelStatusSection diagnostics', () => { render( ); expect( diff --git a/app/src/components/settings/panels/local-model/ModelStatusSection.tsx b/app/src/components/settings/panels/local-model/ModelStatusSection.tsx index 9854635a3d..8c2c93b002 100644 --- a/app/src/components/settings/panels/local-model/ModelStatusSection.tsx +++ b/app/src/components/settings/panels/local-model/ModelStatusSection.tsx @@ -387,8 +387,7 @@ const ModelStatusSection = ({ )}
- Manage the Ollama process and model pulls outside OpenHuman, then rerun - diagnostics. + Manage the Ollama process and model pulls outside OpenHuman, then rerun diagnostics.
)} diff --git a/app/src/services/api/__tests__/aiSettingsApi.test.ts b/app/src/services/api/__tests__/aiSettingsApi.test.ts index be7f565e69..65850f447e 100644 --- a/app/src/services/api/__tests__/aiSettingsApi.test.ts +++ b/app/src/services/api/__tests__/aiSettingsApi.test.ts @@ -12,15 +12,15 @@ import { type AISettings, clearCloudProviderKey, listProviderModels, + loadAISettings, loadLocalProviderSnapshot, localProvider, - loadAISettings, parseProviderString, type ProviderRef, saveAISettings, serializeProviderRef, - setLocalRuntimeEnabled, setCloudProviderKey, + setLocalRuntimeEnabled, } from '../aiSettingsApi'; // ─── Mock declarations (must be hoisted before imports) ─────────────────────── diff --git a/src/openhuman/local_ai/service/public_infer_tests.rs b/src/openhuman/local_ai/service/public_infer_tests.rs index da6f77f188..44b62cdc90 100644 --- a/src/openhuman/local_ai/service/public_infer_tests.rs +++ b/src/openhuman/local_ai/service/public_infer_tests.rs @@ -112,7 +112,10 @@ async fn inference_connection_failure_mentions_external_ollama_runtime() { std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); } - assert!(err.contains("external Ollama endpoint"), "unexpected error: {err}"); + assert!( + err.contains("external Ollama endpoint"), + "unexpected error: {err}" + ); assert!(err.contains("already running"), "unexpected error: {err}"); } From fe06af54faa5383857a5d538bd53f9e5358c7fcc Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 17:34:04 -0700 Subject: [PATCH 07/18] fix: address inference review follow-ups --- src/openhuman/inference/ops.rs | 111 ++++++++++++++++++++++++--- src/openhuman/inference/ops_tests.rs | 22 +++--- src/openhuman/local_ai/ops.rs | 5 +- 3 files changed, 113 insertions(+), 25 deletions(-) diff --git a/src/openhuman/inference/ops.rs b/src/openhuman/inference/ops.rs index a0f2d76688..d65c04f95e 100644 --- a/src/openhuman/inference/ops.rs +++ b/src/openhuman/inference/ops.rs @@ -7,9 +7,18 @@ use crate::openhuman::local_ai::ops::{LocalAiChatMessage, ReactionDecision}; use crate::openhuman::local_ai::sentiment::SentimentResult; use crate::openhuman::local_ai::{LocalAiEmbeddingResult, LocalAiStatus, TenorSearchResult}; use crate::rpc::RpcOutcome; +use tracing::{debug, error}; + +const LOG_PREFIX: &str = "[inference::ops]"; pub async fn inference_status(config: &Config) -> Result, String> { - local_ai::rpc::local_ai_status(config).await + debug!("{LOG_PREFIX} status:start"); + let result = local_ai::rpc::local_ai_status(config).await; + match &result { + Ok(outcome) => debug!(state = %outcome.value.state, "{LOG_PREFIX} status:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} status:error"), + } + result } pub async fn inference_summarize( @@ -17,7 +26,13 @@ pub async fn inference_summarize( text: &str, max_tokens: Option, ) -> Result, String> { - local_ai::rpc::local_ai_summarize(config, text, max_tokens).await + debug!(text_len = text.len(), ?max_tokens, "{LOG_PREFIX} summarize:start"); + let result = local_ai::rpc::local_ai_summarize(config, text, max_tokens).await; + match &result { + Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} summarize:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} summarize:error"), + } + result } pub async fn inference_prompt( @@ -26,7 +41,18 @@ pub async fn inference_prompt( max_tokens: Option, no_think: Option, ) -> Result, String> { - local_ai::rpc::local_ai_prompt(config, prompt, max_tokens, no_think).await + debug!( + prompt_len = prompt.len(), + ?max_tokens, + ?no_think, + "{LOG_PREFIX} prompt:start" + ); + let result = local_ai::rpc::local_ai_prompt(config, prompt, max_tokens, no_think).await; + match &result { + Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} prompt:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} prompt:error"), + } + result } pub async fn inference_vision_prompt( @@ -35,14 +61,35 @@ pub async fn inference_vision_prompt( image_refs: &[String], max_tokens: Option, ) -> Result, String> { - local_ai::rpc::local_ai_vision_prompt(config, prompt, image_refs, max_tokens).await + debug!( + prompt_len = prompt.len(), + image_count = image_refs.len(), + ?max_tokens, + "{LOG_PREFIX} vision_prompt:start" + ); + let result = local_ai::rpc::local_ai_vision_prompt(config, prompt, image_refs, max_tokens).await; + match &result { + Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} vision_prompt:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} vision_prompt:error"), + } + result } pub async fn inference_embed( config: &Config, inputs: &[String], ) -> Result, String> { - local_ai::rpc::local_ai_embed(config, inputs).await + debug!(input_count = inputs.len(), "{LOG_PREFIX} embed:start"); + let result = local_ai::rpc::local_ai_embed(config, inputs).await; + match &result { + Ok(outcome) => debug!( + vector_count = outcome.value.vectors.len(), + dimensions = outcome.value.dimensions, + "{LOG_PREFIX} embed:ok" + ), + Err(err) => error!(error = %err, "{LOG_PREFIX} embed:error"), + } + result } pub async fn inference_chat( @@ -50,7 +97,13 @@ pub async fn inference_chat( messages: Vec, max_tokens: Option, ) -> Result, String> { - local_ai::rpc::local_ai_chat(config, messages, max_tokens).await + debug!(message_count = messages.len(), ?max_tokens, "{LOG_PREFIX} chat:start"); + let result = local_ai::rpc::local_ai_chat(config, messages, max_tokens).await; + match &result { + Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} chat:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} chat:error"), + } + result } pub async fn inference_should_react( @@ -58,14 +111,33 @@ pub async fn inference_should_react( message: &str, channel_type: &str, ) -> Result, String> { - local_ai::rpc::local_ai_should_react(config, message, channel_type).await + debug!( + message_len = message.len(), + channel_type, + "{LOG_PREFIX} should_react:start" + ); + let result = local_ai::rpc::local_ai_should_react(config, message, channel_type).await; + match &result { + Ok(outcome) => debug!( + should_react = outcome.value.should_react, + "{LOG_PREFIX} should_react:ok" + ), + Err(err) => error!(error = %err, "{LOG_PREFIX} should_react:error"), + } + result } pub async fn inference_analyze_sentiment( config: &Config, message: &str, ) -> Result, String> { - local_ai::sentiment::local_ai_analyze_sentiment(config, message).await + debug!(message_len = message.len(), "{LOG_PREFIX} analyze_sentiment:start"); + let result = local_ai::sentiment::local_ai_analyze_sentiment(config, message).await; + match &result { + Ok(outcome) => debug!(valence = %outcome.value.valence, "{LOG_PREFIX} analyze_sentiment:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} analyze_sentiment:error"), + } + result } pub async fn inference_should_send_gif( @@ -73,7 +145,20 @@ pub async fn inference_should_send_gif( message: &str, channel_type: &str, ) -> Result, String> { - local_ai::gif_decision::local_ai_should_send_gif(config, message, channel_type).await + debug!( + message_len = message.len(), + channel_type, + "{LOG_PREFIX} should_send_gif:start" + ); + let result = local_ai::gif_decision::local_ai_should_send_gif(config, message, channel_type).await; + match &result { + Ok(outcome) => debug!( + should_send_gif = outcome.value.should_send_gif, + "{LOG_PREFIX} should_send_gif:ok" + ), + Err(err) => error!(error = %err, "{LOG_PREFIX} should_send_gif:error"), + } + result } pub async fn inference_tenor_search( @@ -81,7 +166,13 @@ pub async fn inference_tenor_search( query: &str, limit: Option, ) -> Result, String> { - local_ai::gif_decision::tenor_search(config, query, limit).await + debug!(query_len = query.len(), ?limit, "{LOG_PREFIX} tenor_search:start"); + let result = local_ai::gif_decision::tenor_search(config, query, limit).await; + match &result { + Ok(outcome) => debug!(result_count = outcome.value.results.len(), "{LOG_PREFIX} tenor_search:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} tenor_search:error"), + } + result } #[cfg(test)] diff --git a/src/openhuman/inference/ops_tests.rs b/src/openhuman/inference/ops_tests.rs index 40870db77b..e95fcfbed0 100644 --- a/src/openhuman/inference/ops_tests.rs +++ b/src/openhuman/inference/ops_tests.rs @@ -1,19 +1,19 @@ use super::*; use tempfile::tempdir; -fn disabled_config() -> Config { +fn disabled_config() -> (Config, tempfile::TempDir) { let tmp = tempdir().expect("tempdir"); let mut config = Config::default(); config.workspace_dir = tmp.path().join("workspace"); config.config_path = tmp.path().join("config.toml"); config.local_ai.runtime_enabled = false; config.local_ai.opt_in_confirmed = false; - config + (config, tmp) } #[tokio::test] async fn inference_status_reports_disabled_state_when_runtime_disabled() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let outcome = inference_status(&config).await.expect("status"); assert!( matches!(outcome.value.state.as_str(), "idle" | "disabled"), @@ -24,7 +24,7 @@ async fn inference_status_reports_disabled_state_when_runtime_disabled() { #[tokio::test] async fn inference_prompt_reuses_local_ai_disabled_error() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let err = inference_prompt(&config, "hello", None, Some(true)) .await .expect_err("prompt should fail"); @@ -33,7 +33,7 @@ async fn inference_prompt_reuses_local_ai_disabled_error() { #[tokio::test] async fn inference_summarize_reuses_local_ai_disabled_error() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let err = inference_summarize(&config, "hello", None) .await .expect_err("summarize should fail"); @@ -42,7 +42,7 @@ async fn inference_summarize_reuses_local_ai_disabled_error() { #[tokio::test] async fn inference_embed_reuses_local_ai_disabled_error() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let err = inference_embed(&config, &["hello".to_string()]) .await .expect_err("embed should fail"); @@ -51,7 +51,7 @@ async fn inference_embed_reuses_local_ai_disabled_error() { #[tokio::test] async fn inference_chat_rejects_empty_messages() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let err = inference_chat(&config, vec![], None) .await .expect_err("chat should fail"); @@ -60,7 +60,7 @@ async fn inference_chat_rejects_empty_messages() { #[tokio::test] async fn inference_should_react_short_circuits_for_empty_message() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let outcome = inference_should_react(&config, " ", "web") .await .expect("reaction decision"); @@ -70,7 +70,7 @@ async fn inference_should_react_short_circuits_for_empty_message() { #[tokio::test] async fn inference_analyze_sentiment_handles_empty_message() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let outcome = inference_analyze_sentiment(&config, " ") .await .expect("sentiment"); @@ -79,7 +79,7 @@ async fn inference_analyze_sentiment_handles_empty_message() { #[tokio::test] async fn inference_should_send_gif_short_circuits_for_empty_message() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let outcome = inference_should_send_gif(&config, " ", "web") .await .expect("gif decision"); @@ -88,7 +88,7 @@ async fn inference_should_send_gif_short_circuits_for_empty_message() { #[tokio::test] async fn inference_tenor_search_requires_query() { - let config = disabled_config(); + let (config, _tmp) = disabled_config(); let err = inference_tenor_search(&config, " ", Some(3)) .await .expect_err("query validation should fail"); diff --git a/src/openhuman/local_ai/ops.rs b/src/openhuman/local_ai/ops.rs index 24de41a222..d1da46db44 100644 --- a/src/openhuman/local_ai/ops.rs +++ b/src/openhuman/local_ai/ops.rs @@ -348,10 +348,7 @@ pub async fn local_ai_download_asset( .download_asset(config, capability.trim()) .await .map_err(|e| e.to_string())?; - Ok(RpcOutcome::single_log( - output, - "local ai voice asset download triggered", - )) + Ok(RpcOutcome::single_log(output, "local ai asset download triggered")) } /// A single message in a local AI chat conversation. From febf2fa35336a69a277764b99efbf62accadeea4 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 17:35:08 -0700 Subject: [PATCH 08/18] chore: apply rustfmt review follow-ups --- src/openhuman/inference/ops.rs | 54 +++++++++++++++++++++++++--------- src/openhuman/local_ai/ops.rs | 5 +++- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/openhuman/inference/ops.rs b/src/openhuman/inference/ops.rs index d65c04f95e..a04052a21f 100644 --- a/src/openhuman/inference/ops.rs +++ b/src/openhuman/inference/ops.rs @@ -26,10 +26,17 @@ pub async fn inference_summarize( text: &str, max_tokens: Option, ) -> Result, String> { - debug!(text_len = text.len(), ?max_tokens, "{LOG_PREFIX} summarize:start"); + debug!( + text_len = text.len(), + ?max_tokens, + "{LOG_PREFIX} summarize:start" + ); let result = local_ai::rpc::local_ai_summarize(config, text, max_tokens).await; match &result { - Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} summarize:ok"), + Ok(outcome) => debug!( + output_len = outcome.value.len(), + "{LOG_PREFIX} summarize:ok" + ), Err(err) => error!(error = %err, "{LOG_PREFIX} summarize:error"), } result @@ -67,9 +74,13 @@ pub async fn inference_vision_prompt( ?max_tokens, "{LOG_PREFIX} vision_prompt:start" ); - let result = local_ai::rpc::local_ai_vision_prompt(config, prompt, image_refs, max_tokens).await; + let result = + local_ai::rpc::local_ai_vision_prompt(config, prompt, image_refs, max_tokens).await; match &result { - Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} vision_prompt:ok"), + Ok(outcome) => debug!( + output_len = outcome.value.len(), + "{LOG_PREFIX} vision_prompt:ok" + ), Err(err) => error!(error = %err, "{LOG_PREFIX} vision_prompt:error"), } result @@ -97,7 +108,11 @@ pub async fn inference_chat( messages: Vec, max_tokens: Option, ) -> Result, String> { - debug!(message_count = messages.len(), ?max_tokens, "{LOG_PREFIX} chat:start"); + debug!( + message_count = messages.len(), + ?max_tokens, + "{LOG_PREFIX} chat:start" + ); let result = local_ai::rpc::local_ai_chat(config, messages, max_tokens).await; match &result { Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} chat:ok"), @@ -113,8 +128,7 @@ pub async fn inference_should_react( ) -> Result, String> { debug!( message_len = message.len(), - channel_type, - "{LOG_PREFIX} should_react:start" + channel_type, "{LOG_PREFIX} should_react:start" ); let result = local_ai::rpc::local_ai_should_react(config, message, channel_type).await; match &result { @@ -131,10 +145,15 @@ pub async fn inference_analyze_sentiment( config: &Config, message: &str, ) -> Result, String> { - debug!(message_len = message.len(), "{LOG_PREFIX} analyze_sentiment:start"); + debug!( + message_len = message.len(), + "{LOG_PREFIX} analyze_sentiment:start" + ); let result = local_ai::sentiment::local_ai_analyze_sentiment(config, message).await; match &result { - Ok(outcome) => debug!(valence = %outcome.value.valence, "{LOG_PREFIX} analyze_sentiment:ok"), + Ok(outcome) => { + debug!(valence = %outcome.value.valence, "{LOG_PREFIX} analyze_sentiment:ok") + } Err(err) => error!(error = %err, "{LOG_PREFIX} analyze_sentiment:error"), } result @@ -147,10 +166,10 @@ pub async fn inference_should_send_gif( ) -> Result, String> { debug!( message_len = message.len(), - channel_type, - "{LOG_PREFIX} should_send_gif:start" + channel_type, "{LOG_PREFIX} should_send_gif:start" ); - let result = local_ai::gif_decision::local_ai_should_send_gif(config, message, channel_type).await; + let result = + local_ai::gif_decision::local_ai_should_send_gif(config, message, channel_type).await; match &result { Ok(outcome) => debug!( should_send_gif = outcome.value.should_send_gif, @@ -166,10 +185,17 @@ pub async fn inference_tenor_search( query: &str, limit: Option, ) -> Result, String> { - debug!(query_len = query.len(), ?limit, "{LOG_PREFIX} tenor_search:start"); + debug!( + query_len = query.len(), + ?limit, + "{LOG_PREFIX} tenor_search:start" + ); let result = local_ai::gif_decision::tenor_search(config, query, limit).await; match &result { - Ok(outcome) => debug!(result_count = outcome.value.results.len(), "{LOG_PREFIX} tenor_search:ok"), + Ok(outcome) => debug!( + result_count = outcome.value.results.len(), + "{LOG_PREFIX} tenor_search:ok" + ), Err(err) => error!(error = %err, "{LOG_PREFIX} tenor_search:error"), } result diff --git a/src/openhuman/local_ai/ops.rs b/src/openhuman/local_ai/ops.rs index d1da46db44..5a453b8031 100644 --- a/src/openhuman/local_ai/ops.rs +++ b/src/openhuman/local_ai/ops.rs @@ -348,7 +348,10 @@ pub async fn local_ai_download_asset( .download_asset(config, capability.trim()) .await .map_err(|e| e.to_string())?; - Ok(RpcOutcome::single_log(output, "local ai asset download triggered")) + Ok(RpcOutcome::single_log( + output, + "local ai asset download triggered", + )) } /// A single message in a local AI chat conversation. From 97facae5927524ddb29d819ab3f39597a3d5e378 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 17:58:22 -0700 Subject: [PATCH 09/18] Remove unused Tenor backend search helper --- src/api/rest.rs | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/api/rest.rs b/src/api/rest.rs index d068041540..81bfbb3a92 100644 --- a/src/api/rest.rs +++ b/src/api/rest.rs @@ -819,28 +819,6 @@ impl BackendOAuthClient { .await } - /// Searches for GIFs using the Tenor integration. - pub async fn search_tenor_gifs( - &self, - bearer_jwt: &str, - query: &str, - limit: Option, - ) -> Result { - anyhow::ensure!(!query.trim().is_empty(), "query is required"); - let body = serde_json::json!({ - "query": query.trim(), - "limit": limit.unwrap_or(5), - "contentFilter": "medium", - }); - self.authed_json( - bearer_jwt, - Method::POST, - "agent-integrations/tenor/search", - Some(body), - ) - .await - } - /// Creates a new thread in a communication channel. pub async fn create_channel_thread( &self, From 9c89f067a3c4c0d80df396cdf2ca2f4bda8be910 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 18:07:22 -0700 Subject: [PATCH 10/18] chore: apply module ordering format --- src/openhuman/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openhuman/mod.rs b/src/openhuman/mod.rs index b6d6737a0f..6194c6ae7e 100644 --- a/src/openhuman/mod.rs +++ b/src/openhuman/mod.rs @@ -36,8 +36,8 @@ pub mod embeddings; pub mod encryption; pub mod health; pub mod heartbeat; -pub mod inference; pub mod http_host; +pub mod inference; pub mod integrations; pub mod javascript; pub mod learning; From 8464d55ab984607cddaf2491ef2d697abd0caa12 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 16 May 2026 18:17:30 -0700 Subject: [PATCH 11/18] Move inference RPCs out of local_ai namespace --- .../local-model/ModelDownloadSection.tsx | 2 +- app/src/utils/__tests__/tauriCommands.test.ts | 2 +- src/core/observability.rs | 2 +- src/openhuman/inference/ops.rs | 45 +-- src/openhuman/inference/ops_tests.rs | 18 - src/openhuman/inference/schemas.rs | 222 ++++++----- src/openhuman/inference/schemas_tests.rs | 4 +- src/openhuman/local_ai/schemas.rs | 348 ------------------ tests/json_rpc_e2e.rs | 2 +- 9 files changed, 133 insertions(+), 512 deletions(-) diff --git a/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx b/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx index 5ac567c75f..ae45f1e067 100644 --- a/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx +++ b/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx @@ -155,7 +155,7 @@ const ModelDownloadSection = ({ />
- Calls `openhuman.local_ai_summarize` via Rust core + Calls `openhuman.inference_summarize` via Rust core