diff --git a/Cargo.lock b/Cargo.lock index 1a15232824..b12897f9ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -257,6 +257,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9dbc3a507a82b17ba0d98f6ce8fd6954ea0c8152e98009d36a40d8dcc8ce078a" +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "assign" version = "1.1.1" @@ -5062,6 +5072,7 @@ dependencies = [ "whatsapp-rust-tokio-transport", "whatsapp-rust-ureq-http-client", "whisper-rs", + "wiremock", "xz2", "zip", ] @@ -9471,6 +9482,29 @@ dependencies = [ "memchr", ] +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64 0.22.1", + "deadpool", + "futures", + "http 1.4.0", + "http-body-util", + "hyper", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 6ca3331df9..031cf9d4bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -169,6 +169,8 @@ rppal = { version = "0.22", optional = true } # crates we never use (and that bloat the dev Cargo.lock noticeably). # TestTransport only needs the `test` feature. sentry = { version = "0.47.0", default-features = false, features = ["test"] } +# Mock HTTP server for provider E2E tests (inference_provider_e2e). 
+wiremock = "0.6" [features] sandbox-landlock = ["dep:landlock"] diff --git a/app/src/components/settings/panels/LocalModelDebugPanel.tsx b/app/src/components/settings/panels/LocalModelDebugPanel.tsx index 638e247040..25a8efcc8d 100644 --- a/app/src/components/settings/panels/LocalModelDebugPanel.tsx +++ b/app/src/components/settings/panels/LocalModelDebugPanel.tsx @@ -17,13 +17,10 @@ import { type LocalAiTtsResult, openhumanLocalAiAssetsStatus, openhumanLocalAiDiagnostics, - openhumanLocalAiDownload, - openhumanLocalAiDownloadAllAssets, openhumanLocalAiDownloadAsset, openhumanLocalAiDownloadsProgress, openhumanLocalAiEmbed, openhumanLocalAiPrompt, - openhumanLocalAiSetOllamaPath, openhumanLocalAiStatus, openhumanLocalAiSummarize, openhumanLocalAiTranscribe, @@ -60,8 +57,6 @@ const LocalModelDebugPanel = () => { const [assets, setAssets] = useState(null); const [downloads, setDownloads] = useState(null); const [statusError, setStatusError] = useState(''); - const [isTriggeringDownload, setIsTriggeringDownload] = useState(false); - const [bootstrapMessage, setBootstrapMessage] = useState(''); const [assetDownloadBusy, setAssetDownloadBusy] = useState>({}); const [summaryInput, setSummaryInput] = useState(''); @@ -97,8 +92,6 @@ const LocalModelDebugPanel = () => { const [diagnosticsError, setDiagnosticsError] = useState(''); const [showErrorDetail, setShowErrorDetail] = useState(false); - const [ollamaPathInput, setOllamaPathInput] = useState(''); - const [isSettingPath, setIsSettingPath] = useState(false); const progress = useMemo(() => { const downloadProgress = progressFromDownloads(downloads); @@ -158,29 +151,6 @@ const LocalModelDebugPanel = () => { }; }, []); - const triggerDownload = async (force: boolean) => { - if (!runtimeEnabled) return; - setIsTriggeringDownload(true); - setStatusError(''); - setBootstrapMessage(''); - try { - await openhumanLocalAiDownload(force); - await openhumanLocalAiDownloadAllAssets(force); - const freshStatus = await 
openhumanLocalAiStatus(); - setStatus(freshStatus.result); - if (freshStatus.result?.state === 'ready') { - setBootstrapMessage(force ? 'Re-bootstrap complete' : 'Models verified'); - } - setTimeout(() => setBootstrapMessage(''), 3000); - } catch (err) { - const message = - err instanceof Error ? err.message : 'Failed to trigger local model bootstrap'; - setStatusError(message); - } finally { - setIsTriggeringDownload(false); - } - }; - const runSummaryTest = async () => { if (!runtimeEnabled || !summaryInput.trim()) return; setIsSummaryLoading(true); @@ -305,32 +275,6 @@ const LocalModelDebugPanel = () => { } }; - const handleSetOllamaPath = async () => { - setIsSettingPath(true); - setStatusError(''); - try { - await openhumanLocalAiSetOllamaPath(ollamaPathInput); - await loadStatus(); - } catch (err) { - setStatusError(err instanceof Error ? err.message : 'Failed to set Ollama path'); - } finally { - setIsSettingPath(false); - } - }; - - const handleClearOllamaPath = async () => { - setOllamaPathInput(''); - setIsSettingPath(true); - try { - await openhumanLocalAiSetOllamaPath(''); - await loadStatus(); - } catch (err) { - setStatusError(err instanceof Error ? 
err.message : 'Failed to clear Ollama path'); - } finally { - setIsSettingPath(false); - } - }; - const handleRunDiagnostics = async () => { setIsDiagnosticsLoading(true); setDiagnosticsError(''); @@ -361,25 +305,25 @@ const LocalModelDebugPanel = () => { isDiagnosticsLoading={isDiagnosticsLoading} diagnosticsError={diagnosticsError} statusError={statusError} - isTriggeringDownload={isTriggeringDownload} - bootstrapMessage={bootstrapMessage} + isTriggeringDownload={false} + bootstrapMessage="" progress={progress} isIndeterminateDownload={isIndeterminateDownload} isInstalling={isInstalling} isInstallError={isInstallError} showErrorDetail={showErrorDetail} - ollamaPathInput={ollamaPathInput} - isSettingPath={isSettingPath} + ollamaPathInput="" + isSettingPath={false} downloadedText={downloadedText} speedText={speedText} etaText={etaText} statusTone={statusTone} runtimeEnabled={runtimeEnabled} onRefreshStatus={() => void loadStatus()} - onTriggerDownload={force => void triggerDownload(force)} - onSetOllamaPath={() => void handleSetOllamaPath()} - onClearOllamaPath={() => void handleClearOllamaPath()} - onSetOllamaPathInput={setOllamaPathInput} + onTriggerDownload={() => {}} + onSetOllamaPath={() => {}} + onClearOllamaPath={() => {}} + onSetOllamaPathInput={() => {}} onToggleErrorDetail={() => setShowErrorDetail(v => !v)} onRunDiagnostics={() => void handleRunDiagnostics()} /> diff --git a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx new file mode 100644 index 0000000000..f9434c5309 --- /dev/null +++ b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.test.tsx @@ -0,0 +1,87 @@ +import { fireEvent, render, screen, waitFor } from '@testing-library/react'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import DeviceCapabilitySection from './DeviceCapabilitySection'; + +const mockApplyPreset = vi.fn(); + 
+vi.mock('../../../../utils/tauriCommands', () => ({ + openhumanLocalAiApplyPreset: (...args: unknown[]) => mockApplyPreset(...args), +})); + +const makePresetsData = (overrides: Record = {}) => ({ + presets: [ + { + tier: 'ram_2_4gb', + label: '2-4 GB', + description: 'Small local tier', + chat_model_id: 'gemma3:1b-it-qat', + vision_model_id: '', + embedding_model_id: 'bge-m3', + quantization: 'q4', + vision_mode: 'disabled', + supports_screen_summary: false, + target_ram_gb: 4, + min_ram_gb: 2, + approx_download_gb: 1.2, + }, + ], + recommended_tier: 'ram_2_4gb', + current_tier: 'ram_2_4gb', + selected_tier: 'ram_2_4gb', + recommend_disabled: false, + local_ai_enabled: true, + device: { + total_ram_bytes: 16 * 1024 * 1024 * 1024, + cpu_count: 8, + cpu_brand: 'Test CPU', + os_name: 'macOS', + os_version: '15', + has_gpu: true, + gpu_description: 'Test GPU', + }, + ...overrides, +}); + +describe('DeviceCapabilitySection', () => { + beforeEach(() => { + mockApplyPreset.mockReset(); + }); + + it('renders external runtime guidance when ollama is unavailable', () => { + render( + '16 GB'} + ollamaAvailable={false} + /> + ); + + expect(screen.getByText(/Run Ollama first/i)).toBeTruthy(); + expect(screen.getByRole('link', { name: 'Ollama docs' })).toBeTruthy(); + expect(screen.getByTitle('Run Ollama first to use this tier')).toBeTruthy(); + }); + + it('allows selecting the disabled cloud fallback tier', async () => { + mockApplyPreset.mockResolvedValueOnce({ applied_tier: 'disabled' }); + + render( + '16 GB'} + /> + ); + + fireEvent.click(screen.getByRole('button', { name: /Disabled.*0 GB/i })); + + await waitFor(() => { + expect(mockApplyPreset).toHaveBeenCalledWith('disabled'); + }); + }); +}); diff --git a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx index b910d2e2f9..de95ccb127 100644 --- 
a/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx +++ b/app/src/components/settings/panels/local-model/DeviceCapabilitySection.tsx @@ -14,30 +14,15 @@ interface DeviceCapabilitySectionProps { formatRamGb: (bytes: number) => string; onPresetApplied?: (result: ApplyPresetResult) => void; /** - * When `false`, the Ollama runtime isn't installed yet. Local tiers - * require Ollama, so they're rendered disabled with a notice that - * lets the user install Ollama in place. The "Disabled (cloud - * fallback)" option stays enabled since it doesn't need Ollama. + * When `false`, the external Ollama runtime isn't reachable yet. Local tiers + * stay disabled until the user runs Ollama themselves. The "Disabled (cloud + * fallback)" option stays enabled since it doesn't depend on Ollama. */ ollamaAvailable?: boolean; - /** - * Triggers the same install pipeline the Runtime Status section uses. - * Wired only when `ollamaAvailable === false` to surface an inline - * Install Ollama button next to the locked tiers. - */ onTriggerOllamaInstall?: () => void; - /** True while an install pipeline is already running. */ isTriggeringInstall?: boolean; - /** - * Live state from `local_ai_status` so the notice can show real install - * progress: `installing`, `downloading`, `degraded`, etc. The button's - * own `isTriggeringInstall` only covers the RPC round-trip (~ms); - * `installState` covers the entire backend pipeline (~60s). - */ installState?: string; - /** Latest `status.warning` text — shown under the progress label. */ installWarning?: string | null; - /** Latest `status.error_detail` — shown when state is `degraded`. 
*/ installError?: string | null; } @@ -57,9 +42,13 @@ const DeviceCapabilitySection = ({ installWarning, installError, }: DeviceCapabilitySectionProps) => { - const installInProgress = - installState === 'installing' || installState === 'downloading' || installState === 'loading'; - const installFailed = installState === 'degraded'; + void onTriggerOllamaInstall; + void isTriggeringInstall; + void installState; + void installWarning; + void installError; + const installInProgress = false; + const installFailed = false; const [applying, setApplying] = useState(null); const [applyError, setApplyError] = useState(''); const [applySuccess, setApplySuccess] = useState(null); @@ -187,26 +176,18 @@ const DeviceCapabilitySection = ({ ) : ( <>
- Install Ollama first. Local - tiers run on the Ollama runtime, which isn't installed yet. The “Disabled - (cloud fallback)” option stays available either way. + Run Ollama first. Local tiers + depend on an externally managed Ollama endpoint. Start it yourself, pull the models + you want, and keep using “Disabled (cloud fallback)” until the runtime + is reachable.
- {onTriggerOllamaInstall && ( - - )} - Install manually + Ollama docs
@@ -257,7 +238,7 @@ const DeviceCapabilitySection = ({ key={preset.tier} onClick={() => void handleApply(preset.tier)} disabled={applying !== null || locked} - title={locked ? 'Install Ollama first to use this tier' : undefined} + title={locked ? 'Run Ollama first to use this tier' : undefined} className={`w-full text-left rounded-lg border p-3 transition-colors ${ isCurrent ? 'border-primary-400 bg-primary-50' diff --git a/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx b/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx index 1e377919bf..a5156241b8 100644 --- a/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx +++ b/app/src/components/settings/panels/local-model/ModelDownloadSection.test.tsx @@ -64,4 +64,39 @@ describe('ModelDownloadSection runtime gate', () => { expect(props.onRunSummaryTest).not.toHaveBeenCalled(); expect(props.onRunPromptTest).not.toHaveBeenCalled(); }); + + it('shows external-runtime guidance for ollama-backed assets', () => { + render( + + ); + + expect( + screen.getAllByText('Manage this model in your external runtime.').length + ).toBeGreaterThan(0); + expect(screen.getAllByRole('button', { name: 'Download' }).length).toBeGreaterThan(0); + }); }); diff --git a/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx b/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx index 994f2fe42d..ae45f1e067 100644 --- a/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx +++ b/app/src/components/settings/panels/local-model/ModelDownloadSection.tsx @@ -126,12 +126,18 @@ const ModelDownloadSection = ({ {item?.path && (
{item.path}
)} - + {item?.provider === 'ollama' || item?.provider === 'lm_studio' ? ( +
+ Manage this model in your external runtime. +
+ ) : ( + + )} ))} @@ -149,7 +155,7 @@ const ModelDownloadSection = ({ />
- Calls `openhuman.local_ai_summarize` via Rust core + Calls `openhuman.inference_summarize` via Rust core
- Install manually + Ollama docs
- - {isInstallError && status?.error_detail && ( -
- - {showErrorDetail && ( -
-                {status.error_detail}
-              
- )} -
- )} - -
-
- Already installed in a custom location? -
-
- Point us at the binary and we'll use it instead of running the installer. -
-
- onSetOllamaPathInput(e.target.value)} - placeholder="C:\Users\you\AppData\Local\Programs\Ollama\ollama.exe" - className="flex-1 rounded-md border border-amber-300 bg-white px-2 py-1.5 text-xs text-stone-900 placeholder:text-stone-400 focus:border-amber-500 focus:outline-none" - /> - - {ollamaPathInput && ( - - )} -
-
); } @@ -272,7 +210,7 @@ const ModelStatusSection = ({ {status?.warning &&
{status.warning}
} {statusError &&
{statusError}
} - {isInstallError && status?.error_detail && ( + {status?.error_detail && (
)} - -
-
- Ollama Binary Path (optional) -
-
- onSetOllamaPathInput(e.target.value)} - placeholder="/usr/local/bin/ollama" - className="flex-1 rounded-md border border-stone-200 bg-white px-2 py-1.5 text-xs text-stone-900 placeholder:text-stone-400 focus:border-primary-500 focus:outline-none" - /> - - {ollamaPathInput && ( - - )} -
-
- -
- {status?.state === 'ready' ? ( - - - - - Running - - ) : ( - - )} - - {bootstrapMessage && {bootstrapMessage}} -
@@ -376,14 +251,14 @@ const ModelStatusSection = ({
{!diagnostics && !diagnosticsError && (

- Click “Run Diagnostics” to verify Ollama is running and models are - installed. + Click “Run Diagnostics” to verify your external Ollama endpoint is + reachable and has the expected models.

)} {isDiagnosticsLoading && (
- Checking Ollama server and models... + Checking Ollama endpoint and models...
)} {diagnosticsError && ( @@ -511,23 +386,9 @@ const ModelStatusSection = ({
)} - {diagnostics.repair_actions && diagnostics.repair_actions.length > 0 && ( -
-
- Suggested Fixes -
-
- {diagnostics.repair_actions.map((action, i) => ( - - ))} -
-
- )} +
+ Manage the Ollama process and model pulls outside OpenHuman, then rerun diagnostics. +
)}
diff --git a/app/src/pages/onboarding/steps/LocalAIStep.tsx b/app/src/pages/onboarding/steps/LocalAIStep.tsx index 3972ede2e0..8f324ad9cd 100644 --- a/app/src/pages/onboarding/steps/LocalAIStep.tsx +++ b/app/src/pages/onboarding/steps/LocalAIStep.tsx @@ -167,7 +167,7 @@ const LocalAIStep = ({ onNext, onBack: _onBack, onDownloadError }: LocalAIStepPr type="button" onClick={handleConsent} className="mt-3 w-full text-center text-xs text-stone-400 hover:text-stone-600 transition-colors"> - Use local AI instead (install Ollama now) + Use local AI instead (connect Ollama now) ); diff --git a/app/src/services/__tests__/coreRpcClient.test.ts b/app/src/services/__tests__/coreRpcClient.test.ts index 84667bddd8..33faf8aee2 100644 --- a/app/src/services/__tests__/coreRpcClient.test.ts +++ b/app/src/services/__tests__/coreRpcClient.test.ts @@ -176,7 +176,7 @@ describe('coreRpcClient', () => { ['openhuman.set_browser_allow_all', 'openhuman.config_set_browser_allow_all'], ['openhuman.update_browser_settings', 'openhuman.config_update_browser_settings'], ['openhuman.update_memory_settings', 'openhuman.config_update_memory_settings'], - ['openhuman.update_model_settings', 'openhuman.config_update_model_settings'], + ['openhuman.update_model_settings', 'openhuman.inference_update_model_settings'], ['openhuman.update_runtime_settings', 'openhuman.config_update_runtime_settings'], [ 'openhuman.update_screen_intelligence_settings', diff --git a/app/src/services/__tests__/rpcMethods.test.ts b/app/src/services/__tests__/rpcMethods.test.ts index 502e886832..581cf58554 100644 --- a/app/src/services/__tests__/rpcMethods.test.ts +++ b/app/src/services/__tests__/rpcMethods.test.ts @@ -46,7 +46,7 @@ describe('rpcMethods catalog', () => { test('legacy aliases point at canonical method values', () => { expect(LEGACY_METHOD_ALIASES['openhuman.update_model_settings']).toBe( - CORE_RPC_METHODS.configUpdateModelSettings + CORE_RPC_METHODS.inferenceUpdateModelSettings ); 
expect(LEGACY_METHOD_ALIASES['openhuman.workspace_onboarding_flag_set']).toBe( CORE_RPC_METHODS.configWorkspaceOnboardingFlagSet @@ -64,7 +64,11 @@ describe('rpcMethods catalog', () => { 'utf8' ), fs.readFileSync( - path.resolve(__dirname, '../../../../src/openhuman/providers/schemas.rs'), + path.resolve(__dirname, '../../../../src/openhuman/inference/provider/schemas.rs'), + 'utf8' + ), + fs.readFileSync( + path.resolve(__dirname, '../../../../src/openhuman/inference/schemas.rs'), 'utf8' ), ].join('\n'); @@ -75,9 +79,11 @@ describe('rpcMethods catalog', () => { const methodRoot = method.slice('openhuman.'.length); const namespace = methodRoot.startsWith('screen_intelligence_') ? 'screen_intelligence' - : methodRoot.startsWith('providers_') - ? 'providers' - : 'config'; + : methodRoot.startsWith('inference_') + ? 'inference' + : methodRoot.startsWith('providers_') + ? 'providers' + : 'config'; const fnName = methodRoot.slice(`${namespace}_`.length); expect(schemaSources).toContain(`namespace: "${namespace}"`); expect(schemaSources).toContain(`function: "${fnName}"`); diff --git a/app/src/services/api/__tests__/aiSettingsApi.test.ts b/app/src/services/api/__tests__/aiSettingsApi.test.ts index a9d9ed5ca9..673e27a2b3 100644 --- a/app/src/services/api/__tests__/aiSettingsApi.test.ts +++ b/app/src/services/api/__tests__/aiSettingsApi.test.ts @@ -13,11 +13,14 @@ import { clearCloudProviderKey, listProviderModels, loadAISettings, + loadLocalProviderSnapshot, + localProvider, parseProviderString, type ProviderRef, saveAISettings, serializeProviderRef, setCloudProviderKey, + setLocalRuntimeEnabled, } from '../aiSettingsApi'; // ─── Mock declarations (must be hoisted before imports) ─────────────────────── @@ -25,10 +28,15 @@ import { const mockOpenhumanGetClientConfig = vi.fn(); const mockAuthListProviderCredentials = vi.fn(); const mockOpenhumanUpdateModelSettings = vi.fn(); +const mockOpenhumanUpdateLocalAiSettings = vi.fn(); const mockAuthStoreProviderCredentials = 
vi.fn(); const mockAuthRemoveProviderCredentials = vi.fn(); const mockCallCoreRpc = vi.fn(); const mockIsTauri = vi.fn(() => true); +const mockOpenhumanLocalAiStatus = vi.fn(); +const mockOpenhumanLocalAiDiagnostics = vi.fn(); +const mockOpenhumanLocalAiPresets = vi.fn(); +const mockOpenhumanLocalAiApplyPreset = vi.fn(); vi.mock('../../coreRpcClient', () => ({ callCoreRpc: (a: unknown) => mockCallCoreRpc(a) })); @@ -46,17 +54,14 @@ vi.mock('../../../utils/tauriCommands/auth', () => ({ vi.mock('../../../utils/tauriCommands/config', () => ({ openhumanGetClientConfig: () => mockOpenhumanGetClientConfig(), openhumanUpdateModelSettings: (a: unknown) => mockOpenhumanUpdateModelSettings(a), - openhumanUpdateLocalAiSettings: vi.fn().mockResolvedValue({ result: {} }), + openhumanUpdateLocalAiSettings: (a: unknown) => mockOpenhumanUpdateLocalAiSettings(a), })); vi.mock('../../../utils/tauriCommands/localAi', () => ({ - openhumanLocalAiStatus: vi.fn().mockResolvedValue({ result: null }), - openhumanLocalAiDiagnostics: vi.fn().mockResolvedValue(null), - openhumanLocalAiPresets: vi.fn().mockResolvedValue(null), - openhumanLocalAiApplyPreset: vi.fn().mockResolvedValue({}), - openhumanLocalAiDownload: vi.fn().mockResolvedValue({}), - openhumanLocalAiSetOllamaPath: vi.fn().mockResolvedValue({}), - openhumanLocalAiShutdownOwned: vi.fn().mockResolvedValue({}), + openhumanLocalAiStatus: (...args: unknown[]) => mockOpenhumanLocalAiStatus(...args), + openhumanLocalAiDiagnostics: (...args: unknown[]) => mockOpenhumanLocalAiDiagnostics(...args), + openhumanLocalAiPresets: (...args: unknown[]) => mockOpenhumanLocalAiPresets(...args), + openhumanLocalAiApplyPreset: (...args: unknown[]) => mockOpenhumanLocalAiApplyPreset(...args), })); // ─── Helpers ───────────────────────────────────────────────────────────────── @@ -174,6 +179,11 @@ describe('loadAISettings', () => { beforeEach(() => { mockOpenhumanGetClientConfig.mockReset(); mockAuthListProviderCredentials.mockReset(); + 
mockOpenhumanUpdateLocalAiSettings.mockReset(); + mockOpenhumanLocalAiStatus.mockReset(); + mockOpenhumanLocalAiDiagnostics.mockReset(); + mockOpenhumanLocalAiPresets.mockReset(); + mockOpenhumanLocalAiApplyPreset.mockReset(); }); it('returns cloudProviders with has_api_key=false when no profiles stored', async () => { @@ -362,6 +372,69 @@ describe('loadAISettings', () => { }); }); +describe('local provider facade', () => { + beforeEach(() => { + mockOpenhumanUpdateLocalAiSettings.mockReset(); + mockOpenhumanLocalAiStatus.mockReset(); + mockOpenhumanLocalAiDiagnostics.mockReset(); + mockOpenhumanLocalAiPresets.mockReset(); + mockOpenhumanLocalAiApplyPreset.mockReset(); + }); + + it('loadLocalProviderSnapshot joins status diagnostics and presets', async () => { + mockOpenhumanLocalAiStatus.mockResolvedValue({ result: { state: 'ready' } }); + mockOpenhumanLocalAiDiagnostics.mockResolvedValue({ + installed_models: [{ name: 'gemma3:1b-it-qat', size: 123 }], + }); + mockOpenhumanLocalAiPresets.mockResolvedValue({ + recommended_tier: 'ram_2_4gb', + current_tier: 'ram_2_4gb', + selected_tier: 'ram_2_4gb', + presets: [], + device: { + total_ram_bytes: 1, + cpu_count: 1, + cpu_brand: 'cpu', + os_name: 'os', + os_version: '1', + has_gpu: false, + gpu_description: null, + }, + }); + + const snapshot = await loadLocalProviderSnapshot(); + + expect(snapshot.status).toEqual({ state: 'ready' }); + expect(snapshot.installedModels).toEqual([{ name: 'gemma3:1b-it-qat', size: 123 }]); + expect(snapshot.presets?.recommended_tier).toBe('ram_2_4gb'); + }); + + it('setLocalRuntimeEnabled updates runtime_enabled and opt_in_confirmed together', async () => { + mockOpenhumanUpdateLocalAiSettings.mockResolvedValue({ result: {} }); + + await setLocalRuntimeEnabled(true); + + expect(mockOpenhumanUpdateLocalAiSettings).toHaveBeenCalledWith({ + runtime_enabled: true, + opt_in_confirmed: true, + }); + }); + + it('localProvider facade delegates applyPreset and setEnabled', async () => { + 
mockOpenhumanLocalAiApplyPreset.mockResolvedValue({ applied_tier: 'ram_2_4gb' }); + mockOpenhumanUpdateLocalAiSettings.mockResolvedValue({ result: {} }); + + await localProvider.applyPreset('ram_2_4gb'); + await localProvider.setEnabled(false); + + expect(mockOpenhumanLocalAiApplyPreset).toHaveBeenCalledWith('ram_2_4gb'); + expect(mockOpenhumanUpdateLocalAiSettings).toHaveBeenCalledWith({ + runtime_enabled: false, + opt_in_confirmed: false, + }); + }); +}); + // ─── saveAISettings ────────────────────────────────────────────────────────── describe('saveAISettings', () => { @@ -539,7 +612,7 @@ describe('listProviderModels', () => { mockIsTauri.mockReturnValue(true); }); - it('dispatches openhuman.providers_list_models with provider_id and returns models', async () => { + it('dispatches openhuman.inference_list_models with provider_id and returns models', async () => { mockCallCoreRpc.mockResolvedValue({ result: { models: [ @@ -552,7 +625,7 @@ describe('listProviderModels', () => { const models = await listProviderModels('p_openai_1'); expect(mockCallCoreRpc).toHaveBeenCalledWith({ - method: 'openhuman.providers_list_models', + method: 'openhuman.inference_list_models', params: { provider_id: 'p_openai_1' }, }); expect(models).toHaveLength(2); diff --git a/app/src/services/api/aiSettingsApi.ts b/app/src/services/api/aiSettingsApi.ts index 5a065b5244..bf5b7f544e 100644 --- a/app/src/services/api/aiSettingsApi.ts +++ b/app/src/services/api/aiSettingsApi.ts @@ -4,7 +4,7 @@ * Sits between the panel's React state and the Rust JSON-RPC core. Three * orthogonal surfaces in one place: * - * 1. Cloud providers + per-workload routing → `openhuman.update_model_settings` + * 1. Cloud providers + per-workload routing → `openhuman.inference_update_model_settings` * 2. API keys for cloud providers → `openhuman.auth_*_provider_credentials` * (encrypted at rest in * `auth-profiles.json`) @@ -16,7 +16,6 @@ * presentation. 
*/ import { callCoreRpc } from '../../services/coreRpcClient'; -import { CORE_RPC_METHODS } from '../../services/rpcMethods'; import { authListProviderCredentials, type AuthProfileSummary, @@ -38,10 +37,7 @@ import { type ModelPresetResult, openhumanLocalAiApplyPreset, openhumanLocalAiDiagnostics, - openhumanLocalAiDownload, openhumanLocalAiPresets, - openhumanLocalAiSetOllamaPath, - openhumanLocalAiShutdownOwned, openhumanLocalAiStatus, type PresetsResponse, } from '../../utils/tauriCommands/localAi'; @@ -274,7 +270,7 @@ export async function listProviderModels(providerId: string): Promise({ - method: CORE_RPC_METHODS.providersListModels, + method: 'openhuman.inference_list_models', params: { provider_id: providerId }, }); return res?.result?.models ?? []; @@ -319,28 +315,10 @@ export async function setLocalRuntimeEnabled(enabled: boolean): Promise { await openhumanUpdateLocalAiSettings({ runtime_enabled: enabled, opt_in_confirmed: enabled }); } -/** - * Set / clear the user-configured Ollama binary path. - */ -export async function setLocalOllamaPath(path: string): Promise { - await openhumanLocalAiSetOllamaPath(path); -} - -/** - * Gate off the local-AI runtime. - */ -export async function shutdownLocalProvider(): Promise { - await setLocalRuntimeEnabled(false); - await openhumanLocalAiShutdownOwned(); -} - /** Convenience helpers re-exported so the panel imports from one place. 
*/ export const localProvider = { applyPreset: (tier: string) => openhumanLocalAiApplyPreset(tier), - download: (retry: boolean) => openhumanLocalAiDownload(retry), setEnabled: (enabled: boolean) => setLocalRuntimeEnabled(enabled), - setBinaryPath: (path: string) => setLocalOllamaPath(path), - shutdown: () => shutdownLocalProvider(), }; export type { ModelPresetResult }; diff --git a/app/src/services/rpcMethods.ts b/app/src/services/rpcMethods.ts index bc9320522f..008e539fb7 100644 --- a/app/src/services/rpcMethods.ts +++ b/app/src/services/rpcMethods.ts @@ -15,7 +15,15 @@ export const CORE_RPC_METHODS = { configWorkspaceOnboardingFlagExists: 'openhuman.config_workspace_onboarding_flag_exists', configWorkspaceOnboardingFlagSet: 'openhuman.config_workspace_onboarding_flag_set', corePing: 'core.ping', - providersListModels: 'openhuman.providers_list_models', + inferenceApplyPreset: 'openhuman.inference_apply_preset', + inferenceDiagnostics: 'openhuman.inference_diagnostics', + inferenceDeviceProfile: 'openhuman.inference_device_profile', + inferenceGetClientConfig: 'openhuman.inference_get_client_config', + inferenceListModels: 'openhuman.inference_list_models', + inferencePresets: 'openhuman.inference_presets', + inferenceUpdateLocalSettings: 'openhuman.inference_update_local_settings', + inferenceUpdateModelSettings: 'openhuman.inference_update_model_settings', + providersListModels: 'openhuman.inference_list_models', screenIntelligenceStatus: 'openhuman.screen_intelligence_status', } as const; @@ -32,15 +40,20 @@ export const LEGACY_METHOD_ALIASES: Record = { 'openhuman.update_browser_settings': CORE_RPC_METHODS.configUpdateBrowserSettings, 'openhuman.update_composio_trigger_settings': CORE_RPC_METHODS.configUpdateComposioTriggerSettings, - 'openhuman.update_local_ai_settings': CORE_RPC_METHODS.configUpdateLocalAiSettings, + 'openhuman.update_local_ai_settings': CORE_RPC_METHODS.inferenceUpdateLocalSettings, 'openhuman.update_memory_settings': 
CORE_RPC_METHODS.configUpdateMemorySettings, - 'openhuman.update_model_settings': CORE_RPC_METHODS.configUpdateModelSettings, + 'openhuman.update_model_settings': CORE_RPC_METHODS.inferenceUpdateModelSettings, 'openhuman.update_runtime_settings': CORE_RPC_METHODS.configUpdateRuntimeSettings, 'openhuman.update_screen_intelligence_settings': CORE_RPC_METHODS.configUpdateScreenIntelligenceSettings, 'openhuman.workspace_onboarding_flag_exists': CORE_RPC_METHODS.configWorkspaceOnboardingFlagExists, 'openhuman.workspace_onboarding_flag_set': CORE_RPC_METHODS.configWorkspaceOnboardingFlagSet, + 'openhuman.local_ai_apply_preset': CORE_RPC_METHODS.inferenceApplyPreset, + 'openhuman.local_ai_device_profile': CORE_RPC_METHODS.inferenceDeviceProfile, + 'openhuman.local_ai_diagnostics': CORE_RPC_METHODS.inferenceDiagnostics, + 'openhuman.local_ai_presets': CORE_RPC_METHODS.inferencePresets, + 'openhuman.providers_list_models': CORE_RPC_METHODS.inferenceListModels, }; export function normalizeRpcMethod(method: string): string { diff --git a/app/src/utils/__tests__/localAiBootstrap.test.ts b/app/src/utils/__tests__/localAiBootstrap.test.ts index 061534ed9b..a89e774b4a 100644 --- a/app/src/utils/__tests__/localAiBootstrap.test.ts +++ b/app/src/utils/__tests__/localAiBootstrap.test.ts @@ -7,7 +7,6 @@ import { vi.mock('../tauriCommands', () => ({ openhumanLocalAiApplyPreset: vi.fn(), - openhumanLocalAiDownloadAllAssets: vi.fn(), openhumanLocalAiPresets: vi.fn(), })); @@ -16,7 +15,7 @@ describe('localAiBootstrap', () => { vi.clearAllMocks(); }); - it('applies the recommended preset before starting background downloads when no tier is selected', async () => { + it('applies the recommended preset when no tier is selected', async () => { const tauriCommands = await import('../tauriCommands'); vi.mocked(tauriCommands.openhumanLocalAiPresets).mockResolvedValue({ presets: [], @@ -40,21 +39,10 @@ describe('localAiBootstrap', () => { embedding_model_id: 'all-minilm:latest', quantization: 
'qat', }); - vi.mocked(tauriCommands.openhumanLocalAiDownloadAllAssets).mockResolvedValue({ - result: { state: 'downloading', progress: 0 } as never, - logs: [], - }); - const result = await bootstrapLocalAiWithRecommendedPreset(false, '[test]'); expect(tauriCommands.openhumanLocalAiPresets).toHaveBeenCalledOnce(); expect(tauriCommands.openhumanLocalAiApplyPreset).toHaveBeenCalledWith('ram_2_4gb'); - expect(tauriCommands.openhumanLocalAiDownloadAllAssets).toHaveBeenCalledWith(false); - expect( - vi.mocked(tauriCommands.openhumanLocalAiApplyPreset).mock.invocationCallOrder[0] - ).toBeLessThan( - vi.mocked(tauriCommands.openhumanLocalAiDownloadAllAssets).mock.invocationCallOrder[0] - ); expect(result.preset.hadSelectedTier).toBe(false); expect(result.preset.appliedTier).toBe('ram_2_4gb'); }); diff --git a/app/src/utils/__tests__/tauriCommands.test.ts b/app/src/utils/__tests__/tauriCommands.test.ts index 31be1c4282..af394047a1 100644 --- a/app/src/utils/__tests__/tauriCommands.test.ts +++ b/app/src/utils/__tests__/tauriCommands.test.ts @@ -98,7 +98,7 @@ describe('tauriCommands', () => { }); test('openhumanLocalAiStatus returns upgrade hint on unknown method', async () => { - mockCallCoreRpc.mockRejectedValueOnce(new Error('unknown method: openhuman.local_ai_status')); + mockCallCoreRpc.mockRejectedValueOnce(new Error('unknown method: openhuman.inference_status')); await expect(openhumanLocalAiStatus()).rejects.toThrow( 'Local model runtime is unavailable in this core build. Restart app after updating to the latest build.' 
diff --git a/app/src/utils/localAiBootstrap.ts b/app/src/utils/localAiBootstrap.ts index fccb7f3423..ba6d4f9b2e 100644 --- a/app/src/utils/localAiBootstrap.ts +++ b/app/src/utils/localAiBootstrap.ts @@ -1,6 +1,5 @@ import { openhumanLocalAiApplyPreset, - openhumanLocalAiDownloadAllAssets, openhumanLocalAiPresets, type PresetsResponse, } from './tauriCommands'; @@ -95,23 +94,11 @@ export const ensureRecommendedLocalAiPresetIfNeeded = async ( }; }; -export const triggerLocalAiAssetBootstrap = async ( - force = false, - logPrefix = '[local-ai-bootstrap]' -) => { - console.debug(`${logPrefix} triggering local AI background bootstrap`, JSON.stringify({ force })); - return await retryLocalAiCommand( - force ? 're-bootstrap local AI assets' : 'bootstrap local AI assets', - () => openhumanLocalAiDownloadAllAssets(force), - logPrefix - ); -}; - export const bootstrapLocalAiWithRecommendedPreset = async ( force = false, logPrefix = '[local-ai-bootstrap]' ) => { + void force; const preset = await ensureRecommendedLocalAiPresetIfNeeded(logPrefix); - const download = await triggerLocalAiAssetBootstrap(force, logPrefix); - return { preset, download }; + return { preset }; }; diff --git a/app/src/utils/tauriCommands/__tests__/config.test.ts b/app/src/utils/tauriCommands/__tests__/config.test.ts index 1733263649..4f091d2467 100644 --- a/app/src/utils/tauriCommands/__tests__/config.test.ts +++ b/app/src/utils/tauriCommands/__tests__/config.test.ts @@ -22,7 +22,7 @@ describe('openhumanGetClientConfig', () => { await expect(openhumanGetClientConfig()).rejects.toThrow(/Not running in Tauri/i); }); - it('dispatches openhuman.config_get_client_config and returns the response', async () => { + it('dispatches openhuman.inference_get_client_config and returns the response', async () => { const expected = { result: { api_url: 'https://api.openai.com/v1/chat/completions', @@ -36,7 +36,7 @@ describe('openhumanGetClientConfig', () => { const got = await openhumanGetClientConfig(); - 
expect(callCoreRpc).toHaveBeenCalledWith({ method: 'openhuman.config_get_client_config' }); + expect(callCoreRpc).toHaveBeenCalledWith({ method: 'openhuman.inference_get_client_config' }); expect(got).toEqual(expected); }); }); diff --git a/app/src/utils/tauriCommands/config.test.ts b/app/src/utils/tauriCommands/config.test.ts index 5ea1c1d131..c643aeafaf 100644 --- a/app/src/utils/tauriCommands/config.test.ts +++ b/app/src/utils/tauriCommands/config.test.ts @@ -35,7 +35,7 @@ describe('tauriCommands/config', () => { expect(mockCallCoreRpc).not.toHaveBeenCalled(); }); - test('forwards the patch to openhuman.config_update_local_ai_settings', async () => { + test('forwards the patch to openhuman.inference_update_local_settings', async () => { mockCallCoreRpc.mockResolvedValue({ result: { config: {}, workspace_dir: '/tmp', config_path: '/tmp/cfg.toml' }, logs: [], @@ -52,7 +52,7 @@ describe('tauriCommands/config', () => { }; await openhumanUpdateLocalAiSettings(patch); expect(mockCallCoreRpc).toHaveBeenCalledWith({ - method: 'openhuman.config_update_local_ai_settings', + method: 'openhuman.inference_update_local_settings', params: patch, }); }); diff --git a/app/src/utils/tauriCommands/config.ts b/app/src/utils/tauriCommands/config.ts index 7094fba1bc..625d7bb8f8 100644 --- a/app/src/utils/tauriCommands/config.ts +++ b/app/src/utils/tauriCommands/config.ts @@ -225,7 +225,7 @@ export async function openhumanGetClientConfig(): Promise>({ - method: 'openhuman.config_get_client_config', + method: 'openhuman.inference_get_client_config', }); } @@ -236,7 +236,7 @@ export async function openhumanUpdateModelSettings( throw new Error('Not running in Tauri'); } return await callCoreRpc>({ - method: CORE_RPC_METHODS.configUpdateModelSettings, + method: 'openhuman.inference_update_model_settings', params: update, }); } @@ -296,7 +296,7 @@ export async function openhumanUpdateLocalAiSettings( throw new Error('Not running in Tauri'); } return await callCoreRpc>({ - method: 
'openhuman.config_update_local_ai_settings', + method: 'openhuman.inference_update_local_settings', params: update, }); } diff --git a/app/src/utils/tauriCommands/localAi.ts b/app/src/utils/tauriCommands/localAi.ts index c1b487d1cf..bcae39a194 100644 --- a/app/src/utils/tauriCommands/localAi.ts +++ b/app/src/utils/tauriCommands/localAi.ts @@ -1,5 +1,9 @@ /** - * Local AI / Ollama commands. + * Local AI / Ollama-facing commands routed through the core. + * + * The renderer never talks to Ollama directly. It always calls the core, and + * the core decides whether to route a request to the configured inference + * backend (for example an external Ollama endpoint). */ import { callCoreRpc } from '../../services/coreRpcClient'; import { CommandResponse, isTauri, tauriErrorMessage } from './common'; @@ -51,10 +55,9 @@ export interface LocalAiAssetsStatus { tts: LocalAiAssetStatus; quantization: string; /** - * True when the core can find an Ollama binary on disk. When false the UI - * should render an "Install Ollama" CTA instead of model state — every - * Ollama-backed asset will be reported as `missing` and `/api/tags` - * probes are skipped entirely (no 30s timeout). + * True when the configured Ollama endpoint is reachable enough for model + * checks. When false the UI should render external-runtime guidance instead + * of pretending the app can install or launch Ollama itself. 
*/ ollama_available: boolean; } @@ -125,38 +128,6 @@ export interface SentimentResult { confidence: number; } -export interface GifDecision { - should_send_gif: boolean; - search_query: string | null; -} - -export interface TenorMediaFormat { - url: string; - dims: [number, number]; - size: number; - duration?: number; -} - -export interface TenorGifResult { - id: string; - title: string; - contentDescription: string; - url: string; - media: { - gif?: TenorMediaFormat; - tinygif?: TenorMediaFormat; - mediumgif?: TenorMediaFormat; - mp4?: TenorMediaFormat; - tinymp4?: TenorMediaFormat; - }; - created: number; -} - -export interface TenorSearchResult { - results: TenorGifResult[]; - next: string; -} - export interface DeviceProfileResult { total_ram_bytes: number; cpu_count: number; @@ -245,11 +216,11 @@ export async function openhumanAgentChat( export async function openhumanLocalAiStatus(): Promise> { try { return await callCoreRpc>({ - method: 'openhuman.local_ai_status', + method: 'openhuman.inference_status', }); } catch (err) { const message = tauriErrorMessage(err); - if (message.includes('unknown method: openhuman.local_ai_status')) { + if (message.includes('unknown method: openhuman.inference_status')) { throw new Error( 'Local model runtime is unavailable in this core build. Restart app after updating to the latest build.' ); @@ -258,38 +229,12 @@ export async function openhumanLocalAiStatus(): Promise> { - try { - return await callCoreRpc>({ - method: 'openhuman.local_ai_download', - params: { force: force ?? false }, - }); - } catch (err) { - const message = tauriErrorMessage(err); - if (message.includes('unknown method: openhuman.local_ai_download')) { - return await openhumanLocalAiStatus(); - } - throw new Error(message); - } -} - -export async function openhumanLocalAiDownloadAllAssets( - force?: boolean -): Promise> { - return await callCoreRpc>({ - method: 'openhuman.local_ai_download_all_assets', - params: { force: force ?? 
false }, - }); -} - export async function openhumanLocalAiSummarize( text: string, maxTokens?: number ): Promise> { return await callCoreRpc>({ - method: 'openhuman.local_ai_summarize', + method: 'openhuman.inference_summarize', params: { text, max_tokens: maxTokens }, }); } @@ -300,7 +245,7 @@ export async function openhumanLocalAiPrompt( noThink?: boolean ): Promise> { return await callCoreRpc>({ - method: 'openhuman.local_ai_prompt', + method: 'openhuman.inference_prompt', params: { prompt, max_tokens: maxTokens, no_think: noThink }, }); } @@ -311,7 +256,7 @@ export async function openhumanLocalAiVisionPrompt( maxTokens?: number ): Promise> { return await callCoreRpc>({ - method: 'openhuman.local_ai_vision_prompt', + method: 'openhuman.inference_vision_prompt', params: { prompt, image_refs: imageRefs, max_tokens: maxTokens }, }); } @@ -320,7 +265,7 @@ export async function openhumanLocalAiEmbed( inputs: string[] ): Promise> { return await callCoreRpc>({ - method: 'openhuman.local_ai_embed', + method: 'openhuman.inference_embed', params: { inputs }, }); } @@ -355,70 +300,45 @@ export async function openhumanLocalAiTts( } /** - * Multi-turn chat completion via the local Ollama model. + * Multi-turn chat completion via the configured inference provider. */ export async function openhumanLocalAiChat( messages: LocalAiChatMessage[], maxTokens?: number ): Promise> { return await callCoreRpc>({ - method: 'openhuman.local_ai_chat', + method: 'openhuman.inference_chat', params: { messages, max_tokens: maxTokens }, }); } /** - * Ask the local model whether the assistant should react to a user message - * with an emoji. + * Ask the configured inference provider whether the assistant should react to + * a user message with an emoji. 
*/ export async function openhumanLocalAiShouldReact( message: string, channelType: string ): Promise> { return await callCoreRpc>({ - method: 'openhuman.local_ai_should_react', + method: 'openhuman.inference_should_react', params: { message, channel_type: channelType }, }); } /** - * Classify the emotion and sentiment of a user message via the local model. + * Classify the emotion and sentiment of a user message via the configured + * inference provider. */ export async function openhumanLocalAiAnalyzeSentiment( message: string ): Promise> { return await callCoreRpc>({ - method: 'openhuman.local_ai_analyze_sentiment', + method: 'openhuman.inference_analyze_sentiment', params: { message }, }); } -/** - * Ask the local model whether a GIF response is appropriate for this message. - */ -export async function openhumanLocalAiShouldSendGif( - message: string, - channelType: string -): Promise> { - return await callCoreRpc>({ - method: 'openhuman.local_ai_should_send_gif', - params: { message, channel_type: channelType }, - }); -} - -/** - * Search for GIFs via the backend Tenor proxy. 
- */ -export async function openhumanLocalAiTenorSearch( - query: string, - limit?: number -): Promise> { - return await callCoreRpc>({ - method: 'openhuman.local_ai_tenor_search', - params: { query, limit }, - }); -} - export async function openhumanLocalAiAssetsStatus(): Promise< CommandResponse > { @@ -445,44 +365,23 @@ export async function openhumanLocalAiDownloadAsset( } export async function openhumanLocalAiDeviceProfile(): Promise { - return await callCoreRpc({ method: 'openhuman.local_ai_device_profile' }); + return await callCoreRpc({ method: 'openhuman.inference_device_profile' }); } export async function openhumanLocalAiPresets(): Promise { - return await callCoreRpc({ method: 'openhuman.local_ai_presets' }); + return await callCoreRpc({ method: 'openhuman.inference_presets' }); } export async function openhumanLocalAiApplyPreset(tier: string): Promise { return await callCoreRpc({ - method: 'openhuman.local_ai_apply_preset', + method: 'openhuman.inference_apply_preset', params: { tier }, }); } export async function openhumanLocalAiDiagnostics(): Promise { return await callCoreRpc({ - method: 'openhuman.local_ai_diagnostics', - params: {}, - }); -} - -export async function openhumanLocalAiSetOllamaPath( - path: string -): Promise<{ ollama_binary_path: string | null; status: LocalAiStatus }> { - return await callCoreRpc<{ ollama_binary_path: string | null; status: LocalAiStatus }>({ - method: 'openhuman.local_ai_set_ollama_path', - params: { path }, - }); -} - -/** - * Gate off the local-AI runtime: kills the Ollama daemon only if OpenHuman - * spawned it (external daemons are left running), and forces status to - * `"disabled"` so the UI flips immediately. 
- */ -export async function openhumanLocalAiShutdownOwned(): Promise> { - return await callCoreRpc>({ - method: 'openhuman.local_ai_shutdown_owned', + method: 'openhuman.inference_diagnostics', params: {}, }); } diff --git a/app/test/e2e/specs/local-model-runtime.spec.ts b/app/test/e2e/specs/local-model-runtime.spec.ts index 71ec471749..aaa2aec0eb 100644 --- a/app/test/e2e/specs/local-model-runtime.spec.ts +++ b/app/test/e2e/specs/local-model-runtime.spec.ts @@ -43,9 +43,9 @@ async function waitForAnyText(candidates, timeout = 20_000) { return null; } -// Local model runtime requires Ollama binary which is not available in the -// Linux CI Docker container. The "Local model runtime" card and "Manage" -// button only appear on the home page when Ollama is detected. Skip on Linux. +// Local model runtime now talks to an external Ollama endpoint through core. +// CI does not provision a live Ollama server, so keep this spec skipped until +// a deterministic mockable local-runtime harness exists for WDIO. describe.skip('Local model runtime flow', () => { before(async () => { await startMockServer(); @@ -57,7 +57,7 @@ describe.skip('Local model runtime flow', () => { await stopMockServer(); }); - it('can trigger local model bootstrap from UI and enter active runtime state', async () => { + it('shows direct-runtime guidance instead of app-managed bootstrap controls', async () => { await triggerAuthDeepLink('e2e-local-model-token'); await waitForWindowVisible(25_000); await waitForWebView(15_000); @@ -84,14 +84,18 @@ describe.skip('Local model runtime flow', () => { 'Local model runtime is unavailable in this core build. 
Restart app after updating to the latest build.'; expect(await textExists(incompatibleError)).toBe(false); - await clickText('Bootstrap / Resume', 12_000); - await waitForAnyText(['Triggering...'], 8_000); - - const activeState = await waitForAnyText(['Downloading', 'Loading', 'Ready'], 25_000); - if (!activeState) { + const guidance = await waitForAnyText( + [ + 'Ollama runtime unavailable', + 'Manage the Ollama process and model pulls outside OpenHuman.', + 'Ollama docs', + ], + 25_000 + ); + if (!guidance) { const tree = await dumpAccessibilityTree(); - console.log('[LocalModelE2E] No active runtime state seen. Tree:\n', tree.slice(0, 5000)); + console.log('[LocalModelE2E] No direct-runtime guidance seen. Tree:\n', tree.slice(0, 5000)); } - expect(activeState).not.toBeNull(); + expect(guidance).not.toBeNull(); }); }); diff --git a/e2e/docker-compose.yml b/e2e/docker-compose.yml index fbdd02b572..97bf1a55ba 100644 --- a/e2e/docker-compose.yml +++ b/e2e/docker-compose.yml @@ -76,6 +76,19 @@ services: # ubuntu-22.04 runners give CEF. shm_size: 2gb + # Rust inference provider E2E tests (wiremock-based, no live LLM needed). + # Run: docker compose -f e2e/docker-compose.yml run --rm inference-e2e + inference-e2e: + image: ${OPENHUMAN_CI_IMAGE:-ghcr.io/tinyhumansai/openhuman_ci:latest} + working_dir: /workspace + volumes: + - ../:/workspace + - e2e-cargo-registry:/usr/local/cargo/registry + - e2e-cargo-git:/usr/local/cargo/git + - e2e-rust-target:/workspace/target + entrypoint: ["bash", "-lc"] + command: ["./scripts/test-rust-inference-e2e.sh"] + volumes: e2e-cargo-registry: e2e-cargo-git: diff --git a/gitbooks/developing/e2e-testing.md b/gitbooks/developing/e2e-testing.md index 79e1fc6e82..5371200ff8 100644 --- a/gitbooks/developing/e2e-testing.md +++ b/gitbooks/developing/e2e-testing.md @@ -215,3 +215,17 @@ bash app/scripts/e2e-agent-review.sh ``` Artifacts land in `app/test/e2e/artifacts/-agent-review/`. 
Full details + helper API: [`AGENT-OBSERVABILITY.md`](AGENT-OBSERVABILITY.md). Any failing test triggers `wdio.conf.ts`'s `afterTest` hook, which writes `failure-*.png` + `failure-*.source.xml` into the same run dir. + +--- + +## Rust inference provider E2E + +These tests (`tests/inference_provider_e2e.rs`) use **wiremock** to mock HTTP upstreams and require no live LLM API calls. They cover OpenAI-compat chat, Anthropic auth style, per-model temperature suppression, Ollama local provider, and the `/v1` HTTP endpoint auth layer. + +```bash +# Local: +bash scripts/test-rust-inference-e2e.sh + +# Via Docker (Linux, same image as CI): +docker compose -f e2e/docker-compose.yml run --rm inference-e2e +``` diff --git a/scripts/test-rust-inference-e2e.sh b/scripts/test-rust-inference-e2e.sh new file mode 100755 index 0000000000..2760b7082a --- /dev/null +++ b/scripts/test-rust-inference-e2e.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Run the inference provider E2E tests (tests/inference_provider_e2e.rs). +# +# These tests use wiremock to mock HTTP upstreams — no live LLM API is needed. +# They exercise: +# - OpenAI-compatible chat and streaming paths +# - Anthropic auth style header verification +# - Per-model temperature suppression (o1/o3/o4/gpt-5 patterns) +# - Ollama local provider (via OpenAI-compat /v1 endpoint) +# - /v1/chat/completions and /v1/models HTTP endpoint auth layer +# +# Usage: +# bash scripts/test-rust-inference-e2e.sh +# +# Via Docker (Linux): +# docker compose -f e2e/docker-compose.yml run --rm inference-e2e +# +# The shared mock backend is NOT required by these tests (they use wiremock +# directly), but this script delegates to test-rust-with-mock.sh for +# consistency with the rest of the Rust test runner tooling. 
+set -euo pipefail +exec bash "$(dirname "$0")/test-rust-with-mock.sh" --test inference_provider_e2e "$@" diff --git a/src/api/rest.rs b/src/api/rest.rs index d068041540..d85e3b0ad9 100644 --- a/src/api/rest.rs +++ b/src/api/rest.rs @@ -533,7 +533,7 @@ impl BackendOAuthClient { let is_transient_infra = crate::core::observability::is_transient_http_status_code(status_code); let is_budget_exhausted = status_code == 400 - && crate::openhuman::providers::is_budget_exhausted_message(&text); + && crate::openhuman::inference::provider::is_budget_exhausted_message(&text); if is_budget_exhausted { tracing::info!( method = method.as_str(), @@ -819,28 +819,6 @@ impl BackendOAuthClient { .await } - /// Searches for GIFs using the Tenor integration. - pub async fn search_tenor_gifs( - &self, - bearer_jwt: &str, - query: &str, - limit: Option, - ) -> Result { - anyhow::ensure!(!query.trim().is_empty(), "query is required"); - let body = serde_json::json!({ - "query": query.trim(), - "limit": limit.unwrap_or(5), - "contentFilter": "medium", - }); - self.authed_json( - bearer_jwt, - Method::POST, - "agent-integrations/tenor/search", - Some(body), - ) - .await - } - /// Creates a new thread in a communication channel. 
pub async fn create_channel_thread( &self, diff --git a/src/core/all.rs b/src/core/all.rs index b5db7f3ab8..7185a5ed3b 100644 --- a/src/core/all.rs +++ b/src/core/all.rs @@ -143,8 +143,6 @@ fn build_registered_controllers() -> Vec { .extend(crate::openhuman::channels::controllers::all_channels_registered_controllers()); // Persistent configuration management controllers.extend(crate::openhuman::config::all_config_registered_controllers()); - // Cloud provider model catalog queries - controllers.extend(crate::openhuman::providers::all_providers_registered_controllers()); // Local sidecar reachability + backend Socket.IO state diagnostics (#1527) controllers.extend(crate::openhuman::connectivity::all_connectivity_registered_controllers()); // User credentials and session management @@ -153,8 +151,10 @@ fn build_registered_controllers() -> Vec { controllers.extend(crate::openhuman::service::all_service_registered_controllers()); // Data migration utilities controllers.extend(crate::openhuman::migration::all_migration_registered_controllers()); - // Local AI model management and inference - controllers.extend(crate::openhuman::local_ai::all_local_ai_registered_controllers()); + // Unified inference domain: text / vision / embedding / local runtime / cloud providers. + // (Formerly split across inference, local_ai, and providers namespaces.) 
+ controllers.extend(crate::openhuman::inference::all_inference_registered_controllers()); + controllers.extend(crate::openhuman::inference::all_local_ai_registered_controllers()); // People resolution and interaction scoring controllers.extend(crate::openhuman::people::all_people_registered_controllers()); // Screen capture and UI analysis @@ -276,12 +276,12 @@ fn build_declared_controller_schemas() -> Vec { .extend(crate::openhuman::channels::providers::web::all_web_channel_controller_schemas()); schemas.extend(crate::openhuman::channels::controllers::all_channels_controller_schemas()); schemas.extend(crate::openhuman::config::all_config_controller_schemas()); - schemas.extend(crate::openhuman::providers::all_providers_controller_schemas()); schemas.extend(crate::openhuman::connectivity::all_connectivity_controller_schemas()); schemas.extend(crate::openhuman::credentials::all_credentials_controller_schemas()); schemas.extend(crate::openhuman::service::all_service_controller_schemas()); schemas.extend(crate::openhuman::migration::all_migration_controller_schemas()); - schemas.extend(crate::openhuman::local_ai::all_local_ai_controller_schemas()); + schemas.extend(crate::openhuman::inference::all_inference_controller_schemas()); + schemas.extend(crate::openhuman::inference::all_local_ai_controller_schemas()); schemas.extend(crate::openhuman::people::all_people_controller_schemas()); schemas.extend( crate::openhuman::screen_intelligence::all_screen_intelligence_controller_schemas(), @@ -370,6 +370,7 @@ pub fn namespace_description(namespace: &str) -> Option<&'static str> { "doctor" => Some("Run diagnostics for workspace and runtime health."), "encrypt" => Some("Encrypt secure values managed by secret storage."), "health" => Some("Process and component health snapshots."), + "inference" => Some("Connect to configured text, vision, and embedding inference runtimes."), "local_ai" => Some("Local AI chat, inference, downloads, and media operations."), "migrate" => 
Some("Data migration utilities."), "javascript" => Some("First-class JavaScript runtime bridge for listing and dispatching tools."), diff --git a/src/core/cli_tests.rs b/src/core/cli_tests.rs index 0e5e83752a..111586e358 100644 --- a/src/core/cli_tests.rs +++ b/src/core/cli_tests.rs @@ -24,6 +24,7 @@ fn grouped_schemas_contains_migrated_namespaces() { assert!(grouped.contains_key("auth")); assert!(grouped.contains_key("service")); assert!(grouped.contains_key("migrate")); + assert!(grouped.contains_key("inference")); assert!(grouped.contains_key("local_ai")); } diff --git a/src/core/jsonrpc.rs b/src/core/jsonrpc.rs index 32bbb0c58d..da8376965e 100644 --- a/src/core/jsonrpc.rs +++ b/src/core/jsonrpc.rs @@ -123,8 +123,9 @@ pub async fn rpc_handler(State(state): State, Json(req): Json Resu crate::core::event_bus::publish_global( crate::core::event_bus::DomainEvent::SessionExpired { source: format!("jsonrpc.invoke_method:{method}"), - reason: crate::openhuman::providers::ops::sanitize_api_error(msg), + reason: crate::openhuman::inference::provider::ops::sanitize_api_error(msg), }, ); } @@ -554,6 +555,8 @@ pub fn build_core_http_router(socketio_enabled: bool) -> Router { .route("/rpc", post(rpc_handler)) .route("/ws/dictation", get(dictation_ws_handler)) .route("/auth/telegram", get(telegram_auth_handler)) + // OpenAI-compatible inference endpoint (/v1/chat/completions, /v1/models) + .nest("/v1", crate::openhuman::inference::http::router()) .fallback(not_found_handler) .layer(middleware::from_fn(http_request_log_middleware)) .layer(middleware::from_fn(crate::core::auth::rpc_auth_middleware)) @@ -1033,7 +1036,7 @@ async fn run_server_inner( // daemon was externally managed) and clear the spawn marker so the // next launch doesn't try to reclaim a daemon that's already dead. // Bounded so a wedged Ollama can't hold up app shutdown. 
- if let Some(svc) = crate::openhuman::local_ai::try_global() { + if let Some(svc) = crate::openhuman::inference::local::try_global() { let cfg = crate::openhuman::config::Config::load_or_init() .await .unwrap_or_default(); diff --git a/src/core/jsonrpc_tests.rs b/src/core/jsonrpc_tests.rs index a9e3d908fc..3eed02cf0f 100644 --- a/src/core/jsonrpc_tests.rs +++ b/src/core/jsonrpc_tests.rs @@ -281,18 +281,6 @@ async fn invoke_migrate_openclaw_rejects_unknown_param() { assert!(err.contains("unknown param 'x'")); } -#[tokio::test] -async fn invoke_local_ai_download_asset_missing_required_param_fails_validation() { - let err = invoke_method( - default_state(), - "openhuman.local_ai_download_asset", - json!({}), - ) - .await - .expect_err("missing capability should fail"); - assert!(err.contains("missing required param 'capability'")); -} - #[test] fn http_schema_dump_includes_openhuman_and_core_methods() { let dump = build_http_schema_dump(); diff --git a/src/core/legacy_aliases.rs b/src/core/legacy_aliases.rs index e1c67b94f5..32fbe5e400 100644 --- a/src/core/legacy_aliases.rs +++ b/src/core/legacy_aliases.rs @@ -53,7 +53,7 @@ const LEGACY_ALIASES: &[(&str, &str)] = &[ ), ( "openhuman.update_local_ai_settings", - "openhuman.config_update_local_ai_settings", + "openhuman.inference_update_local_settings", ), ( "openhuman.update_memory_settings", @@ -61,7 +61,7 @@ const LEGACY_ALIASES: &[(&str, &str)] = &[ ), ( "openhuman.update_model_settings", - "openhuman.config_update_model_settings", + "openhuman.inference_update_model_settings", ), ( "openhuman.update_runtime_settings", @@ -79,6 +79,23 @@ const LEGACY_ALIASES: &[(&str, &str)] = &[ "openhuman.workspace_onboarding_flag_set", "openhuman.config_workspace_onboarding_flag_set", ), + ( + "openhuman.local_ai_apply_preset", + "openhuman.inference_apply_preset", + ), + ( + "openhuman.local_ai_device_profile", + "openhuman.inference_device_profile", + ), + ( + "openhuman.local_ai_diagnostics", + 
"openhuman.inference_diagnostics", + ), + ("openhuman.local_ai_presets", "openhuman.inference_presets"), + ( + "openhuman.providers_list_models", + "openhuman.inference_list_models", + ), ]; /// Returns the server-side legacy → canonical RPC alias table. diff --git a/src/core/observability.rs b/src/core/observability.rs index 68ad0054b4..8f685f17d8 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -31,7 +31,7 @@ pub type Tag<'a> = (&'a str, &'a str); /// - **504** Gateway Timeout /// /// Single source of truth for both the call-site classifier -/// (`openhuman::providers::ops::should_report_provider_http_failure`) and the +/// (`openhuman::inference::provider::ops::should_report_provider_http_failure`) and the /// `before_send` filter (`is_transient_provider_http_failure`). Update here /// and both sites pick it up — keeps the two layers from drifting. pub const TRANSIENT_PROVIDER_HTTP_STATUSES: &[u16] = &[408, 429, 502, 503, 504, 520]; @@ -123,7 +123,7 @@ pub fn expected_error_kind(message: &str) -> Option { if is_local_ai_capability_unavailable_message(&lower) { return Some(ExpectedErrorKind::LocalAiCapabilityUnavailable); } - if crate::openhuman::providers::is_budget_exhausted_message(message) { + if crate::openhuman::inference::provider::is_budget_exhausted_message(message) { return Some(ExpectedErrorKind::BudgetExhausted); } if is_session_expired_message(message) { @@ -617,7 +617,7 @@ pub(crate) fn report_error_message( /// that the reliable-provider layer already handles via retry + fallback. /// /// The primary suppression lives at the call site -/// (`openhuman::providers::ops::should_report_provider_http_failure`), +/// (`openhuman::inference::provider::ops::should_report_provider_http_failure`), /// which short-circuits transient codes before `report_error` ever fires. 
/// This helper is intended for use inside the `sentry::ClientOptions` /// `before_send` hook as defense-in-depth — it catches any future call @@ -920,7 +920,7 @@ fn event_contains_budget_exhausted_message(event: &sentry::protocol::Event<'_>) if event .message .as_deref() - .is_some_and(crate::openhuman::providers::is_budget_exhausted_message) + .is_some_and(crate::openhuman::inference::provider::is_budget_exhausted_message) { return true; } @@ -929,7 +929,7 @@ fn event_contains_budget_exhausted_message(event: &sentry::protocol::Event<'_>) exception .value .as_deref() - .is_some_and(crate::openhuman::providers::is_budget_exhausted_message) + .is_some_and(crate::openhuman::inference::provider::is_budget_exhausted_message) }) } @@ -2130,7 +2130,7 @@ mod tests { "local ai is disabled", "rpc", "invoke_method", - &[("method", "openhuman.local_ai_prompt")], + &[("method", "openhuman.inference_prompt")], ); report_error_or_expected( "ollama API key not set", diff --git a/src/main.rs b/src/main.rs index 51d346fa5c..e2c456db55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -53,7 +53,7 @@ fn main() { // still fires for genuine outages. Per-attempt reports flood // Sentry — see OPENHUMAN-TAURI-2E (~1393 events), -84 (~1050), // -T (~871). The primary fix lives in - // `openhuman::providers::ops::should_report_provider_http_failure` + // `openhuman::inference::provider::ops::should_report_provider_http_failure` // (transient codes excluded). This filter catches any future call // site that bypasses it. if openhuman_core::core::observability::is_transient_provider_http_failure(&event) { @@ -86,7 +86,7 @@ fn main() { // Drop 401 "Session expired. Please log in again." bodies surfaced // by llm_provider / backend_api, plus pre-flight "no session token // stored" guards from the rpc dispatcher. 
Primary suppression - // lives at the call sites (`openhuman::providers::ops::api_error` + // lives at the call sites (`openhuman::inference::provider::ops::api_error` // publishes a SessionExpired event_bus signal and short-circuits; // the rpc dispatcher's `is_session_expired_error` skip-path in // `src/core/jsonrpc.rs` redirects to a tracing::info). This diff --git a/src/openhuman/agent/bus.rs b/src/openhuman/agent/bus.rs index 922fcb8f0d..55e32685dc 100644 --- a/src/openhuman/agent/bus.rs +++ b/src/openhuman/agent/bus.rs @@ -21,10 +21,10 @@ use tokio::sync::mpsc; use crate::core::event_bus::register_native_global; use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::config::MultimodalConfig; +use crate::openhuman::inference::provider::{ChatMessage, Provider}; use crate::openhuman::prompt_injection::{ enforce_prompt_input, PromptEnforcementAction, PromptEnforcementContext, }; -use crate::openhuman::providers::{ChatMessage, Provider}; use crate::openhuman::tools::Tool; use super::harness::definition::{AgentDefinitionRegistry, SandboxMode}; diff --git a/src/openhuman/agent/cost.rs b/src/openhuman/agent/cost.rs index 188e8bee32..af9bfd37c7 100644 --- a/src/openhuman/agent/cost.rs +++ b/src/openhuman/agent/cost.rs @@ -23,7 +23,7 @@ //! cents-per-Mtok at the tier level is good enough for client-side //! telemetry and budget gating. PRs adding new tiers should add a row. -use crate::openhuman::providers::UsageInfo; +use crate::openhuman::inference::provider::UsageInfo; /// Per-million-token rates for a single model tier. 
/// diff --git a/src/openhuman/agent/dispatcher.rs b/src/openhuman/agent/dispatcher.rs index d0da21af0f..36d2ec64cd 100644 --- a/src/openhuman/agent/dispatcher.rs +++ b/src/openhuman/agent/dispatcher.rs @@ -1,7 +1,7 @@ use crate::openhuman::agent::harness::parse_tool_calls; use crate::openhuman::agent::pformat::{self, PFormatRegistry}; use crate::openhuman::context::prompt::ToolCallFormat; -use crate::openhuman::providers::{ +use crate::openhuman::inference::provider::{ ChatMessage, ChatResponse, ConversationMessage, ToolResultMessage, }; use crate::openhuman::tools::{Tool, ToolSpec}; diff --git a/src/openhuman/agent/dispatcher_tests.rs b/src/openhuman/agent/dispatcher_tests.rs index 6a0c9af871..34ada29d16 100644 --- a/src/openhuman/agent/dispatcher_tests.rs +++ b/src/openhuman/agent/dispatcher_tests.rs @@ -21,7 +21,7 @@ fn xml_dispatcher_parses_tool_calls() { fn native_dispatcher_roundtrip() { let response = ChatResponse { text: Some("ok".into()), - tool_calls: vec![crate::openhuman::providers::ToolCall { + tool_calls: vec![crate::openhuman::inference::provider::ToolCall { id: "tc1".into(), name: "file_read".into(), arguments: "{\"path\":\"a.txt\"}".into(), diff --git a/src/openhuman/agent/harness/bughunt_tests.rs b/src/openhuman/agent/harness/bughunt_tests.rs index dae7656ce5..481b0350db 100644 --- a/src/openhuman/agent/harness/bughunt_tests.rs +++ b/src/openhuman/agent/harness/bughunt_tests.rs @@ -9,7 +9,7 @@ use super::test_support::{KeywordRule, KeywordScriptedProvider, ScriptedToolCall}; use super::tool_loop::run_tool_call_loop; -use crate::openhuman::providers::{ChatMessage, ChatResponse, ToolCall}; +use crate::openhuman::inference::provider::{ChatMessage, ChatResponse, ToolCall}; use crate::openhuman::tools::traits::{Tool, ToolResult}; use async_trait::async_trait; use parking_lot::Mutex; diff --git a/src/openhuman/agent/harness/fork_context.rs b/src/openhuman/agent/harness/fork_context.rs index 477998bbbe..03c51b0b92 100644 --- 
a/src/openhuman/agent/harness/fork_context.rs +++ b/src/openhuman/agent/harness/fork_context.rs @@ -12,8 +12,8 @@ use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::config::AgentConfig; +use crate::openhuman::inference::provider::Provider; use crate::openhuman::memory::Memory; -use crate::openhuman::providers::Provider; use crate::openhuman::skills::Skill; use crate::openhuman::tools::{Tool, ToolSpec}; use std::path::PathBuf; diff --git a/src/openhuman/agent/harness/harness_gap_tests.rs b/src/openhuman/agent/harness/harness_gap_tests.rs index 032bba505e..d3348b4a7b 100644 --- a/src/openhuman/agent/harness/harness_gap_tests.rs +++ b/src/openhuman/agent/harness/harness_gap_tests.rs @@ -22,9 +22,9 @@ use crate::openhuman::agent::error::AgentError; use crate::openhuman::agent::harness::tool_loop::run_tool_call_loop; use crate::openhuman::context::guard::{ContextCheckResult, ContextGuard}; -use crate::openhuman::providers::traits::ProviderCapabilities; -use crate::openhuman::providers::Provider; -use crate::openhuman::providers::{ChatMessage, ChatRequest, ChatResponse, UsageInfo}; +use crate::openhuman::inference::provider::traits::ProviderCapabilities; +use crate::openhuman::inference::provider::Provider; +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, ChatResponse, UsageInfo}; use crate::openhuman::tool_timeout::parse_tool_timeout_secs; use crate::openhuman::tools::{Tool, ToolResult}; use async_trait::async_trait; diff --git a/src/openhuman/agent/harness/parse.rs b/src/openhuman/agent/harness/parse.rs index 5c38a8e5ab..52b34afbc1 100644 --- a/src/openhuman/agent/harness/parse.rs +++ b/src/openhuman/agent/harness/parse.rs @@ -1,4 +1,4 @@ -use crate::openhuman::providers::ToolCall; +use crate::openhuman::inference::provider::ToolCall; use crate::openhuman::tools::Tool; use regex::Regex; use std::sync::LazyLock; diff --git a/src/openhuman/agent/harness/session/builder.rs b/src/openhuman/agent/harness/session/builder.rs 
index 219176756c..52bdd39b50 100644 --- a/src/openhuman/agent/harness/session/builder.rs +++ b/src/openhuman/agent/harness/session/builder.rs @@ -18,8 +18,8 @@ use crate::openhuman::agent::memory_loader::{DefaultMemoryLoader, MemoryLoader}; use crate::openhuman::config::{Config, ContextConfig}; use crate::openhuman::context::prompt::SystemPromptBuilder; use crate::openhuman::context::{ContextManager, ProviderSummarizer}; +use crate::openhuman::inference::provider::{self, Provider}; use crate::openhuman::memory::{self, Memory}; -use crate::openhuman::providers::{self, Provider}; use crate::openhuman::security::SecurityPolicy; use crate::openhuman::tools::{self, Tool, ToolSpec}; use anyhow::Result; @@ -514,7 +514,7 @@ impl Agent { /// legacy behaviour). /// /// The welcome agent uses this entry point when routed from the - /// Tauri web channel (see `channels::providers::web::build_session_agent`). + /// Tauri web channel (see `channels::provider::web::build_session_agent`). pub fn from_config_for_agent(config: &Config, agent_id: &str) -> Result { // Look up the target definition up front so we can fail fast // with a clear error instead of building half an agent and then @@ -594,7 +594,7 @@ impl Agent { /// [`SystemPromptBuilder`], seeded with the `source_chunks` snapshot /// from the spawning subconscious reflection (#623). /// - /// Used by `channels::providers::web::build_session_agent` when a + /// Used by `channels::provider::web::build_session_agent` when a /// chat thread's seed message metadata flags /// `origin == "subconscious_reflection"` — the orchestrator then /// has the same memory context the reflection-LLM had, so the user's @@ -762,7 +762,7 @@ impl Agent { // backend. Those are valuable but orthogonal — they can be layered // back on top of the factory's output in a follow-up without // re-introducing the routing bypass. 
- let _ = providers::ProviderRuntimeOptions { + let _ = provider::ProviderRuntimeOptions { auth_profile_override: None, openhuman_dir: config.config_path.parent().map(std::path::PathBuf::from), secrets_encrypt: config.secrets.encrypt, @@ -775,7 +775,7 @@ impl Agent { _ => "reasoning", }; let (provider, mut model_name): (Box, String) = - crate::openhuman::providers::create_chat_provider(provider_role, config)?; + crate::openhuman::inference::provider::create_chat_provider(provider_role, config)?; let target_agent_id = target_def .map(|def| def.id.as_str()) .unwrap_or("orchestrator"); @@ -945,21 +945,22 @@ impl Agent { let full_config = Arc::new(config.clone()); // For cloud reflection, wrap the provider in an Arc. // For local, no provider needed. - let reflection_provider: Option> = - if config.learning.reflection_source - == crate::openhuman::config::ReflectionSource::Cloud - { - Some(Arc::from(providers::create_routed_provider( - config.inference_url.as_deref(), - config.api_url.as_deref(), - config.api_key.as_deref(), - &config.reliability, - &config.model_routes, - &model_name, - )?)) - } else { - None - }; + let reflection_provider: Option< + Arc, + > = if config.learning.reflection_source + == crate::openhuman::config::ReflectionSource::Cloud + { + Some(Arc::from(provider::create_routed_provider( + config.inference_url.as_deref(), + config.api_url.as_deref(), + config.api_key.as_deref(), + &config.reliability, + &config.model_routes, + &model_name, + )?)) + } else { + None + }; post_turn_hooks.push(Arc::new(crate::openhuman::learning::ReflectionHook::new( config.learning.clone(), full_config.clone(), diff --git a/src/openhuman/agent/harness/session/runtime.rs b/src/openhuman/agent/harness/session/runtime.rs index b79bdf7ea3..1b44ad021f 100644 --- a/src/openhuman/agent/harness/session/runtime.rs +++ b/src/openhuman/agent/harness/session/runtime.rs @@ -11,11 +11,11 @@ use super::types::{Agent, AgentBuilder}; use crate::core::event_bus::{publish_global, 
DomainEvent}; use crate::openhuman::agent::dispatcher::ParsedToolCall; use crate::openhuman::agent::error::AgentError; +use crate::openhuman::inference::provider::{self, ConversationMessage, Provider, ToolCall}; use crate::openhuman::memory::Memory; use crate::openhuman::prompt_injection::{ enforce_prompt_input, PromptEnforcementAction, PromptEnforcementContext, }; -use crate::openhuman::providers::{self, ConversationMessage, Provider, ToolCall}; use crate::openhuman::tools::{Tool, ToolSpec}; use crate::openhuman::util::truncate_with_ellipsis; use anyhow::Result; @@ -276,21 +276,21 @@ impl Agent { let learned = crate::openhuman::agent::prompts::LearnedContextData::default(); let system_prompt = self.build_system_prompt(learned)?; - let mut cached: Vec = + let mut cached: Vec = Vec::with_capacity(prior.len() + 1); - cached.push(crate::openhuman::providers::ChatMessage::system( + cached.push(crate::openhuman::inference::provider::ChatMessage::system( system_prompt, )); for (role, content) in prior { let chat = match role.as_str() { - "user" => crate::openhuman::providers::ChatMessage::user(content), + "user" => crate::openhuman::inference::provider::ChatMessage::user(content), "agent" | "assistant" => { - crate::openhuman::providers::ChatMessage::assistant(content) + crate::openhuman::inference::provider::ChatMessage::assistant(content) } // Fall back to user role for unknown senders rather than // dropping the message — losing context is worse than // mislabelling a system/tool message. 
- _ => crate::openhuman::providers::ChatMessage::user(content), + _ => crate::openhuman::inference::provider::ChatMessage::user(content), }; cached.push(chat); } @@ -376,7 +376,7 @@ impl Agent { return kind.to_string(); } - let scrubbed = providers::sanitize_api_error(&err.to_string()) + let scrubbed = provider::sanitize_api_error(&err.to_string()) .replace(['\n', '\r', '\t'], " ") .split_whitespace() .collect::>() @@ -405,7 +405,7 @@ impl Agent { /// If the provider response already contains native tool calls, they are /// returned as-is. pub(super) fn persisted_tool_calls_for_history( - response: &crate::openhuman::providers::ChatResponse, + response: &crate::openhuman::inference::provider::ChatResponse, parsed_calls: &[ParsedToolCall], iteration: usize, ) -> Vec { diff --git a/src/openhuman/agent/harness/session/runtime_tests.rs b/src/openhuman/agent/harness/session/runtime_tests.rs index acf5ec422b..adc34aaa6f 100644 --- a/src/openhuman/agent/harness/session/runtime_tests.rs +++ b/src/openhuman/agent/harness/session/runtime_tests.rs @@ -2,8 +2,8 @@ use super::*; use crate::core::event_bus::{global, init_global, DomainEvent}; use crate::openhuman::agent::dispatcher::XmlToolDispatcher; use crate::openhuman::agent::error::AgentError; +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, ChatResponse, UsageInfo}; use crate::openhuman::memory::Memory; -use crate::openhuman::providers::{ChatMessage, ChatRequest, ChatResponse, UsageInfo}; use anyhow::anyhow; use async_trait::async_trait; use parking_lot::Mutex; @@ -121,7 +121,7 @@ fn sanitizers_and_tool_call_helpers_cover_fallback_paths() { assert_eq!(calls[0].tool_call_id.as_deref(), Some("parsed-3-1")); assert_eq!(calls[1].tool_call_id.as_deref(), Some("keep")); - let response = crate::openhuman::providers::ChatResponse { + let response = crate::openhuman::inference::provider::ChatResponse { text: Some(String::new()), tool_calls: vec![], usage: None, @@ -302,12 +302,12 @@ fn 
helper_paths_cover_no_overlap_native_calls_and_truncation() { assert_eq!(appended.len(), 1); assert!(matches!(&appended[0], ConversationMessage::Chat(msg) if msg.content == "b")); - let native_calls = vec![crate::openhuman::providers::ToolCall { + let native_calls = vec![crate::openhuman::inference::provider::ToolCall { id: "native-1".into(), name: "echo".into(), arguments: "{}".into(), }]; - let response = crate::openhuman::providers::ChatResponse { + let response = crate::openhuman::inference::provider::ChatResponse { text: Some(String::new()), tool_calls: native_calls.clone(), usage: None, diff --git a/src/openhuman/agent/harness/session/tests.rs b/src/openhuman/agent/harness/session/tests.rs index 43bcff9de0..efa50917dd 100644 --- a/src/openhuman/agent/harness/session/tests.rs +++ b/src/openhuman/agent/harness/session/tests.rs @@ -7,8 +7,8 @@ use super::types::{Agent, AgentBuilder}; use crate::openhuman::agent::dispatcher::{NativeToolDispatcher, XmlToolDispatcher}; +use crate::openhuman::inference::provider::{ChatRequest, ConversationMessage, Provider}; use crate::openhuman::memory::Memory; -use crate::openhuman::providers::{ChatRequest, ConversationMessage, Provider}; use crate::openhuman::tools::Tool; use anyhow::Result; use async_trait::async_trait; @@ -16,7 +16,7 @@ use parking_lot::Mutex; use std::sync::Arc; struct MockProvider { - responses: Mutex>, + responses: Mutex>, } #[async_trait] @@ -36,10 +36,10 @@ impl Provider for MockProvider { _request: ChatRequest<'_>, _model: &str, _temperature: f64, - ) -> Result { + ) -> Result { let mut guard = self.responses.lock(); if guard.is_empty() { - return Ok(crate::openhuman::providers::ChatResponse { + return Ok(crate::openhuman::inference::provider::ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, @@ -56,7 +56,7 @@ impl Provider for MockProvider { #[derive(Default)] struct RecordingProvider { captures: Mutex>, - responses: Mutex>, + responses: Mutex>, } #[derive(Clone)] @@ -82,7 +82,7 
@@ impl Provider for RecordingProvider { request: ChatRequest<'_>, model: &str, _temperature: f64, - ) -> Result { + ) -> Result { let system_prompt = request .messages .iter() @@ -95,7 +95,7 @@ impl Provider for RecordingProvider { let mut guard = self.responses.lock(); if guard.is_empty() { - return Ok(crate::openhuman::providers::ChatResponse { + return Ok(crate::openhuman::inference::provider::ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, @@ -248,7 +248,7 @@ async fn turn_without_tools_returns_text() { let workspace_path = workspace.path().to_path_buf(); let provider = Box::new(MockProvider { - responses: Mutex::new(vec![crate::openhuman::providers::ChatResponse { + responses: Mutex::new(vec![crate::openhuman::inference::provider::ChatResponse { text: Some("hello".into()), tool_calls: vec![], usage: None, @@ -282,16 +282,16 @@ async fn turn_with_native_dispatcher_handles_tool_results_variant() { let provider = Box::new(MockProvider { responses: Mutex::new(vec![ - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some(String::new()), - tool_calls: vec![crate::openhuman::providers::ToolCall { + tool_calls: vec![crate::openhuman::inference::provider::ToolCall { id: "tc1".into(), name: "echo".into(), arguments: "{}".into(), }], usage: None, }, - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, @@ -330,7 +330,7 @@ async fn turn_with_native_dispatcher_persists_fallback_tool_calls() { let provider = Box::new(MockProvider { responses: Mutex::new(vec![ - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some( "Checking...\n{\"name\":\"echo\",\"arguments\":{}}" .into(), @@ -338,7 +338,7 @@ async fn turn_with_native_dispatcher_persists_fallback_tool_calls() { tool_calls: vec![], usage: None, }, - 
crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, @@ -415,9 +415,9 @@ async fn turn_dispatches_spawn_subagent_through_full_path() { // 3. Parent turn iter 1 — fold sub-agent result into "Based on the research, X is Y." let provider = Box::new(MockProvider { responses: Mutex::new(vec![ - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some(String::new()), - tool_calls: vec![crate::openhuman::providers::ToolCall { + tool_calls: vec![crate::openhuman::inference::provider::ToolCall { id: "call-spawn".into(), name: "spawn_subagent".into(), arguments: serde_json::json!({ @@ -428,12 +428,12 @@ async fn turn_dispatches_spawn_subagent_through_full_path() { }], usage: None, }, - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some("X is Y".into()), tool_calls: vec![], usage: None, }, - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some("Based on the research, X is Y.".into()), tool_calls: vec![], usage: None, @@ -510,17 +510,17 @@ async fn system_prompt_and_model_are_byte_stable_across_turns() { let provider = Arc::new(RecordingProvider { responses: Mutex::new(vec![ - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some("first".into()), tool_calls: vec![], usage: None, }, - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some("second".into()), tool_calls: vec![], usage: None, }, - crate::openhuman::providers::ChatResponse { + crate::openhuman::inference::provider::ChatResponse { text: Some("third".into()), tool_calls: vec![], usage: None, @@ -690,8 +690,8 @@ fn seed_resume_from_messages_primes_cached_transcript() { fn seed_resume_from_messages_is_noop_on_warm_agent() { let mut 
agent = build_minimal_agent_with_definition_name(Some("orchestrator")); agent.cached_transcript_messages = Some(vec![ - crate::openhuman::providers::ChatMessage::system("warm prefix"), - crate::openhuman::providers::ChatMessage::user("hi"), + crate::openhuman::inference::provider::ChatMessage::system("warm prefix"), + crate::openhuman::inference::provider::ChatMessage::user("hi"), ]); agent .seed_resume_from_messages(vec![("user".into(), "different".into())], "different") diff --git a/src/openhuman/agent/harness/session/transcript.rs b/src/openhuman/agent/harness/session/transcript.rs index 4108757823..3ee740fc51 100644 --- a/src/openhuman/agent/harness/session/transcript.rs +++ b/src/openhuman/agent/harness/session/transcript.rs @@ -50,7 +50,7 @@ //! the session transcript can eventually replace the separate thread //! message log without losing message-level addressing. -use crate::openhuman::providers::ChatMessage; +use crate::openhuman::inference::provider::ChatMessage; use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs index 05f0a8b2e1..ad1fe5d63f 100644 --- a/src/openhuman/agent/harness/session/turn.rs +++ b/src/openhuman/agent/harness/session/turn.rs @@ -27,8 +27,10 @@ use crate::openhuman::agent::memory_loader::collect_recall_citations; use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::context::prompt::{LearnedContextData, PromptContext, PromptTool}; use crate::openhuman::context::{ReductionOutcome, ARCHIVIST_EXTRACTION_PROMPT}; +use crate::openhuman::inference::provider::{ + ChatMessage, ChatRequest, ConversationMessage, ProviderDelta, +}; use crate::openhuman::memory::MemoryCategory; -use crate::openhuman::providers::{ChatMessage, ChatRequest, ConversationMessage, ProviderDelta}; use crate::openhuman::tools::traits::ToolCallOptions; use crate::openhuman::tools::Tool; use 
crate::openhuman::util::truncate_with_ellipsis; @@ -1664,7 +1666,7 @@ impl Agent { output_tokens, cached_input_tokens, charged_amount_usd, - thread_id: crate::openhuman::providers::thread_context::current_thread_id(), + thread_id: crate::openhuman::inference::provider::thread_context::current_thread_id(), }; if let Err(err) = transcript::write_transcript(path, messages, &meta, turn_usage) { diff --git a/src/openhuman/agent/harness/session/turn_tests.rs b/src/openhuman/agent/harness/session/turn_tests.rs index f5618e2dbc..e39f94639e 100644 --- a/src/openhuman/agent/harness/session/turn_tests.rs +++ b/src/openhuman/agent/harness/session/turn_tests.rs @@ -3,8 +3,8 @@ use crate::core::event_bus::{global, init_global, DomainEvent}; use crate::openhuman::agent::dispatcher::XmlToolDispatcher; use crate::openhuman::agent::hooks::{PostTurnHook, TurnContext}; use crate::openhuman::agent::memory_loader::MemoryLoader; +use crate::openhuman::inference::provider::{ChatRequest, ChatResponse, Provider}; use crate::openhuman::memory::Memory; -use crate::openhuman::providers::{ChatRequest, ChatResponse, Provider}; use crate::openhuman::tools::Tool; use crate::openhuman::tools::ToolResult; use async_trait::async_trait; diff --git a/src/openhuman/agent/harness/session/types.rs b/src/openhuman/agent/harness/session/types.rs index 487be16357..e44ad4a35f 100644 --- a/src/openhuman/agent/harness/session/types.rs +++ b/src/openhuman/agent/harness/session/types.rs @@ -12,8 +12,8 @@ use crate::openhuman::agent::memory_loader::MemoryLoader; use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::context::prompt::SystemPromptBuilder; use crate::openhuman::context::ContextManager; +use crate::openhuman::inference::provider::{ChatMessage, ConversationMessage, Provider}; use crate::openhuman::memory::Memory; -use crate::openhuman::providers::{ChatMessage, ConversationMessage, Provider}; use crate::openhuman::tools::{Tool, ToolSpec}; use std::path::PathBuf; use std::sync::Arc; 
diff --git a/src/openhuman/agent/harness/subagent_runner/extract_tool.rs b/src/openhuman/agent/harness/subagent_runner/extract_tool.rs index fa2c327cd3..1d07f216d6 100644 --- a/src/openhuman/agent/harness/subagent_runner/extract_tool.rs +++ b/src/openhuman/agent/harness/subagent_runner/extract_tool.rs @@ -28,7 +28,7 @@ use super::handoff::{chunk_content, ResultHandoffCache, HANDOFF_MAX_ENTRIES}; use crate::openhuman::agent::harness::session::transcript::{ resolve_keyed_transcript_path, write_transcript, MessageUsage, TranscriptMeta, TurnUsage, }; -use crate::openhuman::providers::{ChatMessage, Provider}; +use crate::openhuman::inference::provider::{ChatMessage, Provider}; use crate::openhuman::tools::{Tool, ToolCategory, ToolResult}; // ── Tunables ────────────────────────────────────────────────────────── @@ -485,7 +485,7 @@ fn write_extract_transcript( output_tokens: 0, cached_input_tokens: 0, charged_amount_usd: 0.0, - thread_id: crate::openhuman::providers::thread_context::current_thread_id(), + thread_id: crate::openhuman::inference::provider::thread_context::current_thread_id(), }; if let Err(e) = write_transcript(&path, &messages, &meta, Some(&turn_usage)) { diff --git a/src/openhuman/agent/harness/subagent_runner/mod.rs b/src/openhuman/agent/harness/subagent_runner/mod.rs index e74fd708ad..6ffb1abe24 100644 --- a/src/openhuman/agent/harness/subagent_runner/mod.rs +++ b/src/openhuman/agent/harness/subagent_runner/mod.rs @@ -13,7 +13,7 @@ //! definition asks to omit (`omit_identity`, `omit_memory_context`, //! `omit_safety_preamble`, `omit_skills_catalog`). //! 5. Runs a slim inner tool-call loop using the parent's -//! [`crate::openhuman::providers::Provider`] and returns a single +//! [`crate::openhuman::inference::provider::Provider`] and returns a single //! text result. The intra-sub-agent history never leaks back to the //! parent — the parent only sees one compact tool result. //! 
diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs index 9f80d6bb56..361b86f69b 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops.rs @@ -29,8 +29,8 @@ use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::context::prompt::{ render_subagent_system_prompt, PromptContext, PromptTool, SubagentRenderOptions, }; +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, Provider, ToolCall}; use crate::openhuman::memory::conversations::ConversationMessage; -use crate::openhuman::providers::{ChatMessage, ChatRequest, Provider, ToolCall}; use crate::openhuman::tools::{Tool, ToolCategory, ToolSpec}; /// Prompt suffix injected into every typed sub-agent run. @@ -135,23 +135,25 @@ pub(super) fn resolve_subagent_provider( match spec { ModelSpec::Hint(workload) => match config { - Some(cfg) => match crate::openhuman::providers::create_chat_provider(workload, cfg) { - Ok((p, m)) => { - log::info!( + Some(cfg) => { + match crate::openhuman::inference::provider::create_chat_provider(workload, cfg) { + Ok((p, m)) => { + log::info!( "[subagent_runner] role={} agent_id={} resolved via workload factory model={}", workload, agent_id, m ); - (std::sync::Arc::from(p), m) - } - Err(e) => { - log::warn!( + (std::sync::Arc::from(p), m) + } + Err(e) => { + log::warn!( "[subagent_runner] workload '{}' provider build failed ({}) for agent_id={} — \ falling back to parent provider + parent model '{}'", workload, e, agent_id, parent_model ); - (parent_provider, parent_model) + (parent_provider, parent_model) + } } - }, + } None => { log::warn!( "[subagent_runner] config load failed for workload '{}' (agent_id={}) — \ @@ -1184,7 +1186,7 @@ async fn run_inner_loop( output_tokens: usage.output_tokens, cached_input_tokens: usage.cached_input_tokens, charged_amount_usd: usage.charged_amount_usd, - thread_id: 
crate::openhuman::providers::thread_context::current_thread_id(), + thread_id: crate::openhuman::inference::provider::thread_context::current_thread_id(), }; if let Err(err) = transcript::write_transcript(&path, history, &meta, None) { tracing::debug!( diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs index ee1faa1eab..462af5b070 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs @@ -134,7 +134,9 @@ fn append_subagent_role_contract_is_idempotent() { // ── End-to-end runner tests with mock provider ──────────────────────── use crate::openhuman::agent::harness::fork_context::with_parent_context; -use crate::openhuman::providers::{ChatRequest as PChatRequest, ChatResponse, Provider, ToolCall}; +use crate::openhuman::inference::provider::{ + ChatRequest as PChatRequest, ChatResponse, Provider, ToolCall, +}; use parking_lot::Mutex; use std::sync::Arc; @@ -142,7 +144,7 @@ use std::sync::Arc; /// to verify the bytes that arrive at the model. 
#[derive(Clone)] struct CapturedRequest { - messages: Vec, + messages: Vec, tool_count: usize, model: String, } diff --git a/src/openhuman/agent/harness/test_support.rs b/src/openhuman/agent/harness/test_support.rs index ec44869dc8..e97d453d5b 100644 --- a/src/openhuman/agent/harness/test_support.rs +++ b/src/openhuman/agent/harness/test_support.rs @@ -39,8 +39,10 @@ use async_trait::async_trait; use parking_lot::Mutex; use serde_json::json; -use crate::openhuman::providers::traits::ProviderCapabilities; -use crate::openhuman::providers::{ChatMessage, ChatRequest, ChatResponse, Provider, ToolCall}; +use crate::openhuman::inference::provider::traits::ProviderCapabilities; +use crate::openhuman::inference::provider::{ + ChatMessage, ChatRequest, ChatResponse, Provider, ToolCall, +}; /// One scripted reaction the [`KeywordScriptedProvider`] can emit when /// it sees its keyword in the latest user/tool turn. diff --git a/src/openhuman/agent/harness/test_support_test.rs b/src/openhuman/agent/harness/test_support_test.rs index 2436ac522e..74732644b1 100644 --- a/src/openhuman/agent/harness/test_support_test.rs +++ b/src/openhuman/agent/harness/test_support_test.rs @@ -9,7 +9,7 @@ use super::test_support::{ KeywordScriptedProvider, ScriptedToolCall, }; use super::tool_loop::run_tool_call_loop; -use crate::openhuman::providers::{ChatMessage, ChatRequest, ChatResponse, Provider}; +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, ChatResponse, Provider}; use crate::openhuman::tools::traits::{PermissionLevel, Tool, ToolCategory, ToolResult, ToolScope}; use async_trait::async_trait; use serde_json::json; diff --git a/src/openhuman/agent/harness/tests.rs b/src/openhuman/agent/harness/tests.rs index c144d24c25..550800589b 100644 --- a/src/openhuman/agent/harness/tests.rs +++ b/src/openhuman/agent/harness/tests.rs @@ -5,8 +5,8 @@ use super::parse::{ parse_tool_calls, parse_tool_calls_from_json_value, tools_to_openai_format, }; use 
super::tool_loop::{run_tool_call_loop, DEFAULT_MAX_TOOL_ITERATIONS}; -use crate::openhuman::providers::traits::ProviderCapabilities; -use crate::openhuman::providers::{ChatMessage, ChatRequest, ChatResponse, Provider}; +use crate::openhuman::inference::provider::traits::ProviderCapabilities; +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, ChatResponse, Provider}; use crate::openhuman::tools::{self, Tool}; use async_trait::async_trait; use base64::{engine::general_purpose::STANDARD, Engine as _}; diff --git a/src/openhuman/agent/harness/tool_loop.rs b/src/openhuman/agent/harness/tool_loop.rs index 35ce566481..140d4e9971 100644 --- a/src/openhuman/agent/harness/tool_loop.rs +++ b/src/openhuman/agent/harness/tool_loop.rs @@ -3,7 +3,7 @@ use crate::openhuman::agent::multimodal; use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::agent::stop_hooks::{current_stop_hooks, StopDecision, TurnState}; use crate::openhuman::approval::{ApprovalManager, ApprovalRequest, ApprovalResponse}; -use crate::openhuman::providers::{ +use crate::openhuman::inference::provider::{ ChatMessage, ChatRequest, Provider, ProviderCapabilityError, ProviderDelta, }; use crate::openhuman::tools::traits::ToolScope; @@ -417,8 +417,12 @@ pub(crate) async fn run_tool_call_loop( // signal and floods Sentry — see OPENHUMAN-TAURI-3Y/3Z // (~46 events combined) and the underlying TAURI-2E/84/T // (~3300 events from raw per-attempt 429/503/504 reports). 
- let transient = crate::openhuman::providers::reliable::is_rate_limited(&e) - || crate::openhuman::providers::reliable::is_upstream_unhealthy(&e); + let transient = crate::openhuman::inference::provider::reliable::is_rate_limited( + &e, + ) + || crate::openhuman::inference::provider::reliable::is_upstream_unhealthy( + &e, + ); if transient { tracing::warn!( domain = "agent", diff --git a/src/openhuman/agent/harness/tool_loop_tests.rs b/src/openhuman/agent/harness/tool_loop_tests.rs index 6ca463471c..684d4e69da 100644 --- a/src/openhuman/agent/harness/tool_loop_tests.rs +++ b/src/openhuman/agent/harness/tool_loop_tests.rs @@ -1,8 +1,8 @@ use super::*; use crate::openhuman::approval::ApprovalManager; use crate::openhuman::config::AutonomyConfig; -use crate::openhuman::providers::traits::ProviderCapabilities; -use crate::openhuman::providers::ChatResponse; +use crate::openhuman::inference::provider::traits::ProviderCapabilities; +use crate::openhuman::inference::provider::ChatResponse; use crate::openhuman::security::AutonomyLevel; use crate::openhuman::tools::{ToolResult, ToolScope}; use async_trait::async_trait; @@ -388,7 +388,7 @@ async fn run_tool_call_loop_persists_native_tool_results_as_tool_messages() { responses: Mutex::new(vec![ Ok(ChatResponse { text: Some(String::new()), - tool_calls: vec![crate::openhuman::providers::ToolCall { + tool_calls: vec![crate::openhuman::inference::provider::ToolCall { id: "call-1".into(), name: "echo".into(), arguments: "{}".into(), diff --git a/src/openhuman/agent/multimodal.rs b/src/openhuman/agent/multimodal.rs index c9195efbf7..921d334d2f 100644 --- a/src/openhuman/agent/multimodal.rs +++ b/src/openhuman/agent/multimodal.rs @@ -1,5 +1,5 @@ use crate::openhuman::config::{build_runtime_proxy_client_with_timeouts, MultimodalConfig}; -use crate::openhuman::providers::ChatMessage; +use crate::openhuman::inference::provider::ChatMessage; use base64::{engine::general_purpose::STANDARD, Engine as _}; use reqwest::Client; use 
std::path::Path; diff --git a/src/openhuman/agent/schemas.rs b/src/openhuman/agent/schemas.rs index 16f3ad8a05..abf4159043 100644 --- a/src/openhuman/agent/schemas.rs +++ b/src/openhuman/agent/schemas.rs @@ -240,7 +240,7 @@ fn handle_chat(params: Map) -> ControllerFuture { let p = deserialize_params::(params)?; let mut config = config_rpc::load_config_with_timeout().await?; to_json( - crate::openhuman::local_ai::rpc::agent_chat( + crate::openhuman::inference::local::rpc::agent_chat( &mut config, &p.message, p.model_override, @@ -256,7 +256,7 @@ fn handle_chat_simple(params: Map) -> ControllerFuture { let p = deserialize_params::(params)?; let config = config_rpc::load_config_with_timeout().await?; to_json( - crate::openhuman::local_ai::rpc::agent_chat_simple( + crate::openhuman::inference::local::rpc::agent_chat_simple( &config, &p.message, p.model_override, diff --git a/src/openhuman/agent/stop_hooks.rs b/src/openhuman/agent/stop_hooks.rs index f3ad0d759d..b89df26c23 100644 --- a/src/openhuman/agent/stop_hooks.rs +++ b/src/openhuman/agent/stop_hooks.rs @@ -180,7 +180,7 @@ impl StopHook for MaxIterationsStopHook { #[cfg(test)] mod tests { use super::*; - use crate::openhuman::providers::UsageInfo; + use crate::openhuman::inference::provider::UsageInfo; fn cost_with_usd(usd: f64) -> TurnCost { let mut tc = TurnCost::new(); diff --git a/src/openhuman/agent/tests.rs b/src/openhuman/agent/tests.rs index 4cccb90154..125107f43f 100644 --- a/src/openhuman/agent/tests.rs +++ b/src/openhuman/agent/tests.rs @@ -29,11 +29,11 @@ use crate::openhuman::agent::dispatcher::{ }; use crate::openhuman::agent::harness::session::Agent; use crate::openhuman::config::{AgentConfig, MemoryConfig}; -use crate::openhuman::memory::{self, Memory}; -use crate::openhuman::providers::{ +use crate::openhuman::inference::provider::{ ChatMessage, ChatRequest, ChatResponse, ConversationMessage, Provider, ToolCall, ToolResultMessage, }; +use crate::openhuman::memory::{self, Memory}; use 
crate::openhuman::tools::{Tool, ToolResult}; use anyhow::Result; use async_trait::async_trait; diff --git a/src/openhuman/agent/triage/evaluator.rs b/src/openhuman/agent/triage/evaluator.rs index 375c58ce7c..c69625f81f 100644 --- a/src/openhuman/agent/triage/evaluator.rs +++ b/src/openhuman/agent/triage/evaluator.rs @@ -38,10 +38,10 @@ use crate::openhuman::agent::harness::definition::{AgentDefinition, PromptSource use crate::openhuman::agent::harness::AgentDefinitionRegistry; use crate::openhuman::config::Config; use crate::openhuman::config::MultimodalConfig; -use crate::openhuman::providers::reliable::{ +use crate::openhuman::inference::provider::reliable::{ is_rate_limited, is_upstream_unhealthy, parse_retry_after_ms, }; -use crate::openhuman::providers::ChatMessage; +use crate::openhuman::inference::provider::ChatMessage; use crate::openhuman::scheduler_gate::LlmPermit; use super::decision::{parse_triage_decision, ParseError, TriageDecision}; diff --git a/src/openhuman/agent/triage/evaluator_tests.rs b/src/openhuman/agent/triage/evaluator_tests.rs index b9d9b74539..4a8a17dace 100644 --- a/src/openhuman/agent/triage/evaluator_tests.rs +++ b/src/openhuman/agent/triage/evaluator_tests.rs @@ -2,7 +2,7 @@ use super::*; use crate::openhuman::agent::agents::BUILTINS; use crate::openhuman::agent::bus::{mock_agent_run_turn, AgentTurnResponse}; use crate::openhuman::agent::harness::AgentDefinitionRegistry; -use crate::openhuman::providers::Provider; +use crate::openhuman::inference::provider::Provider; use async_trait::async_trait; use serde_json::json; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/src/openhuman/agent/triage/routing.rs b/src/openhuman/agent/triage/routing.rs index d2b40aaef4..838863b8cf 100644 --- a/src/openhuman/agent/triage/routing.rs +++ b/src/openhuman/agent/triage/routing.rs @@ -14,7 +14,9 @@ use std::sync::Arc; use anyhow::Context; use crate::openhuman::config::Config; -use crate::openhuman::providers::{self, Provider, 
ProviderRuntimeOptions, INFERENCE_BACKEND_ID}; +use crate::openhuman::inference::provider::{ + self, Provider, ProviderRuntimeOptions, INFERENCE_BACKEND_ID, +}; /// The concrete provider + metadata that [`crate::openhuman::agent::triage::evaluator::run_triage`] /// should use for this particular triage turn. @@ -66,7 +68,7 @@ pub async fn resolve_provider_with_config(config: &Config) -> anyhow::Result Option { - use crate::openhuman::providers::compatible::{AuthStyle, OpenAiCompatibleProvider}; + use crate::openhuman::inference::provider::compatible::{AuthStyle, OpenAiCompatibleProvider}; let local_cfg = &config.local_ai; if !local_cfg.runtime_enabled { @@ -100,7 +102,7 @@ pub fn build_local_provider_with_config(config: &Config) -> Option Option anyhow::Result { let default_model = config @@ -149,7 +151,7 @@ fn build_remote_provider(config: &Config) -> anyhow::Result { secrets_encrypt: config.secrets.encrypt, reasoning_enabled: config.runtime.reasoning_enabled, }; - let provider_box = providers::create_routed_provider_with_options( + let provider_box = provider::create_routed_provider_with_options( config.inference_url.as_deref(), config.api_url.as_deref(), config.api_key.as_deref(), diff --git a/src/openhuman/app_state/ops.rs b/src/openhuman/app_state/ops.rs index a37abccb53..d94a49b86a 100644 --- a/src/openhuman/app_state/ops.rs +++ b/src/openhuman/app_state/ops.rs @@ -19,7 +19,7 @@ use crate::openhuman::autocomplete::AutocompleteStatus; use crate::openhuman::config::rpc as config_rpc; use crate::openhuman::config::Config; use crate::openhuman::credentials::session_support::build_session_state; -use crate::openhuman::local_ai::LocalAiStatus; +use crate::openhuman::inference::LocalAiStatus; use crate::openhuman::screen_intelligence::AccessibilityStatus; use crate::openhuman::service::{ServiceState, ServiceStatus}; use crate::rpc::RpcOutcome; @@ -410,11 +410,11 @@ async fn build_runtime_snapshot(config: &Config) -> RuntimeSnapshot { .await }; - let local_ai = match 
crate::openhuman::local_ai::rpc::local_ai_status(config).await { + let local_ai = match crate::openhuman::inference::rpc::inference_status(config).await { Ok(outcome) => outcome.value, Err(error) => { warn!("{LOG_PREFIX} local_ai status failed during snapshot: {error}"); - crate::openhuman::local_ai::LocalAiStatus::disabled(config) + crate::openhuman::inference::LocalAiStatus::disabled(config) } }; diff --git a/src/openhuman/autocomplete/core/engine.rs b/src/openhuman/autocomplete/core/engine.rs index 2051def25f..b471a26022 100644 --- a/src/openhuman/autocomplete/core/engine.rs +++ b/src/openhuman/autocomplete/core/engine.rs @@ -1,5 +1,5 @@ use crate::openhuman::config::Config; -use crate::openhuman::local_ai; +use crate::openhuman::inference::local as local_ai; use chrono::Utc; use once_cell::sync::Lazy; use std::sync::{Arc, Once}; diff --git a/src/openhuman/channels/context.rs b/src/openhuman/channels/context.rs index aee676fba5..6068de1ca5 100644 --- a/src/openhuman/channels/context.rs +++ b/src/openhuman/channels/context.rs @@ -1,7 +1,7 @@ //! Shared channel runtime state and memory helpers. 
+use crate::openhuman::inference::provider::{ChatMessage, Provider}; use crate::openhuman::memory::Memory; -use crate::openhuman::providers::{ChatMessage, Provider}; use crate::openhuman::tools::Tool; use crate::openhuman::util::truncate_with_ellipsis; use std::collections::HashMap; @@ -61,7 +61,8 @@ pub(crate) struct ChannelRuntimeContext { pub(crate) api_url: Option, pub(crate) inference_url: Option, pub(crate) reliability: Arc, - pub(crate) provider_runtime_options: crate::openhuman::providers::ProviderRuntimeOptions, + pub(crate) provider_runtime_options: + crate::openhuman::inference::provider::ProviderRuntimeOptions, pub(crate) workspace_dir: Arc, pub(crate) message_timeout_secs: u64, pub(crate) multimodal: crate::openhuman::config::MultimodalConfig, @@ -207,8 +208,8 @@ pub(crate) async fn build_memory_context( mod tests { use super::*; use crate::openhuman::channels::traits; + use crate::openhuman::inference::provider::Provider; use crate::openhuman::memory::{Memory, MemoryCategory, MemoryEntry}; - use crate::openhuman::providers::Provider; use crate::openhuman::tools::{Tool, ToolResult}; use async_trait::async_trait; @@ -344,8 +345,8 @@ mod tests { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: crate::openhuman::providers::ProviderRuntimeOptions::default( - ), + provider_runtime_options: + crate::openhuman::inference::provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(PathBuf::from("/tmp")), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -387,12 +388,12 @@ mod tests { let ctx = runtime_context(); let sender = "discord_alice_reply_thread:thread-1"; let mut history = Vec::new(); - history.push(crate::openhuman::providers::ChatMessage::user("short")); - history.extend( - (0..20).map(|idx| { - crate::openhuman::providers::ChatMessage::assistant("x".repeat(700 + idx)) - }), - 
); + history.push(crate::openhuman::inference::provider::ChatMessage::user( + "short", + )); + history.extend((0..20).map(|idx| { + crate::openhuman::inference::provider::ChatMessage::assistant("x".repeat(700 + idx)) + })); ctx.conversation_histories .lock() .unwrap() diff --git a/src/openhuman/channels/providers/presentation.rs b/src/openhuman/channels/providers/presentation.rs index 6f9049c7d4..16f57287bc 100644 --- a/src/openhuman/channels/providers/presentation.rs +++ b/src/openhuman/channels/providers/presentation.rs @@ -396,7 +396,8 @@ async fn try_reaction(user_message: &str) -> Option { return None; } - match crate::openhuman::local_ai::ops::local_ai_should_react(&config, user_message, "web").await + match crate::openhuman::inference::ops::inference_should_react(&config, user_message, "web") + .await { Ok(outcome) => { let decision = outcome.value; diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index 39fcc6d82f..fc0d2c5781 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -194,7 +194,7 @@ fn extract_provider_error_detail(err: &str) -> Option { if trimmed.is_empty() { return None; } - let sanitized = crate::openhuman::providers::sanitize_api_error(trimmed); + let sanitized = crate::openhuman::inference::provider::sanitize_api_error(trimmed); return Some(crate::openhuman::util::truncate_with_ellipsis( &sanitized, MAX_DETAIL_CHARS, @@ -681,7 +681,10 @@ async fn run_chat_task( model_override: model_override.clone(), temperature, target_agent_id: target_agent_id.clone(), - provider_binding: crate::openhuman::providers::provider_for_role(provider_role, &config), + provider_binding: crate::openhuman::inference::provider::provider_for_role( + provider_role, + &config, + ), }; let prior = { @@ -803,7 +806,7 @@ async fn run_chat_task( // `thread_context::current_thread_id()` and forwards it on // `/openai/v1/chat/completions` so the backend can group // InferenceLog 
entries and reuse the KV cache for this thread. - let result = match crate::openhuman::providers::thread_context::with_thread_id( + let result = match crate::openhuman::inference::provider::thread_context::with_thread_id( thread_id.to_string(), agent.run_single(message), ) diff --git a/src/openhuman/channels/routes.rs b/src/openhuman/channels/routes.rs index d9dfca444b..3daa0650b6 100644 --- a/src/openhuman/channels/routes.rs +++ b/src/openhuman/channels/routes.rs @@ -5,7 +5,7 @@ use super::context::{ }; use super::traits; use super::{Channel, SendMessage}; -use crate::openhuman::providers::{self, Provider}; +use crate::openhuman::inference::provider::{self, Provider}; use serde::Deserialize; use std::fmt::Write; use std::path::Path; @@ -83,7 +83,7 @@ fn resolve_provider_alias(name: &str) -> Option { return None; } - let providers_list = providers::list_providers(); + let providers_list = provider::list_providers(); for provider in providers_list { if provider.name.eq_ignore_ascii_case(candidate) || provider @@ -177,7 +177,7 @@ pub(crate) async fn get_or_create_provider( (None, None) }; - let provider = providers::create_resilient_provider_with_options( + let provider = provider::create_resilient_provider_with_options( inference_url, backend_url, None, @@ -237,7 +237,7 @@ fn build_providers_help_response(current: &ChannelRouteSelection) -> String { response.push_str("\nSwitch provider with `/models `.\n"); response.push_str("Switch model with `/model `.\n\n"); response.push_str("Available providers:\n"); - for provider in providers::list_providers() { + for provider in provider::list_providers() { if provider.aliases.is_empty() { let _ = writeln!(response, "- {}", provider.name); } else { @@ -286,7 +286,7 @@ pub(crate) async fn handle_runtime_command_if_needed( ) } Err(err) => { - let safe_err = providers::sanitize_api_error(&err.to_string()); + let safe_err = provider::sanitize_api_error(&err.to_string()); format!( "Failed to initialize provider `{provider_name}`. 
Route unchanged.\nDetails: {safe_err}" ) diff --git a/src/openhuman/channels/routes_tests.rs b/src/openhuman/channels/routes_tests.rs index d5527767ff..06124b51ba 100644 --- a/src/openhuman/channels/routes_tests.rs +++ b/src/openhuman/channels/routes_tests.rs @@ -3,8 +3,8 @@ use crate::openhuman::channels::context::{ ChannelRuntimeContext, ProviderCacheMap, RouteSelectionMap, }; use crate::openhuman::channels::traits::ChannelMessage; +use crate::openhuman::inference::provider::{ChatMessage, Provider}; use crate::openhuman::memory::{Memory, MemoryCategory, MemoryEntry}; -use crate::openhuman::providers::{ChatMessage, Provider}; use crate::openhuman::tools::{Tool, ToolResult}; use async_trait::async_trait; use std::collections::HashMap; @@ -147,7 +147,8 @@ fn runtime_context(workspace_dir: PathBuf) -> ChannelRuntimeContext { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: crate::openhuman::providers::ProviderRuntimeOptions::default(), + provider_runtime_options: + crate::openhuman::inference::provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(workspace_dir), message_timeout_secs: 60, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -182,7 +183,7 @@ fn runtime_command_parsing_and_provider_support_are_channel_scoped() { #[test] fn provider_alias_and_route_selection_round_trip() { - let first_provider = providers::list_providers() + let first_provider = provider::list_providers() .into_iter() .next() .expect("provider registry should not be empty"); diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs index 08346515b8..72763c9348 100644 --- a/src/openhuman/channels/runtime/dispatch.rs +++ b/src/openhuman/channels/runtime/dispatch.rs @@ -20,7 +20,7 @@ use crate::openhuman::channels::traits; use crate::openhuman::channels::{Channel, SendMessage}; use 
crate::openhuman::composio::fetch_connected_integrations; use crate::openhuman::config::Config; -use crate::openhuman::providers::{self, ChatMessage}; +use crate::openhuman::inference::provider::{self, ChatMessage}; use crate::openhuman::tools::{orchestrator_tools, Tool}; use crate::openhuman::util::truncate_with_ellipsis; use std::collections::HashSet; @@ -389,7 +389,7 @@ async fn resolve_target_agent(channel: &str) -> AgentScoping { }; // Welcome is **desktop-app only**. The web channel has its own - // bespoke chat path (`channels::providers::web::run_chat_task` → + // bespoke chat path (`channels::providers::web::run_chat_task` → // `pick_target_agent_id`) that routes to the welcome agent while // `chat_onboarding_completed` is false. Every other channel // (telegram, slack, discord, mattermost, signal, …) flows through @@ -798,7 +798,7 @@ pub(crate) async fn process_channel_message( ("provider", route.provider.as_str()), ], ); - let safe_err = providers::sanitize_api_error(&err.to_string()); + let safe_err = provider::sanitize_api_error(&err.to_string()); let message = format!( "⚠️ Failed to initialize provider `{}`. 
Please run `/models` to choose another provider.\nDetails: {safe_err}", route.provider diff --git a/src/openhuman/channels/runtime/startup.rs b/src/openhuman/channels/runtime/startup.rs index 3fa680a905..d61b8c7009 100644 --- a/src/openhuman/channels/runtime/startup.rs +++ b/src/openhuman/channels/runtime/startup.rs @@ -31,8 +31,8 @@ use crate::openhuman::channels::whatsapp_web::WhatsAppWebChannel; use crate::openhuman::channels::Channel; use crate::openhuman::config::Config; use crate::openhuman::context::channels_prompt::build_system_prompt; +use crate::openhuman::inference::provider::{self, Provider}; use crate::openhuman::memory::{self, Memory}; -use crate::openhuman::providers::{self, Provider}; use crate::openhuman::security::SecurityPolicy; use crate::openhuman::tools; use anyhow::Result; @@ -154,13 +154,13 @@ pub async fn start_channels(config: Config) -> Result<()> { // in bootstrap_core_runtime() (src/core/jsonrpc.rs) to avoid double-registration // when both startup paths run in the same process. 
- let provider_runtime_options = providers::ProviderRuntimeOptions { + let provider_runtime_options = provider::ProviderRuntimeOptions { auth_profile_override: None, openhuman_dir: config.config_path.parent().map(std::path::PathBuf::from), secrets_encrypt: config.secrets.encrypt, reasoning_enabled: config.runtime.reasoning_enabled, }; - let provider: Arc = Arc::from(providers::create_intelligent_routing_provider( + let provider: Arc = Arc::from(provider::create_intelligent_routing_provider( config.inference_url.as_deref(), config.api_url.as_deref(), config.api_key.as_deref(), @@ -572,7 +572,7 @@ pub async fn start_channels(config: Config) -> Result<()> { println!(" 🚦 In-flight message limit: {max_in_flight_messages}"); - let provider_name = providers::INFERENCE_BACKEND_ID.to_string(); + let provider_name = provider::INFERENCE_BACKEND_ID.to_string(); let mut provider_cache_seed: HashMap> = HashMap::new(); provider_cache_seed.insert(provider_name.clone(), Arc::clone(&provider)); let message_timeout_secs = diff --git a/src/openhuman/channels/tests/common.rs b/src/openhuman/channels/tests/common.rs index 2adb0b71dc..7d389265e5 100644 --- a/src/openhuman/channels/tests/common.rs +++ b/src/openhuman/channels/tests/common.rs @@ -1,6 +1,6 @@ use crate::openhuman::channels::{traits, Channel, SendMessage}; +use crate::openhuman::inference::provider::{ChatMessage, Provider}; use crate::openhuman::memory::{Memory, MemoryCategory, MemoryEntry}; -use crate::openhuman::providers::{ChatMessage, Provider}; use crate::openhuman::tools::{Tool, ToolResult}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; diff --git a/src/openhuman/channels/tests/context.rs b/src/openhuman/channels/tests/context.rs index 624e30345b..b36f4b5340 100644 --- a/src/openhuman/channels/tests/context.rs +++ b/src/openhuman/channels/tests/context.rs @@ -6,7 +6,7 @@ use super::super::context::{ CHANNEL_MESSAGE_TIMEOUT_SECS, MIN_CHANNEL_MESSAGE_TIMEOUT_SECS, }; use 
super::super::traits; -use crate::openhuman::providers::ChatMessage; +use crate::openhuman::inference::provider::ChatMessage; use std::collections::HashMap; use std::sync::{Arc, Mutex}; @@ -81,7 +81,7 @@ fn compact_sender_history_keeps_recent_truncated_messages() { inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), multimodal: crate::openhuman::config::MultimodalConfig::default(), - provider_runtime_options: crate::openhuman::providers::ProviderRuntimeOptions::default(), + provider_runtime_options: crate::openhuman::inference::provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, }; diff --git a/src/openhuman/channels/tests/discord_integration.rs b/src/openhuman/channels/tests/discord_integration.rs index 5f8ee756b9..1fcc9a7e7f 100644 --- a/src/openhuman/channels/tests/discord_integration.rs +++ b/src/openhuman/channels/tests/discord_integration.rs @@ -31,7 +31,7 @@ use super::super::traits; use super::super::{Channel, SendMessage}; use super::common::{HistoryCaptureProvider, NoopMemory}; use crate::openhuman::agent::bus::{mock_agent_run_turn, AgentTurnResponse}; -use crate::openhuman::providers::{ChatMessage, Provider}; +use crate::openhuman::inference::provider::{ChatMessage, Provider}; use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; @@ -134,7 +134,8 @@ fn make_discord_ctx( api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: crate::openhuman::providers::ProviderRuntimeOptions::default(), + provider_runtime_options: + crate::openhuman::inference::provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), diff --git 
a/src/openhuman/channels/tests/memory.rs b/src/openhuman/channels/tests/memory.rs index 64b55c3c05..5f96434a42 100644 --- a/src/openhuman/channels/tests/memory.rs +++ b/src/openhuman/channels/tests/memory.rs @@ -6,8 +6,8 @@ use super::super::runtime::process_channel_message; use super::super::{traits, Channel}; use super::common::{HistoryCaptureProvider, NoopMemory, RecordingChannel}; use crate::openhuman::embeddings::NoopEmbedding; +use crate::openhuman::inference::provider; use crate::openhuman::memory::{Memory, MemoryCategory, UnifiedMemory}; -use crate::openhuman::providers; use std::collections::HashMap; use std::sync::{Arc, Mutex}; use tempfile::TempDir; @@ -153,7 +153,7 @@ async fn process_channel_message_restores_per_sender_history_on_follow_ups() { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -236,7 +236,7 @@ async fn process_channel_message_uses_autosaved_memory_after_history_is_cleared( api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), diff --git a/src/openhuman/channels/tests/runtime_dispatch.rs b/src/openhuman/channels/tests/runtime_dispatch.rs index 6557823a4e..c9a919d7fb 100644 --- a/src/openhuman/channels/tests/runtime_dispatch.rs +++ b/src/openhuman/channels/tests/runtime_dispatch.rs @@ -3,7 +3,7 @@ 
use super::super::runtime::{process_channel_message, run_message_dispatch_loop}; use super::super::{traits, Channel}; use super::common::{use_real_agent_handler, NoopMemory, RecordingChannel, SlowProvider}; use crate::openhuman::agent::bus::{mock_agent_run_turn, AgentTurnRequest, AgentTurnResponse}; -use crate::openhuman::providers; +use crate::openhuman::inference::provider; use std::collections::HashMap; use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; @@ -49,7 +49,7 @@ async fn message_dispatch_processes_messages_in_parallel() { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -123,7 +123,7 @@ async fn process_channel_message_cancels_scoped_typing_task() { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -211,7 +211,7 @@ async fn dispatch_routes_through_agent_run_turn_bus_handler() { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: 
crate::openhuman::config::MultimodalConfig::default(), diff --git a/src/openhuman/channels/tests/runtime_tool_calls.rs b/src/openhuman/channels/tests/runtime_tool_calls.rs index 3f3cdaee9c..eda9153914 100644 --- a/src/openhuman/channels/tests/runtime_tool_calls.rs +++ b/src/openhuman/channels/tests/runtime_tool_calls.rs @@ -8,7 +8,7 @@ use super::common::{ IterativeToolProvider, MockPriceTool, ModelCaptureProvider, NoopMemory, RecordingChannel, TelegramRecordingChannel, ToolCallingAliasProvider, ToolCallingProvider, }; -use crate::openhuman::providers::{self, Provider}; +use crate::openhuman::inference::provider::{self, Provider}; use std::collections::HashMap; use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; @@ -39,7 +39,7 @@ async fn process_channel_message_executes_tool_calls_instead_of_sending_raw_json api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -94,7 +94,7 @@ async fn process_channel_message_executes_tool_calls_with_alias_tags() { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -158,7 +158,7 @@ async fn process_channel_message_handles_models_command_without_llm_call() { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - 
provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -249,7 +249,7 @@ async fn process_channel_message_uses_route_override_provider_and_model() { api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -298,7 +298,7 @@ async fn process_channel_message_respects_configured_max_tool_iterations_above_d api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), @@ -354,7 +354,7 @@ async fn process_channel_message_reports_configured_max_tool_iterations_limit() api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: providers::ProviderRuntimeOptions::default(), + provider_runtime_options: provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), diff --git a/src/openhuman/channels/tests/telegram_integration.rs 
b/src/openhuman/channels/tests/telegram_integration.rs index 16bb3e3f67..9199786a75 100644 --- a/src/openhuman/channels/tests/telegram_integration.rs +++ b/src/openhuman/channels/tests/telegram_integration.rs @@ -13,7 +13,7 @@ use super::super::traits; use super::super::{Channel, SendMessage}; use super::common::{NoopMemory, SlowProvider}; use crate::openhuman::agent::bus::{mock_agent_run_turn, AgentTurnResponse}; -use crate::openhuman::providers::{ChatMessage, Provider}; +use crate::openhuman::inference::provider::{ChatMessage, Provider}; use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; @@ -110,7 +110,8 @@ fn make_test_context( api_url: None, inference_url: None, reliability: Arc::new(crate::openhuman::config::ReliabilityConfig::default()), - provider_runtime_options: crate::openhuman::providers::ProviderRuntimeOptions::default(), + provider_runtime_options: + crate::openhuman::inference::provider::ProviderRuntimeOptions::default(), workspace_dir: Arc::new(std::env::temp_dir()), message_timeout_secs: CHANNEL_MESSAGE_TIMEOUT_SECS, multimodal: crate::openhuman::config::MultimodalConfig::default(), diff --git a/src/openhuman/config/ops.rs b/src/openhuman/config/ops.rs index 88c50e8bb2..7f631831b0 100644 --- a/src/openhuman/config/ops.rs +++ b/src/openhuman/config/ops.rs @@ -205,6 +205,65 @@ pub fn snapshot_config_json(config: &Config) -> Result serde_json::Value { + let app_version = + std::env::var("OPENHUMAN_APP_VERSION").unwrap_or_else(|_| "unknown".to_string()); + let api_key_set = config + .api_key + .as_deref() + .map(|k| !k.trim().is_empty()) + .unwrap_or(false); + let model_routes: Vec = config + .model_routes + .iter() + .map(|r| serde_json::json!({ "hint": r.hint, "model": r.model })) + .collect(); + let cloud_providers: Vec = config + .cloud_providers + .iter() + .map(|c| { + serde_json::json!({ + "id": c.id, + "slug": c.slug, + "label": c.label, + "endpoint": c.endpoint, + "auth_style": 
c.auth_style.as_str(), + }) + }) + .collect(); + + serde_json::json!({ + "api_url": config.api_url, + "inference_url": config.inference_url, + "default_model": config.default_model, + "app_version": app_version, + "api_key_set": api_key_set, + "model_routes": model_routes, + "cloud_providers": cloud_providers, + "primary_cloud": config.primary_cloud, + "reasoning_provider": config.reasoning_provider, + "agentic_provider": config.agentic_provider, + "coding_provider": config.coding_provider, + "memory_provider": config.memory_provider, + "embeddings_provider": config.embeddings_provider, + "heartbeat_provider": config.heartbeat_provider, + "learning_provider": config.learning_provider, + "subconscious_provider": config.subconscious_provider, + }) +} + +/// Loads config and returns the client-facing AI config slice. +pub async fn load_and_get_client_config_snapshot() -> Result, String> +{ + let config = load_config_with_timeout().await?; + let snapshot = client_config_json(&config); + Ok(RpcOutcome::new( + snapshot, + vec!["client config read".to_string()], + )) +} + #[derive(Debug, Clone, Default)] pub struct ModelSettingsPatch { pub api_url: Option, @@ -681,7 +740,7 @@ pub async fn apply_local_ai_settings( } if let Some(provider) = update.provider { config.local_ai.provider = - crate::openhuman::local_ai::provider::normalize_provider(&provider); + crate::openhuman::inference::local::provider::normalize_provider(&provider); } if let Some(base_url) = update.base_url { config.local_ai.base_url = if base_url.trim().is_empty() { diff --git a/src/openhuman/config/schema/cloud_providers.rs b/src/openhuman/config/schema/cloud_providers.rs index a18774c358..9afefbad02 100644 --- a/src/openhuman/config/schema/cloud_providers.rs +++ b/src/openhuman/config/schema/cloud_providers.rs @@ -2,7 +2,7 @@ //! //! Each entry in `Config::cloud_providers` represents one configured LLM //! backend. Providers are keyed by a user-chosen `slug` (e.g. `"openai"`, -//! `"my-deepseek"`). 
The factory in `crate::openhuman::providers::factory` +//! `"my-deepseek"`). The factory in `crate::openhuman::inference::provider::factory` //! resolves workload-to-provider strings against this list at runtime using //! the grammar `":"`. //! @@ -47,7 +47,7 @@ impl AuthStyle { /// `auth-profiles.json` via [`crate::openhuman::credentials::AuthService`], /// keyed by `provider:` (falling back to bare `` for legacy /// entries). The factory looks up the token at call time via -/// [`crate::openhuman::providers::factory::auth_key_for_slug`]. +/// [`crate::openhuman::inference::provider::factory::auth_key_for_slug`]. /// /// ## Back-compat /// diff --git a/src/openhuman/config/schema/load.rs b/src/openhuman/config/schema/load.rs index 1325508d6d..997ebdaff5 100644 --- a/src/openhuman/config/schema/load.rs +++ b/src/openhuman/config/schema/load.rs @@ -1096,9 +1096,9 @@ impl Config { let tier_str = tier_str.trim().to_ascii_lowercase(); if !tier_str.is_empty() { if let Some(tier) = - crate::openhuman::local_ai::presets::ModelTier::from_str_opt(&tier_str) + crate::openhuman::inference::presets::ModelTier::from_str_opt(&tier_str) { - if tier == crate::openhuman::local_ai::presets::ModelTier::Custom { + if tier == crate::openhuman::inference::presets::ModelTier::Custom { tracing::warn!( tier = %tier_str, "ignoring custom OPENHUMAN_LOCAL_AI_TIER; only built-in presets are supported" @@ -1109,7 +1109,7 @@ impl Config { "ignoring OPENHUMAN_LOCAL_AI_TIER outside the 1B local-model allowlist" ); } else { - crate::openhuman::local_ai::presets::apply_preset_to_config( + crate::openhuman::inference::presets::apply_preset_to_config( &mut self.local_ai, tier, ); diff --git a/src/openhuman/config/schema/types.rs b/src/openhuman/config/schema/types.rs index 23487e165e..36cdd2b3ac 100644 --- a/src/openhuman/config/schema/types.rs +++ b/src/openhuman/config/schema/types.rs @@ -56,6 +56,12 @@ pub struct Config { #[serde(default = "default_temperature_value")] pub default_temperature: f64, 
+ /// Models (by exact ID match OR shell-style glob like `gpt-5*`, `o1-*`) that + /// MUST NOT receive a `temperature` parameter. Used for reasoning models + /// that error out when temperature is set (OpenAI o-series, GPT-5). + #[serde(default = "default_temperature_unsupported_models")] + pub temperature_unsupported_models: Vec, + #[serde(default)] pub observability: ObservabilityConfig, @@ -333,6 +339,18 @@ fn default_temperature_value() -> f64 { DEFAULT_TEMPERATURE } +/// Returns the default list of model glob patterns that do not support the +/// `temperature` parameter. These cover OpenAI o-series and GPT-5 reasoning +/// models that return an error when `temperature` is included in the request. +fn default_temperature_unsupported_models() -> Vec { + vec![ + "o1*".to_string(), + "o3*".to_string(), + "o4*".to_string(), + "gpt-5*".to_string(), + ] +} + impl Config { /// Resolve the root directory where chunk `.md` files are stored. /// @@ -476,6 +494,7 @@ impl Default for Config { inference_url: None, default_model: Some(DEFAULT_MODEL.to_string()), default_temperature: DEFAULT_TEMPERATURE, + temperature_unsupported_models: default_temperature_unsupported_models(), observability: ObservabilityConfig::default(), autonomy: AutonomyConfig::default(), runtime: RuntimeConfig::default(), diff --git a/src/openhuman/config/schemas.rs b/src/openhuman/config/schemas.rs index 677452d291..7713d8e279 100644 --- a/src/openhuman/config/schemas.rs +++ b/src/openhuman/config/schemas.rs @@ -873,71 +873,13 @@ fn handle_get_config(_params: Map) -> ControllerFuture { fn handle_get_client_config(_params: Map) -> ControllerFuture { Box::pin(async move { log::debug!("[config][rpc] get_client_config enter"); - let config = match config_rpc::load_config_with_timeout().await { - Ok(c) => c, + match config_rpc::load_and_get_client_config_snapshot().await { + Ok(snapshot) => to_json(snapshot), Err(err) => { log::warn!("[config][rpc] get_client_config load failed: {err}"); - return Err(err); 
+ Err(err) } - }; - let app_version = - std::env::var("OPENHUMAN_APP_VERSION").unwrap_or_else(|_| "unknown".to_string()); - let api_key_set = config - .api_key - .as_deref() - .map(|k| !k.trim().is_empty()) - .unwrap_or(false); - let model_routes: Vec = config - .model_routes - .iter() - .map(|r| serde_json::json!({ "hint": r.hint, "model": r.model })) - .collect(); - - // Surface the new unified AI routing surface (cloud_providers + the - // 8 per-workload provider strings + primary_cloud) so the AI - // settings panel doesn't have to round-trip the full Config blob. - let cloud_providers: Vec = config - .cloud_providers - .iter() - .map(|c| { - serde_json::json!({ - "id": c.id, - "slug": c.slug, - "label": c.label, - "endpoint": c.endpoint, - "auth_style": c.auth_style.as_str(), - }) - }) - .collect(); - - log::debug!( - "[config][rpc] get_client_config ok api_key_set={} model_routes_count={} \ - cloud_providers_count={}", - api_key_set, - model_routes.len(), - cloud_providers.len(), - ); - to_json(RpcOutcome::new( - serde_json::json!({ - "api_url": config.api_url, - "inference_url": config.inference_url, - "default_model": config.default_model, - "app_version": app_version, - "api_key_set": api_key_set, - "model_routes": model_routes, - "cloud_providers": cloud_providers, - "primary_cloud": config.primary_cloud, - "reasoning_provider": config.reasoning_provider, - "agentic_provider": config.agentic_provider, - "coding_provider": config.coding_provider, - "memory_provider": config.memory_provider, - "embeddings_provider": config.embeddings_provider, - "heartbeat_provider": config.heartbeat_provider, - "learning_provider": config.learning_provider, - "subconscious_provider": config.subconscious_provider, - }), - vec!["client config read".to_string()], - )) + } }) } diff --git a/src/openhuman/context/guard.rs b/src/openhuman/context/guard.rs index d23a8d82ea..0f35797749 100644 --- a/src/openhuman/context/guard.rs +++ b/src/openhuman/context/guard.rs @@ -4,7 +4,7 @@ 
//! when usage exceeds a threshold. A circuit breaker disables compaction after //! consecutive failures to prevent infinite retry loops. -use crate::openhuman::providers::UsageInfo; +use crate::openhuman::inference::provider::UsageInfo; /// Threshold (0.0–1.0) at which auto-compaction is triggered. pub(crate) const COMPACTION_TRIGGER_THRESHOLD: f64 = 0.90; diff --git a/src/openhuman/context/manager.rs b/src/openhuman/context/manager.rs index f86c5d60b6..6cc1fbfce1 100644 --- a/src/openhuman/context/manager.rs +++ b/src/openhuman/context/manager.rs @@ -37,7 +37,7 @@ use super::prompt::{PromptContext, SystemPromptBuilder}; use super::session_memory::SessionMemoryConfig; use super::summarizer::{Summarizer, SummaryStats}; use crate::openhuman::config::ContextConfig; -use crate::openhuman::providers::{ConversationMessage, UsageInfo}; +use crate::openhuman::inference::provider::{ConversationMessage, UsageInfo}; use anyhow::Result; /// Outcome of a reduction pass driven by [`ContextManager::reduce_before_call`]. diff --git a/src/openhuman/context/manager_tests.rs b/src/openhuman/context/manager_tests.rs index c12278a687..2622cacef5 100644 --- a/src/openhuman/context/manager_tests.rs +++ b/src/openhuman/context/manager_tests.rs @@ -1,5 +1,5 @@ use super::*; -use crate::openhuman::providers::{ChatMessage, ToolCall, ToolResultMessage}; +use crate::openhuman::inference::provider::{ChatMessage, ToolCall, ToolResultMessage}; use async_trait::async_trait; use std::sync::Mutex; diff --git a/src/openhuman/context/microcompact.rs b/src/openhuman/context/microcompact.rs index 587bb70261..084e93c8af 100644 --- a/src/openhuman/context/microcompact.rs +++ b/src/openhuman/context/microcompact.rs @@ -23,7 +23,7 @@ //! otherwise be too large to fit — the pipeline orchestrator handles //! gating. -use crate::openhuman::providers::ConversationMessage; +use crate::openhuman::inference::provider::ConversationMessage; /// Placeholder used in place of cleared tool-result bodies. 
Must be /// stable across versions so callers can pattern-match on it for @@ -102,7 +102,7 @@ pub fn microcompact(history: &mut [ConversationMessage], keep_recent: usize) -> #[cfg(test)] mod tests { use super::*; - use crate::openhuman::providers::{ChatMessage, ToolCall, ToolResultMessage}; + use crate::openhuman::inference::provider::{ChatMessage, ToolCall, ToolResultMessage}; fn user(text: &str) -> ConversationMessage { ConversationMessage::Chat(ChatMessage::user(text)) diff --git a/src/openhuman/context/pipeline.rs b/src/openhuman/context/pipeline.rs index 6a3eb7d208..e5a96a5080 100644 --- a/src/openhuman/context/pipeline.rs +++ b/src/openhuman/context/pipeline.rs @@ -36,7 +36,7 @@ use super::guard::{ContextCheckResult, ContextGuard}; use super::microcompact::{microcompact, MicrocompactStats, DEFAULT_KEEP_RECENT_TOOL_RESULTS}; use super::session_memory::{SessionMemoryConfig, SessionMemoryState}; -use crate::openhuman::providers::{ConversationMessage, UsageInfo}; +use crate::openhuman::inference::provider::{ConversationMessage, UsageInfo}; use std::sync::{Arc, Mutex}; /// Shared handle to a [`SessionMemoryState`] so both the synchronous @@ -257,7 +257,7 @@ impl ContextPipeline { mod tests { use super::super::microcompact::CLEARED_PLACEHOLDER; use super::*; - use crate::openhuman::providers::{ + use crate::openhuman::inference::provider::{ ChatMessage, ConversationMessage, ToolCall, ToolResultMessage, UsageInfo, }; diff --git a/src/openhuman/context/summarizer.rs b/src/openhuman/context/summarizer.rs index 608938e638..145040bf28 100644 --- a/src/openhuman/context/summarizer.rs +++ b/src/openhuman/context/summarizer.rs @@ -28,7 +28,7 @@ //! complete turns. 
use super::microcompact::MicrocompactStats; -use crate::openhuman::providers::{ChatMessage, ConversationMessage, Provider}; +use crate::openhuman::inference::provider::{ChatMessage, ConversationMessage, Provider}; use anyhow::Result; use async_trait::async_trait; use std::fmt::Write as _; diff --git a/src/openhuman/context/summarizer_tests.rs b/src/openhuman/context/summarizer_tests.rs index 7812a55cee..bc81a343c6 100644 --- a/src/openhuman/context/summarizer_tests.rs +++ b/src/openhuman/context/summarizer_tests.rs @@ -1,5 +1,5 @@ use super::*; -use crate::openhuman::providers::{ChatResponse, ToolCall, ToolResultMessage}; +use crate::openhuman::inference::provider::{ChatResponse, ToolCall, ToolResultMessage}; use async_trait::async_trait; use std::sync::Mutex; @@ -75,7 +75,7 @@ impl Provider for StubProvider { async fn chat( &self, - _request: crate::openhuman::providers::ChatRequest<'_>, + _request: crate::openhuman::inference::provider::ChatRequest<'_>, _model: &str, _temperature: f64, ) -> anyhow::Result { diff --git a/src/openhuman/credentials/ops.rs b/src/openhuman/credentials/ops.rs index 0c27ccd53d..d29e32e18b 100644 --- a/src/openhuman/credentials/ops.rs +++ b/src/openhuman/credentials/ops.rs @@ -26,7 +26,7 @@ use crate::openhuman::memory::conversations; pub async fn start_login_gated_services(config: &Config) { // 1. Local AI (Ollama, whisper, embeddings) if config.local_ai.runtime_enabled { - let service = crate::openhuman::local_ai::global(config); + let service = crate::openhuman::inference::local::global(config); service.bootstrap(config).await; log::info!("[services] local AI bootstrapped after login"); } @@ -78,7 +78,7 @@ pub async fn stop_login_gated_services(config: &Config) { // (it may be serving other clients or mid-download), but we clear // the internal state so it re-bootstraps on next login. 
if config.local_ai.runtime_enabled { - let service = crate::openhuman::local_ai::global(config); + let service = crate::openhuman::inference::local::global(config); service.reset_to_idle(config); log::info!("[services] local AI reset to idle on logout"); } diff --git a/src/openhuman/cron/scheduler.rs b/src/openhuman/cron/scheduler.rs index 3a90ab3596..35b4055203 100644 --- a/src/openhuman/cron/scheduler.rs +++ b/src/openhuman/cron/scheduler.rs @@ -252,7 +252,7 @@ async fn run_agent_job(config: &Config, job: &CronJob) -> (bool, String, Option< .unwrap_or_else(|| crate::openhuman::config::DEFAULT_MODEL.to_string()); let resolved_model = match &def.model { ModelSpec::Hint(workload) => { - match crate::openhuman::providers::create_chat_provider( + match crate::openhuman::inference::provider::create_chat_provider( workload, &effective, ) { Ok((_, m)) => { diff --git a/src/openhuman/doctor/core.rs b/src/openhuman/doctor/core.rs index 100108a872..29a1325be2 100644 --- a/src/openhuman/doctor/core.rs +++ b/src/openhuman/doctor/core.rs @@ -123,7 +123,7 @@ pub struct ModelProbeReport { } fn doctor_model_targets() -> Vec { - crate::openhuman::providers::list_providers() + crate::openhuman::inference::provider::list_providers() .into_iter() .map(|provider| provider.name.to_string()) .collect() diff --git a/src/openhuman/embeddings/cloud.rs b/src/openhuman/embeddings/cloud.rs index 6841c42713..ee77d6fe4b 100644 --- a/src/openhuman/embeddings/cloud.rs +++ b/src/openhuman/embeddings/cloud.rs @@ -9,7 +9,7 @@ //! //! The JWT and API URL are resolved per call so a session refresh between //! embed batches is picked up transparently — matching -//! [`crate::openhuman::providers::openhuman_backend::OpenHumanBackendProvider`]. +//! [`crate::openhuman::inference::provider::openhuman_backend::OpenHumanBackendProvider`]. 
use std::path::PathBuf; diff --git a/src/openhuman/embeddings/factory.rs b/src/openhuman/embeddings/factory.rs index a3e7da13e0..00bcb69c74 100644 --- a/src/openhuman/embeddings/factory.rs +++ b/src/openhuman/embeddings/factory.rs @@ -30,7 +30,7 @@ pub fn create_embedding_provider( None, None, true, model, dims, ))), "ollama" => { - let base_url = crate::openhuman::local_ai::ollama_base_url(); + let base_url = crate::openhuman::inference::local::ollama_base_url(); Ok(Box::new(OllamaEmbedding::try_new(&base_url, model, dims)?)) } "openai" => Ok(Box::new(OpenAiEmbedding::new( diff --git a/src/openhuman/local_ai/device.rs b/src/openhuman/inference/device.rs similarity index 100% rename from src/openhuman/local_ai/device.rs rename to src/openhuman/inference/device.rs diff --git a/src/openhuman/inference/http/mod.rs b/src/openhuman/inference/http/mod.rs new file mode 100644 index 0000000000..775ee9c359 --- /dev/null +++ b/src/openhuman/inference/http/mod.rs @@ -0,0 +1,14 @@ +//! OpenAI-compatible HTTP endpoint at `/v1/chat/completions` and `/v1/models`. +//! +//! ## Mounting +//! +//! The router is mounted by `src/core/jsonrpc.rs`: +//! ```ignore +//! .nest("/v1", crate::openhuman::inference::http::router()) +//! ``` +//! It inherits the same bearer-token auth middleware that guards `/rpc`. + +pub mod server; +pub mod types; + +pub use server::router; diff --git a/src/openhuman/inference/http/server.rs b/src/openhuman/inference/http/server.rs new file mode 100644 index 0000000000..5c4f877de8 --- /dev/null +++ b/src/openhuman/inference/http/server.rs @@ -0,0 +1,298 @@ +//! OpenAI-compatible HTTP handlers for `/v1/chat/completions` and `/v1/models`. +//! +//! ## Mounting +//! +//! The router returned by [`router()`] is merged into the core axum server +//! in `src/core/jsonrpc.rs` via `.nest("/v1", inference::http::router())`. +//! It reuses the same bearer-token auth middleware that guards `/rpc`. +//! +//! ## Authentication +//! +//! 
All routes require `Authorization: Bearer ` — the +//! same per-launch token used by the JSON-RPC endpoint. Missing or wrong +//! tokens get a `401 Unauthorized` from the shared middleware. +//! +//! ## Provider routing +//! +//! The `model` field in the request selects the provider: +//! - `"ollama:"` or a bare model name → local Ollama +//! - `":"` → cloud provider entry by slug +//! - everything else → OpenHuman backend (session JWT) + +use axum::http::StatusCode; +use axum::response::sse::{Event, KeepAlive, Sse}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::{extract::State, Json, Router}; +use futures_util::stream::{self, StreamExt}; +use serde_json::json; +use tracing::{debug, error}; + +use crate::core::types::AppState; +use crate::openhuman::config::Config; +use crate::openhuman::inference::provider; +use crate::openhuman::inference::provider::traits::ChatMessage; + +use super::types::{ + ChatCompletionChoice, ChatCompletionChunk, ChatCompletionChunkChoice, ChatCompletionDelta, + ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse, ChatCompletionUsage, + ModelObject, ModelsResponse, +}; + +const LOG_PREFIX: &str = "[inference::http]"; + +/// Build the `/v1` axum sub-router. +pub fn router() -> Router { + Router::new() + .route("/chat/completions", post(chat_completions_handler)) + .route("/models", get(models_handler)) +} + +/// `POST /v1/chat/completions` +/// +/// Accepts an OpenAI-compatible request body. Routes through the unified +/// `Provider` trait — local (Ollama) for `ollama:*` model names, cloud otherwise. 
+async fn chat_completions_handler( + State(_state): State, + Json(req): Json, +) -> Response { + debug!( + model = %req.model, + stream = req.stream, + message_count = req.messages.len(), + "{LOG_PREFIX} chat_completions: start" + ); + + let config = match Config::load_or_init().await { + Ok(c) => c, + Err(e) => { + error!("{LOG_PREFIX} chat_completions: config load failed: {e}"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "error": { "message": format!("config load failed: {e}"), "type": "internal_error" }})), + ) + .into_response(); + } + }; + + // Build provider string from model name. + // If the model already looks like a provider string, use it directly. + // Otherwise treat a bare model name as an Ollama model. + let provider_string = if req.model.starts_with("ollama:") + || req.model.contains(':') + || req.model == "openhuman" + { + req.model.clone() + } else { + // Bare model name (no colon) — route to Ollama local runtime. + format!("ollama:{}", req.model) + }; + + let (provider_box, model_id) = match provider::factory::create_chat_provider_from_string( + "agentic", + &provider_string, + &config, + ) { + Ok(pair) => pair, + Err(e) => { + error!("{LOG_PREFIX} chat_completions: provider build failed: {e}"); + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "error": { "message": format!("provider error: {e}"), "type": "invalid_request_error" }})), + ) + .into_response(); + } + }; + + // Map request messages to provider ChatMessage type. + let messages: Vec = req + .messages + .iter() + .map(|m| ChatMessage { + id: None, + role: m.role.clone(), + content: m.content.clone(), + extra_metadata: None, + }) + .collect(); + + // If the caller supplied a temperature but the model is on the unsupported + // list, log a warning and drop it — sending temperature to o1/o3/o4/gpt-5 + // reasoning models causes an API error. The provider layer applies the same + // check on the outbound body, so this is belt-and-suspenders for logging. 
+ let temperature = { + let raw = req.temperature.unwrap_or(config.default_temperature); + let suppressed = crate::openhuman::inference::provider::temperature::temperature_for_model( + &model_id, raw, &config, + ); + if suppressed.is_none() && req.temperature.is_some() { + tracing::warn!( + model = %model_id, + requested_temperature = req.temperature.unwrap_or(0.0), + "{LOG_PREFIX} dropping caller-supplied temperature — model is on temperature_unsupported_models list" + ); + } + raw // the Provider layer handles omission; we pass the value through + }; + let completion_id = format!("chatcmpl-{}", uuid::Uuid::new_v4()); + let created = chrono::Utc::now().timestamp(); + let model_name = req.model.clone(); + + if req.stream { + // Streaming response via SSE + let options = provider::traits::StreamOptions::new(true); + let stream = + provider_box.stream_chat_with_history(&messages, &model_id, temperature, options); + + let cid = completion_id.clone(); + let model_clone = model_name.clone(); + let event_stream = stream + .enumerate() + .map(move |(i, chunk_result)| { + let cid = cid.clone(); + let model_clone = model_clone.clone(); + match chunk_result { + Ok(chunk) => { + let finish_reason = if chunk.is_final { Some("stop") } else { None }; + let content = if chunk.delta.is_empty() && chunk.is_final { + None + } else { + Some(chunk.delta) + }; + let sse_chunk = ChatCompletionChunk { + id: cid, + object: "chat.completion.chunk", + created, + model: model_clone, + choices: vec![ChatCompletionChunkChoice { + index: 0, + delta: ChatCompletionDelta { + role: if i == 0 { + Some("assistant".to_string()) + } else { + None + }, + content, + }, + finish_reason, + }], + }; + let data = + serde_json::to_string(&sse_chunk).unwrap_or_else(|_| "{}".to_string()); + Ok::(Event::default().data(data)) + } + Err(e) => { + let err_event = json!({ + "error": { "message": e.to_string(), "type": "stream_error" } + }); + Ok(Event::default() + 
.data(serde_json::to_string(&err_event).unwrap_or_default())) + } + } + }) + .chain(stream::once(async { + Ok::(Event::default().data("[DONE]")) + })); + + debug!("{LOG_PREFIX} chat_completions: streaming response started"); + return Sse::new(event_stream) + .keep_alive(KeepAlive::default()) + .into_response(); + } + + // Non-streaming: call chat_with_history + match provider_box + .chat_with_history(&messages, &model_id, temperature) + .await + { + Ok(content) => { + debug!("{LOG_PREFIX} chat_completions: non-streaming ok"); + let response = ChatCompletionResponse { + id: completion_id, + object: "chat.completion", + created, + model: model_name, + choices: vec![ChatCompletionChoice { + index: 0, + message: ChatCompletionMessage { + role: "assistant".to_string(), + content, + }, + finish_reason: "stop", + }], + usage: ChatCompletionUsage { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0, + }, + }; + (StatusCode::OK, Json(response)).into_response() + } + Err(e) => { + error!("{LOG_PREFIX} chat_completions: inference failed: {e}"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "error": { "message": format!("inference error: {e}"), "type": "internal_error" }})), + ) + .into_response() + } + } +} + +/// `GET /v1/models` +/// +/// Lists all configured models (local Ollama + cloud providers). 
+async fn models_handler(State(_state): State) -> Response { + debug!("{LOG_PREFIX} models: start"); + + let config = match Config::load_or_init().await { + Ok(c) => c, + Err(e) => { + error!("{LOG_PREFIX} models: config load failed: {e}"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "error": { "message": format!("config load failed: {e}") }})), + ) + .into_response(); + } + }; + + let created = chrono::Utc::now().timestamp(); + let mut data: Vec = Vec::new(); + + // Cloud provider default models + for cp in &config.cloud_providers { + if let Some(ref model) = cp.default_model { + data.push(ModelObject { + id: format!("{}:{}", cp.slug, model), + object: "model", + created, + owned_by: cp.slug.clone(), + }); + } + } + + // Configured local chat model (Ollama) + if !config.local_ai.chat_model_id.is_empty() { + data.push(ModelObject { + id: format!("ollama:{}", config.local_ai.chat_model_id), + object: "model", + created, + owned_by: "ollama".to_string(), + }); + } + + debug!(model_count = data.len(), "{LOG_PREFIX} models: ok"); + ( + StatusCode::OK, + Json(ModelsResponse { + object: "list", + data, + }), + ) + .into_response() +} + +#[cfg(test)] +#[path = "tests.rs"] +mod tests; diff --git a/src/openhuman/inference/http/tests.rs b/src/openhuman/inference/http/tests.rs new file mode 100644 index 0000000000..987ceaffd6 --- /dev/null +++ b/src/openhuman/inference/http/tests.rs @@ -0,0 +1,124 @@ +//! Integration tests for the OpenAI-compatible `/v1` HTTP endpoint. +//! +//! These tests spin up an in-process axum router (no network), send +//! crafted HTTP requests via `tower::ServiceExt::oneshot`, and assert on +//! the response status codes. +//! +//! A running inference backend is NOT required — the tests exercise the +//! routing and auth-middleware layers only. 
+ +use std::sync::Once; + +use axum::body::Body; +use axum::http::{header, Method, Request, StatusCode}; +use tower::ServiceExt; + +use crate::core::auth::CORE_TOKEN_ENV_VAR; +use crate::core::jsonrpc::build_core_http_router; + +const TEST_RPC_TOKEN: &str = "inference-http-tests-token"; + +/// Initialize the per-process RPC bearer token exactly once, so that the +/// auth middleware can answer 401 instead of 500 ("auth subsystem not +/// initialized") in tests that don't spin up a real core. +fn ensure_test_rpc_auth() { + static INIT: Once = Once::new(); + INIT.call_once(|| { + // SAFETY: test-only init; we serialize via `Once`, and live_routing_e2e + // uses its own env lock + a different token value so the two test + // binaries don't collide (they run in separate processes anyway). + unsafe { std::env::set_var(CORE_TOKEN_ENV_VAR, TEST_RPC_TOKEN) }; + let tmp = tempfile::tempdir().expect("tempdir for token file"); + crate::core::auth::init_rpc_token(tmp.path()).expect("init rpc auth token for http tests"); + }); +} + +/// Build the test router (Socket.IO disabled — no real runtime needed). +fn test_router() -> axum::Router { + ensure_test_rpc_auth(); + build_core_http_router(false) +} + +/// Convenience: dispatch a single request through the in-process router. +async fn dispatch(req: Request) -> axum::response::Response { + test_router().oneshot(req).await.unwrap() +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +/// Requests to `POST /v1/chat/completions` without any `Authorization` header +/// must be rejected with `401 Unauthorized`. 
+#[tokio::test] +async fn test_chat_completions_no_bearer_returns_401() { + let body = serde_json::json!({ + "model": "ollama:llama3", + "messages": [{ "role": "user", "content": "hello" }] + }); + let req = Request::builder() + .method(Method::POST) + .uri("/v1/chat/completions") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from(serde_json::to_string(&body).unwrap())) + .unwrap(); + + let resp = dispatch(req).await; + assert_eq!( + resp.status(), + StatusCode::UNAUTHORIZED, + "POST /v1/chat/completions without bearer must return 401" + ); +} + +/// Requests to `GET /v1/models` without any `Authorization` header must be +/// rejected with `401 Unauthorized`. +#[tokio::test] +async fn test_models_no_bearer_returns_401() { + let req = Request::builder() + .method(Method::GET) + .uri("/v1/models") + .body(Body::empty()) + .unwrap(); + + let resp = dispatch(req).await; + assert_eq!( + resp.status(), + StatusCode::UNAUTHORIZED, + "GET /v1/models without bearer must return 401" + ); +} + +/// A request with a bearer token must not be rejected as 401/403. The actual +/// response code depends on whether a live inference backend is running; the +/// test only asserts that auth passed. +#[tokio::test] +async fn test_chat_completions_with_bearer_not_rejected_as_auth_error() { + // Use the same token that `ensure_test_rpc_auth` installed via the + // `Once` initializer in this module. 
+ let token = TEST_RPC_TOKEN.to_string(); + + let body = serde_json::json!({ + "model": "ollama:llama3", + "messages": [{ "role": "user", "content": "ping" }], + "stream": false + }); + let req = Request::builder() + .method(Method::POST) + .uri("/v1/chat/completions") + .header(header::CONTENT_TYPE, "application/json") + .header(header::AUTHORIZATION, format!("Bearer {}", token)) + .body(Body::from(serde_json::to_string(&body).unwrap())) + .unwrap(); + + let resp = dispatch(req).await; + let status = resp.status(); + assert_ne!( + status, + StatusCode::UNAUTHORIZED, + "401 must not fire when bearer is present" + ); + assert_ne!( + status, + StatusCode::FORBIDDEN, + "403 must not fire when bearer is present" + ); +} diff --git a/src/openhuman/inference/http/types.rs b/src/openhuman/inference/http/types.rs new file mode 100644 index 0000000000..6bf58fe518 --- /dev/null +++ b/src/openhuman/inference/http/types.rs @@ -0,0 +1,93 @@ +//! OpenAI-compatible HTTP request / response types. + +use serde::{Deserialize, Serialize}; + +// ── Chat Completions ────────────────────────────────────────────────────────── + +#[derive(Debug, Deserialize)] +pub struct ChatCompletionRequest { + pub model: String, + pub messages: Vec, + #[serde(default)] + pub stream: bool, + #[serde(default)] + pub temperature: Option, + #[serde(default)] + pub max_tokens: Option, + /// Optional tool definitions (ignored if the provider doesn't support them). 
+ #[serde(default)] + pub tools: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct ChatCompletionMessage { + pub role: String, + pub content: String, +} + +#[derive(Debug, Serialize)] +pub struct ChatCompletionResponse { + pub id: String, + pub object: &'static str, + pub created: i64, + pub model: String, + pub choices: Vec, + pub usage: ChatCompletionUsage, +} + +#[derive(Debug, Serialize)] +pub struct ChatCompletionChoice { + pub index: u32, + pub message: ChatCompletionMessage, + pub finish_reason: &'static str, +} + +#[derive(Debug, Serialize)] +pub struct ChatCompletionUsage { + pub prompt_tokens: u64, + pub completion_tokens: u64, + pub total_tokens: u64, +} + +// ── Streaming (SSE) ─────────────────────────────────────────────────────────── + +#[derive(Debug, Serialize)] +pub struct ChatCompletionChunk { + pub id: String, + pub object: &'static str, + pub created: i64, + pub model: String, + pub choices: Vec, +} + +#[derive(Debug, Serialize)] +pub struct ChatCompletionChunkChoice { + pub index: u32, + pub delta: ChatCompletionDelta, + #[serde(skip_serializing_if = "Option::is_none")] + pub finish_reason: Option<&'static str>, +} + +#[derive(Debug, Serialize)] +pub struct ChatCompletionDelta { + #[serde(skip_serializing_if = "Option::is_none")] + pub role: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option, +} + +// ── Models ──────────────────────────────────────────────────────────────────── + +#[derive(Debug, Serialize)] +pub struct ModelsResponse { + pub object: &'static str, + pub data: Vec, +} + +#[derive(Debug, Serialize)] +pub struct ModelObject { + pub id: String, + pub object: &'static str, + pub created: i64, + pub owned_by: String, +} diff --git a/src/openhuman/local_ai/core.rs b/src/openhuman/inference/local/core.rs similarity index 100% rename from src/openhuman/local_ai/core.rs rename to src/openhuman/inference/local/core.rs diff --git a/src/openhuman/local_ai/install.rs 
b/src/openhuman/inference/local/install.rs similarity index 99% rename from src/openhuman/local_ai/install.rs rename to src/openhuman/inference/local/install.rs index 780dffe1f9..d6d8c6de6f 100644 --- a/src/openhuman/local_ai/install.rs +++ b/src/openhuman/inference/local/install.rs @@ -124,7 +124,7 @@ fn build_install_command(install_dir: &Path) -> Result std::sync::MutexGuard<'static, ()> { - crate::openhuman::local_ai::local_ai_test_guard() + crate::openhuman::inference::inference_test_guard() } /// RAII guard: records the prior value of `var` on construction and diff --git a/src/openhuman/local_ai/install_piper.rs b/src/openhuman/inference/local/install_piper.rs similarity index 99% rename from src/openhuman/local_ai/install_piper.rs rename to src/openhuman/inference/local/install_piper.rs index c04ad6d1ea..ae975ed5be 100644 --- a/src/openhuman/local_ai/install_piper.rs +++ b/src/openhuman/inference/local/install_piper.rs @@ -142,7 +142,7 @@ fn decode_voice_id(voice_id: &str) -> (String, String, String, String) { /// to "installed" when on-disk artifacts pass validation. 
pub fn status(config: &Config) -> VoiceInstallStatus { let mut snapshot = read_status(ENGINE_PIPER); - let configured_voice = crate::openhuman::local_ai::model_ids::effective_tts_voice_id(config); + let configured_voice = crate::openhuman::inference::model_ids::effective_tts_voice_id(config); let configured_voice = configured_voice.trim_end_matches(".onnx").to_string(); if matches!(snapshot.state, VoiceInstallState::Missing) && installed_artifacts_ok(config, &configured_voice) @@ -469,7 +469,7 @@ pub(crate) fn find_workspace_piper_binary(config: &Config) -> Option { #[cfg(test)] mod tests { use super::*; - use crate::openhuman::local_ai::voice_install_common::reset_status; + use crate::openhuman::inference::local::voice_install_common::reset_status; fn temp_config() -> (tempfile::TempDir, Config) { let dir = tempfile::tempdir().expect("tempdir"); @@ -553,7 +553,7 @@ mod tests { /// directory; reuses the module-wide `local_ai_test_guard` so paths + /// install_whisper tests are serialised through the same lock. fn shared_install_lock() -> std::sync::MutexGuard<'static, ()> { - crate::openhuman::local_ai::local_ai_test_guard() + crate::openhuman::inference::inference_test_guard() } fn wipe_shared_install_dir(config: &Config) { diff --git a/src/openhuman/local_ai/install_whisper.rs b/src/openhuman/inference/local/install_whisper.rs similarity index 98% rename from src/openhuman/local_ai/install_whisper.rs rename to src/openhuman/inference/local/install_whisper.rs index 378fd16d33..b6a654d56e 100644 --- a/src/openhuman/local_ai/install_whisper.rs +++ b/src/openhuman/inference/local/install_whisper.rs @@ -21,7 +21,7 @@ //! moment a binary lands on PATH. //! //! Per-engine progress is reported via the shared -//! [`crate::openhuman::local_ai::voice_install_common`] status table so +//! [`crate::openhuman::inference::local::voice_install_common`] status table so //! the renderer can poll one RPC for state across both Whisper and Piper. 
use std::path::PathBuf; @@ -106,7 +106,7 @@ pub fn status(config: &Config) -> VoiceInstallStatus { // artifacts so the UI doesn't show a perpetual "missing" after a // successful install across a process restart. if matches!(snapshot.state, VoiceInstallState::Missing) { - let configured = crate::openhuman::local_ai::model_ids::effective_stt_model_id(config); + let configured = crate::openhuman::inference::model_ids::effective_stt_model_id(config); if installed_artifacts_ok(config, &configured) { snapshot.state = VoiceInstallState::Installed; snapshot.stage = Some(format!("{configured} present")); @@ -339,7 +339,7 @@ pub(crate) fn find_workspace_whisper_binary(config: &Config) -> Option #[cfg(test)] mod tests { use super::*; - use crate::openhuman::local_ai::voice_install_common::reset_status; + use crate::openhuman::inference::local::voice_install_common::reset_status; fn temp_config() -> (tempfile::TempDir, Config) { let dir = tempfile::tempdir().expect("tempdir"); @@ -408,7 +408,7 @@ mod tests { /// in parallel. Reuses the module-wide `local_ai_test_guard` so paths /// + install_piper tests are serialised through the same lock. fn shared_install_lock() -> std::sync::MutexGuard<'static, ()> { - crate::openhuman::local_ai::local_ai_test_guard() + crate::openhuman::inference::inference_test_guard() } /// Wipe the shared-root install dir for whisper so the absence diff --git a/src/openhuman/local_ai/lm_studio_api.rs b/src/openhuman/inference/local/lm_studio.rs similarity index 100% rename from src/openhuman/local_ai/lm_studio_api.rs rename to src/openhuman/inference/local/lm_studio.rs diff --git a/src/openhuman/inference/local/mod.rs b/src/openhuman/inference/local/mod.rs new file mode 100644 index 0000000000..e158f06ee3 --- /dev/null +++ b/src/openhuman/inference/local/mod.rs @@ -0,0 +1,51 @@ +//! Local AI runtime — Ollama, LM Studio, Whisper, Piper sub-process management. +//! +//! This module was previously `src/openhuman/local_ai/`. It now lives under +//! 
`inference/local/` so all inference concerns share a single domain root. + +#[cfg(test)] +pub(crate) static INFERENCE_TEST_MUTEX: once_cell::sync::Lazy> = + once_cell::sync::Lazy::new(|| std::sync::Mutex::new(())); + +#[cfg(test)] +pub(crate) fn inference_test_guard() -> std::sync::MutexGuard<'static, ()> { + INFERENCE_TEST_MUTEX + .lock() + .unwrap_or_else(|p| p.into_inner()) +} + +mod core; +pub mod ops; +mod schemas; + +// Re-expose inference-level modules under `local::` so that files that +// were moved from `local_ai/` and used `super::model_ids` etc. continue +// to compile without rewriting every callsite. +pub use super::device; +pub use super::model_ids; +pub use super::parse; +pub use super::paths; +pub use super::presets; +pub use super::sentiment; +pub use super::types; + +pub mod install; +pub(crate) mod install_piper; +pub(crate) mod install_whisper; +pub(crate) mod lm_studio; +mod ollama; +mod process_util; +pub(crate) mod provider; +pub(crate) use ollama::{ollama_base_url, OLLAMA_BASE_URL}; +pub mod service; +pub(crate) mod voice_install_common; + +pub use core::*; +pub use ops as rpc; +pub use ops::*; +pub use schemas::{ + all_controller_schemas as all_local_ai_controller_schemas, + all_registered_controllers as all_local_ai_registered_controllers, +}; +pub(crate) use service::whisper_engine; +pub use service::LocalAiService; diff --git a/src/openhuman/local_ai/ollama_api.rs b/src/openhuman/inference/local/ollama.rs similarity index 99% rename from src/openhuman/local_ai/ollama_api.rs rename to src/openhuman/inference/local/ollama.rs index f794c433d7..cb96fb2756 100644 --- a/src/openhuman/local_ai/ollama_api.rs +++ b/src/openhuman/inference/local/ollama.rs @@ -341,7 +341,7 @@ mod tests { } fn test_lock() -> std::sync::MutexGuard<'static, ()> { - crate::openhuman::local_ai::local_ai_test_guard() + crate::openhuman::inference::inference_test_guard() } #[test] diff --git a/src/openhuman/local_ai/ops.rs b/src/openhuman/inference/local/ops.rs 
similarity index 79% rename from src/openhuman/local_ai/ops.rs rename to src/openhuman/inference/local/ops.rs index 1602f8bd57..f3b64ecb57 100644 --- a/src/openhuman/local_ai/ops.rs +++ b/src/openhuman/inference/local/ops.rs @@ -8,14 +8,16 @@ use chrono::Utc; use crate::openhuman::agent::Agent; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::{ - self, LocalAiAssetsStatus, LocalAiDownloadsProgress, LocalAiEmbeddingResult, - LocalAiSpeechResult, LocalAiTtsResult, +use crate::openhuman::inference::local as local_ai; +use crate::openhuman::inference::provider as providers; +use crate::openhuman::inference::provider::ops::ProviderRuntimeOptions; +use crate::openhuman::inference::{ + LocalAiAssetsStatus, LocalAiDownloadsProgress, LocalAiEmbeddingResult, LocalAiSpeechResult, + LocalAiStatus, LocalAiTtsResult, }; use crate::openhuman::prompt_injection::{ enforce_prompt_input, PromptEnforcementAction, PromptEnforcementContext, }; -use crate::openhuman::providers::{self, ProviderRuntimeOptions}; use crate::rpc::RpcOutcome; fn prompt_guard_user_message(action: PromptEnforcementAction) -> &'static str { @@ -135,9 +137,7 @@ pub async fn agent_chat_simple( } /// Returns the current operational status of the local AI stack. -pub async fn local_ai_status( - config: &Config, -) -> Result, String> { +pub async fn local_ai_status(config: &Config) -> Result, String> { let service = local_ai::global(config); let status = service.status(); if matches!(status.state.as_str(), "idle" | "degraded") { @@ -147,134 +147,16 @@ pub async fn local_ai_status( service_clone.bootstrap(&config_clone).await; }); } - Ok(RpcOutcome::single_log( - service.status(), - "local ai status fetched", - )) -} - -/// Stop the local-AI runtime, killing the Ollama daemon ONLY if OpenHuman -/// spawned it, and shift any workload routed to `ollama:` back to -/// `"cloud"` (= primary). -/// -/// Three coordinated effects: -/// -/// 1. 
**Daemon shutdown** — `shutdown_owned_ollama` kills the child process -/// only when the spawn marker matches. External daemons (system service, -/// user-launched `ollama serve`, daemons from another OpenHuman workspace) -/// are left untouched, per the same friendly-fire-avoidance rule -/// `ensure_ollama_server` follows at startup. -/// -/// 2. **Routing shift** — every `*_provider` field starting with `ollama:` -/// is cleared (set to `None`, which resolves to `"cloud"` at the factory). -/// Without this, the next chat call routed to `reasoning` (or any other -/// workload the user had set to `ollama:`) would fail at factory -/// build time. The shift is one-way: re-enabling local AI does NOT -/// restore the previous Ollama routes — the user re-picks. -/// -/// 3. **Status forced to disabled** so the UI reflects the gate immediately. -pub async fn local_ai_shutdown_owned( - config: &mut Config, -) -> Result, String> { - let service = local_ai::global(config); - service.shutdown_owned_ollama(config).await; - - // Shift any ollama-routed workload back to "cloud" (= primary). - let cleared = clear_ollama_workload_routes(config); - if cleared > 0 { - log::info!( - "[local_ai] shutdown_owned: shifted {cleared} ollama-routed workload(s) back to cloud" - ); - config.save().await.map_err(|e| e.to_string())?; - } - - service.mark_disabled(config); - Ok(RpcOutcome::single_log( - service.status(), - "local ai runtime gated off (owned daemon killed if any)", - )) -} - -/// Clear every per-workload `*_provider` field whose stored value starts -/// with `"ollama:"`. Returns the count of fields actually changed so the -/// caller can decide whether to persist. 
-fn clear_ollama_workload_routes(config: &mut Config) -> usize { - fn clear_if_ollama(field: &mut Option) -> bool { - let is_ollama = field - .as_deref() - .map(|s| s.trim().starts_with("ollama:")) - .unwrap_or(false); - if is_ollama { - *field = None; - true - } else { - false - } - } - let mut changed = 0; - for field in [ - &mut config.reasoning_provider, - &mut config.agentic_provider, - &mut config.coding_provider, - &mut config.memory_provider, - &mut config.embeddings_provider, - &mut config.heartbeat_provider, - &mut config.learning_provider, - &mut config.subconscious_provider, - ] { - if clear_if_ollama(field) { - changed += 1; - } - } - changed -} - -/// Triggers a full download of all required local AI models. -pub async fn local_ai_download( - config: &Config, - force: bool, -) -> Result, String> { - let service = local_ai::global(config); - if force { - service.reset_to_idle(config); - } - let service_clone = service.clone(); - let config_clone = config.clone(); - tokio::spawn(async move { - if let Err(err) = service_clone.download_all_models(&config_clone).await { - service_clone.mark_degraded(err); - } - }); - Ok(RpcOutcome::single_log( - service.status(), - "local ai full model download triggered", - )) -} - -/// Triggers a download of all local AI assets and returns progress information. 
-pub async fn local_ai_download_all_assets( - config: &Config, - force: bool, -) -> Result, String> { - let service = local_ai::global(config); - if force { - service.reset_to_idle(config); - } - let service_clone = service.clone(); - let config_clone = config.clone(); - tokio::spawn(async move { - if let Err(err) = service_clone.download_all_models(&config_clone).await { - service_clone.mark_degraded(err); - } - }); - let progress = service - .downloads_progress(config) - .await - .map_err(|e| e.to_string())?; - Ok(RpcOutcome::single_log( - progress, - "local ai full asset download triggered", - )) + // `LocalAiService` is a process-wide singleton whose cached `provider` + // field was set at first init from whichever config it saw. After an + // `inference_update_local_settings` call that swaps providers + // (e.g. ollama → lm_studio) the cached value is stale, so we overlay + // the current config's provider on the status snapshot before returning. + let mut snapshot = service.status(); + snapshot.provider = local_ai::provider::provider_from_config(config) + .as_str() + .to_string(); + Ok(RpcOutcome::single_log(snapshot, "local ai status fetched")) } /// Generates a summary of the provided text using local AI models. 
@@ -502,7 +384,7 @@ pub async fn local_ai_chat( return Err("messages must not be empty".to_string()); } - let mut ollama_messages: Vec = + let mut ollama_messages: Vec = Vec::with_capacity(messages.len()); for msg in messages.into_iter() { @@ -520,10 +402,12 @@ pub async fn local_ai_chat( } } - ollama_messages.push(crate::openhuman::local_ai::ollama_api::OllamaChatMessage { - role: normalized_role, - content: msg.content, - }); + ollama_messages.push( + crate::openhuman::inference::local::ollama::OllamaChatMessage { + role: normalized_role, + content: msg.content, + }, + ); } let service = local_ai::global(config); diff --git a/src/openhuman/local_ai/ops_tests.rs b/src/openhuman/inference/local/ops_tests.rs similarity index 100% rename from src/openhuman/local_ai/ops_tests.rs rename to src/openhuman/inference/local/ops_tests.rs diff --git a/src/openhuman/local_ai/process_util.rs b/src/openhuman/inference/local/process_util.rs similarity index 100% rename from src/openhuman/local_ai/process_util.rs rename to src/openhuman/inference/local/process_util.rs diff --git a/src/openhuman/local_ai/provider.rs b/src/openhuman/inference/local/provider.rs similarity index 100% rename from src/openhuman/local_ai/provider.rs rename to src/openhuman/inference/local/provider.rs diff --git a/src/openhuman/inference/local/schemas.rs b/src/openhuman/inference/local/schemas.rs new file mode 100644 index 0000000000..aefc7d5af0 --- /dev/null +++ b/src/openhuman/inference/local/schemas.rs @@ -0,0 +1,610 @@ +use serde::de::DeserializeOwned; +use serde::Deserialize; +use serde_json::{Map, Value}; + +use crate::core::all::{ControllerFuture, RegisteredController}; +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; +use crate::openhuman::config::rpc as config_rpc; +use crate::rpc::RpcOutcome; + +#[derive(Debug, Deserialize)] +struct AgentChatParams { + message: String, + model_override: Option, + temperature: Option, +} + +#[derive(Debug, Deserialize)] +struct 
LocalAiTranscribeParams { + audio_path: String, +} + +#[derive(Debug, Deserialize)] +struct LocalAiTranscribeBytesParams { + audio_bytes: Vec, + extension: Option, +} + +#[derive(Debug, Deserialize)] +struct LocalAiTtsParams { + text: String, + output_path: Option, +} + +#[derive(Debug, Deserialize)] +struct LocalAiDownloadAssetParams { + capability: String, +} + +#[derive(Debug, Deserialize)] +struct LocalAiInstallWhisperParams { + /// Optional model size (`tiny`, `base`, `small`, `medium`, + /// `large-v3-turbo`). Defaults to `large-v3-turbo`. + #[serde(default)] + model_size: Option, + /// When true, blow away any existing model file and re-download. + #[serde(default)] + force: Option, +} + +#[derive(Debug, Deserialize)] +struct LocalAiInstallPiperParams { + /// Optional Piper voice id (e.g. `en_US-lessac-medium`). Defaults to + /// the bundled US-English Lessac voice. + #[serde(default)] + voice_id: Option, + /// When true, blow away any existing voice file and re-download. + #[serde(default)] + force: Option, +} + +pub fn all_controller_schemas() -> Vec { + vec![ + schemas("agent_chat"), + schemas("agent_chat_simple"), + schemas("local_ai_transcribe"), + schemas("local_ai_transcribe_bytes"), + schemas("local_ai_tts"), + schemas("local_ai_assets_status"), + schemas("local_ai_downloads_progress"), + schemas("local_ai_download_asset"), + schemas("local_ai_install_whisper"), + schemas("local_ai_install_piper"), + schemas("local_ai_whisper_install_status"), + schemas("local_ai_piper_install_status"), + ] +} + +pub fn all_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: schemas("agent_chat"), + handler: handle_agent_chat, + }, + RegisteredController { + schema: schemas("agent_chat_simple"), + handler: handle_agent_chat_simple, + }, + RegisteredController { + schema: schemas("local_ai_transcribe"), + handler: handle_local_ai_transcribe, + }, + RegisteredController { + schema: schemas("local_ai_transcribe_bytes"), + handler: 
handle_local_ai_transcribe_bytes, + }, + RegisteredController { + schema: schemas("local_ai_tts"), + handler: handle_local_ai_tts, + }, + RegisteredController { + schema: schemas("local_ai_assets_status"), + handler: handle_local_ai_assets_status, + }, + RegisteredController { + schema: schemas("local_ai_downloads_progress"), + handler: handle_local_ai_downloads_progress, + }, + RegisteredController { + schema: schemas("local_ai_download_asset"), + handler: handle_local_ai_download_asset, + }, + RegisteredController { + schema: schemas("local_ai_install_whisper"), + handler: handle_local_ai_install_whisper, + }, + RegisteredController { + schema: schemas("local_ai_install_piper"), + handler: handle_local_ai_install_piper, + }, + RegisteredController { + schema: schemas("local_ai_whisper_install_status"), + handler: handle_local_ai_whisper_install_status, + }, + RegisteredController { + schema: schemas("local_ai_piper_install_status"), + handler: handle_local_ai_piper_install_status, + }, + ] +} + +pub fn schemas(function: &str) -> ControllerSchema { + match function { + "agent_chat" => ControllerSchema { + namespace: "local_ai", + function: "agent_chat", + description: "Run one-shot agent chat with optional model overrides.", + inputs: vec![ + required_string("message", "User message."), + optional_string("model_override", "Optional model override."), + optional_f64("temperature", "Optional temperature override."), + ], + outputs: vec![json_output("response", "Agent response payload.")], + }, + "agent_chat_simple" => ControllerSchema { + namespace: "local_ai", + function: "agent_chat_simple", + description: "Run one-shot lightweight provider chat.", + inputs: vec![ + required_string("message", "User message."), + optional_string("model_override", "Optional model override."), + optional_f64("temperature", "Optional temperature override."), + ], + outputs: vec![json_output("response", "Agent response payload.")], + }, + "local_ai_transcribe" => ControllerSchema { + 
namespace: "local_ai", + function: "transcribe", + description: "Transcribe audio from file path.", + inputs: vec![required_string("audio_path", "Input audio path.")], + outputs: vec![json_output("speech", "Transcription payload.")], + }, + "local_ai_transcribe_bytes" => ControllerSchema { + namespace: "local_ai", + function: "transcribe_bytes", + description: "Transcribe audio from raw bytes.", + inputs: vec![ + FieldSchema { + name: "audio_bytes", + ty: TypeSchema::Bytes, + comment: "Raw audio bytes.", + required: true, + }, + optional_string("extension", "Optional audio extension."), + ], + outputs: vec![json_output("speech", "Transcription payload.")], + }, + "local_ai_tts" => ControllerSchema { + namespace: "local_ai", + function: "tts", + description: "Synthesize speech from text.", + inputs: vec![ + required_string("text", "Input text."), + optional_string("output_path", "Optional output path."), + ], + outputs: vec![json_output("tts", "TTS result payload.")], + }, + "local_ai_assets_status" => ControllerSchema { + namespace: "local_ai", + function: "assets_status", + description: "Get local AI asset installation status.", + inputs: vec![], + outputs: vec![json_output("status", "Assets status payload.")], + }, + "local_ai_downloads_progress" => ControllerSchema { + namespace: "local_ai", + function: "downloads_progress", + description: "Get local AI download progress.", + inputs: vec![], + outputs: vec![json_output("progress", "Download progress payload.")], + }, + "local_ai_download_asset" => ControllerSchema { + namespace: "local_ai", + function: "download_asset", + description: "Trigger download for one local AI asset capability.", + inputs: vec![required_string("capability", "Asset capability id.")], + outputs: vec![json_output("status", "Assets status payload.")], + }, + "local_ai_install_whisper" => ControllerSchema { + namespace: "local_ai", + function: "install_whisper", + description: "Download whisper.cpp's GGML model (and on Windows the 
whisper-cli binary) into the workspace so the local STT factory has everything it needs to run.", + inputs: vec![ + optional_string( + "model_size", + "Whisper model size (tiny, base, small, medium, large-v3-turbo). Defaults to large-v3-turbo.", + ), + optional_bool( + "force", + "When true, re-download even if the workspace already has a matching model.", + ), + ], + outputs: vec![json_output("status", "Whisper install status payload.")], + }, + "local_ai_install_piper" => ControllerSchema { + namespace: "local_ai", + function: "install_piper", + description: "Download the Piper binary archive and the bundled en_US-lessac-medium voice files into the workspace.", + inputs: vec![ + optional_string( + "voice_id", + "Piper voice id (e.g. en_US-lessac-medium). Defaults to en_US-lessac-medium.", + ), + optional_bool( + "force", + "When true, re-download even if the workspace already has the voice files.", + ), + ], + outputs: vec![json_output("status", "Piper install status payload.")], + }, + "local_ai_whisper_install_status" => ControllerSchema { + namespace: "local_ai", + function: "whisper_install_status", + description: "Query the Whisper install state (missing / installing / installed / broken / error) plus per-stage download progress.", + inputs: vec![], + outputs: vec![json_output("status", "Whisper install status payload.")], + }, + "local_ai_piper_install_status" => ControllerSchema { + namespace: "local_ai", + function: "piper_install_status", + description: "Query the Piper install state (missing / installing / installed / broken / error) plus per-stage download progress.", + inputs: vec![], + outputs: vec![json_output("status", "Piper install status payload.")], + }, + _ => ControllerSchema { + namespace: "local_ai", + function: "unknown", + description: "Unknown local_ai controller function.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "error", + ty: TypeSchema::String, + comment: "Lookup error details.", + required: true, + }], + }, + } +} + 
+fn handle_agent_chat(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let mut config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::local::ops::agent_chat( + &mut config, + &p.message, + p.model_override, + p.temperature, + ) + .await?, + ) + }) +} + +fn handle_agent_chat_simple(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::local::ops::agent_chat_simple( + &config, + &p.message, + p.model_override, + p.temperature, + ) + .await?, + ) + }) +} + +fn handle_local_ai_transcribe(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::local::ops::local_ai_transcribe( + &config, + p.audio_path.trim(), + ) + .await?, + ) + }) +} + +fn handle_local_ai_transcribe_bytes(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::local::ops::local_ai_transcribe_bytes( + &config, + &p.audio_bytes, + p.extension, + ) + .await?, + ) + }) +} + +fn handle_local_ai_tts(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::local::ops::local_ai_tts( + &config, + &p.text, + p.output_path.as_deref(), + ) + .await?, + ) + }) +} + +fn handle_local_ai_assets_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(crate::openhuman::inference::local::ops::local_ai_assets_status(&config).await?) 
+ }) +} + +fn handle_local_ai_downloads_progress(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::local::ops::local_ai_downloads_progress(&config).await?, + ) + }) +} + +fn handle_local_ai_download_asset(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::local::ops::local_ai_download_asset( + &config, + p.capability.trim(), + ) + .await?, + ) + }) +} + +// The install RPCs are intentionally fire-and-forget: a binary+model +// download can take minutes (1.6 GB GGML model, ~5 MB Piper binary +// archive) but the core JSON-RPC client times out at +// VITE_CORE_RPC_TIMEOUT_MS (default 30s). Blocking the handler on the +// full download would force the UI into a retry loop that deletes the +// in-flight .part on each retry, looping forever. +// +// Shape: mark the engine as `installing(0%)` in the shared status table, +// spawn the real install on a background tokio task, return the +// just-written status immediately. The UI's status-polling RPC +// (handle_local_ai_*_install_status) reads from the same table and +// renders real-time progress. The eventual `installed` / `error` +// transition lands on the table when the background task finishes; +// no caller awaits it. + +fn handle_local_ai_install_whisper(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + let force = p.force.unwrap_or(false); + + // Atomic install-start guard. A duplicate click while an install + // is already in flight (or a parallel auto-install firing + // alongside a manual click) must be a no-op — not a second + // concurrent download racing on the same `.part` file inside + // `download_to_file`. 
The previous read_status -> check -> + // write_status sequence was non-atomic and let two callers slip + // through; `try_acquire_install_slot` does the check-and-claim + // under a single mutex acquisition. + let slot = + match crate::openhuman::inference::local::voice_install_common::try_acquire_install_slot( + crate::openhuman::inference::local::voice_install_common::ENGINE_WHISPER, + ) { + Some(slot) => slot, + None => { + tracing::debug!( + "[voice-install:whisper] slot already held — returning current status" + ); + let current = crate::openhuman::inference::local::voice_install_common::read_status( + crate::openhuman::inference::local::voice_install_common::ENGINE_WHISPER, + ); + return serde_json::to_value(current) + .map_err(|e| format!("serialize whisper status: {e}")); + } + }; + + // Mark "installing" before the spawn so the very next status poll + // (≤ 2s away) reflects the new state without a stale read. + crate::openhuman::inference::local::voice_install_common::write_status( + crate::openhuman::inference::local::voice_install_common::VoiceInstallStatus { + engine: crate::openhuman::inference::local::voice_install_common::ENGINE_WHISPER + .to_string(), + state: + crate::openhuman::inference::local::voice_install_common::VoiceInstallState::Installing, + progress: Some(0), + downloaded_bytes: None, + total_bytes: None, + stage: Some("queued".to_string()), + error_detail: None, + }, + ); + + tracing::debug!( + model_size = ?p.model_size, + force, + "[voice-install:whisper] spawning background install" + ); + let model_size = p.model_size.clone(); + // Move the slot into the spawned task so it lives for the actual + // install duration (download + extract + validate), not just the + // RPC handler's lifetime. The slot's Drop releases the + // single-writer guard on task exit, including via panic. 
+ tokio::spawn(async move { + let _slot = slot; + if let Err(e) = crate::openhuman::inference::local::install_whisper::install_whisper( + &config, model_size, force, + ) + .await + { + log::warn!("[voice-install:whisper] background install failed: {e}"); + } + }); + + let status = crate::openhuman::inference::local::voice_install_common::read_status( + crate::openhuman::inference::local::voice_install_common::ENGINE_WHISPER, + ); + serde_json::to_value(status).map_err(|e| format!("serialize whisper status: {e}")) + }) +} + +fn handle_local_ai_install_piper(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + let force = p.force.unwrap_or(false); + + // See the whisper handler above for why this is an atomic slot + // acquisition rather than a read_status / write_status pair. + let slot = + match crate::openhuman::inference::local::voice_install_common::try_acquire_install_slot( + crate::openhuman::inference::local::voice_install_common::ENGINE_PIPER, + ) { + Some(slot) => slot, + None => { + tracing::debug!( + "[voice-install:piper] slot already held — returning current status" + ); + let current = + crate::openhuman::inference::local::voice_install_common::read_status( + crate::openhuman::inference::local::voice_install_common::ENGINE_PIPER, + ); + return serde_json::to_value(current) + .map_err(|e| format!("serialize piper status: {e}")); + } + }; + + crate::openhuman::inference::local::voice_install_common::write_status( + crate::openhuman::inference::local::voice_install_common::VoiceInstallStatus { + engine: crate::openhuman::inference::local::voice_install_common::ENGINE_PIPER.to_string(), + state: + crate::openhuman::inference::local::voice_install_common::VoiceInstallState::Installing, + progress: Some(0), + downloaded_bytes: None, + total_bytes: None, + stage: Some("queued".to_string()), + error_detail: None, + }, + ); + + tracing::debug!( + 
voice_id = ?p.voice_id, + force, + "[voice-install:piper] spawning background install" + ); + let voice_id = p.voice_id.clone(); + // Move the slot into the spawned task — same rationale as the + // whisper handler. + tokio::spawn(async move { + let _slot = slot; + if let Err(e) = crate::openhuman::inference::local::install_piper::install_piper( + &config, voice_id, force, + ) + .await + { + log::warn!("[voice-install:piper] background install failed: {e}"); + } + }); + + let status = crate::openhuman::inference::local::voice_install_common::read_status( + crate::openhuman::inference::local::voice_install_common::ENGINE_PIPER, + ); + serde_json::to_value(status).map_err(|e| format!("serialize piper status: {e}")) + }) +} + +fn handle_local_ai_whisper_install_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let status = crate::openhuman::inference::local::install_whisper::status(&config); + serde_json::to_value(status).map_err(|e| format!("serialize whisper status: {e}")) + }) +} + +fn handle_local_ai_piper_install_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let status = crate::openhuman::inference::local::install_piper::status(&config); + serde_json::to_value(status).map_err(|e| format!("serialize piper status: {e}")) + }) +} + +fn deserialize_params(params: Map) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) +} + +fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::String, + comment, + required: true, + } +} + +fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment, + required: false, + } +} + +fn optional_bool(name: &'static str, comment: &'static str) -> 
FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + +fn optional_f64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment, + required: false, + } +} + +fn optional_u64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment, + required: false, + } +} + +fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Json, + comment, + required: true, + } +} + +fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} + +#[cfg(test)] +#[path = "schemas_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/local/schemas_tests.rs b/src/openhuman/inference/local/schemas_tests.rs new file mode 100644 index 0000000000..4308ce19e8 --- /dev/null +++ b/src/openhuman/inference/local/schemas_tests.rs @@ -0,0 +1,228 @@ +use super::*; + +#[test] +fn catalog_counts_match_and_nonempty() { + let s = all_controller_schemas(); + let h = all_registered_controllers(); + assert_eq!(s.len(), h.len()); + assert!(s.len() >= 12, "local_ai should expose >=12 controller fns"); +} + +#[test] +fn all_schemas_use_local_ai_namespace_and_have_descriptions() { + for s in all_controller_schemas() { + assert_eq!(s.namespace, "local_ai", "function {}", s.function); + assert!(!s.description.is_empty(), "function {} desc", s.function); + assert!(!s.outputs.is_empty(), "function {} outputs", s.function); + } +} + +#[test] +fn unknown_function_returns_unknown_schema() { + let s = schemas("no_such_fn"); + assert_eq!(s.function, "unknown"); + assert_eq!(s.namespace, "local_ai"); +} + +#[test] +fn every_registered_key_resolves_to_non_unknown_schema() { + let keys = [ + "agent_chat", + "agent_chat_simple", + "local_ai_transcribe", + "local_ai_transcribe_bytes", + 
"local_ai_tts", + "local_ai_assets_status", + "local_ai_downloads_progress", + "local_ai_download_asset", + "local_ai_install_whisper", + "local_ai_install_piper", + "local_ai_whisper_install_status", + "local_ai_piper_install_status", + ]; + for k in keys { + let s = schemas(k); + assert_eq!(s.namespace, "local_ai"); + assert_ne!(s.function, "unknown", "key `{k}` fell through"); + } +} + +#[test] +fn registered_controllers_all_in_local_ai_namespace() { + for h in all_registered_controllers() { + assert_eq!(h.schema.namespace, "local_ai"); + assert!(!h.schema.function.is_empty()); + } +} + +#[test] +fn field_builder_helpers_are_correct_shape() { + let r = required_string("k", "c"); + assert!(r.required); + assert!(matches!(r.ty, TypeSchema::String)); + + let o = optional_string("k", "c"); + assert!(!o.required); + + let ou = optional_u64("k", "c"); + assert!(!ou.required); + + let j = json_output("result", "c"); + assert!(j.required); + assert!(matches!(j.ty, TypeSchema::Json)); +} + +#[test] +fn to_json_wraps_rpc_outcome() { + let v = + to_json(RpcOutcome::single_log(serde_json::json!({"ok": true}), "l")).expect("serialize"); + assert!(v.get("logs").is_some() || v.get("result").is_some() || v.get("ok").is_some()); +} + +#[test] +fn deserialize_params_parses_valid_object() { + let mut m = Map::new(); + m.insert("message".into(), Value::String("hi".into())); + let p: AgentChatParams = deserialize_params(m).expect("parse"); + assert_eq!(p.message, "hi"); +} + +#[test] +fn deserialize_params_errors_on_invalid_shape() { + let mut m = Map::new(); + m.insert("message".into(), Value::Bool(true)); + let err = deserialize_params::(m).unwrap_err(); + assert!(err.contains("invalid params")); +} + +// ── Handler-level tests that don't need Ollama ──────────────── + +use crate::openhuman::config::TEST_ENV_LOCK as ENV_LOCK; +use tempfile::TempDir; + +/// Regression test for the CodeRabbit #7 race on PR #1755: when two +/// concurrent RPC calls (e.g. 
a double-click, or the auto-install firing +/// alongside a manual click) hit `handle_local_ai_install_whisper` at +/// the same time, only one of them must spawn a real install task. The +/// other must short-circuit and return the in-flight status without +/// starting a second download that would race on the same `.part` file. +/// +/// We exercise the actual handler — not just the slot primitive — so +/// the wiring at the call site is also covered. +#[tokio::test] +async fn install_whisper_handler_serializes_concurrent_calls() { + let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + let tmp = TempDir::new().unwrap(); + unsafe { + std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); + } + + // Pre-acquire the install slot from the test so we're guaranteed to + // observe the "already in flight" code path. Holding the slot here + // also means the handler under test will short-circuit immediately + // rather than spawning a real install task that would try to hit + // the network in CI. + let slot = crate::openhuman::inference::local::voice_install_common::try_acquire_install_slot( + crate::openhuman::inference::local::voice_install_common::ENGINE_WHISPER, + ) + .expect("test should be able to claim the slot first"); + + // Mark the status table as `Installing` so the handler's + // short-circuit branch (which reads current status to return) sees + // a coherent snapshot. + crate::openhuman::inference::local::voice_install_common::write_status( + crate::openhuman::inference::local::voice_install_common::VoiceInstallStatus { + engine: crate::openhuman::inference::local::voice_install_common::ENGINE_WHISPER.to_string(), + state: crate::openhuman::inference::local::voice_install_common::VoiceInstallState::Installing, + progress: Some(0), + downloaded_bytes: None, + total_bytes: None, + stage: Some("queued".to_string()), + error_detail: None, + }, + ); + + // Fire two handler calls in parallel. 
Both must succeed and both + // must return the existing `Installing` status — neither must + // mutate or re-spawn. This is exactly the double-click / auto-fire + // shape described in CodeRabbit #7. + let (r1, r2) = tokio::join!( + handle_local_ai_install_whisper(Map::new()), + handle_local_ai_install_whisper(Map::new()) + ); + + unsafe { + std::env::remove_var("OPENHUMAN_WORKSPACE"); + } + drop(slot); + // Clean up so other tests see Missing. + crate::openhuman::inference::local::voice_install_common::reset_status( + crate::openhuman::inference::local::voice_install_common::ENGINE_WHISPER, + ); + + let v1 = r1.expect("first call ok"); + let v2 = r2.expect("second call ok"); + // Both calls must report the engine is already installing — proving + // the handler short-circuited rather than running the spawn path. + for (label, v) in [("first", &v1), ("second", &v2)] { + let state = v.get("state").and_then(|s| s.as_str()); + assert_eq!( + state, + Some("installing"), + "{label} concurrent call should see Installing, got {v:?}" + ); + } +} + +/// Same regression for Piper. The two handlers share the slot +/// infrastructure but live in separate code paths, so the wiring needs +/// independent coverage. 
+#[tokio::test] +async fn install_piper_handler_serializes_concurrent_calls() { + let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + let tmp = TempDir::new().unwrap(); + unsafe { + std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); + } + + let slot = crate::openhuman::inference::local::voice_install_common::try_acquire_install_slot( + crate::openhuman::inference::local::voice_install_common::ENGINE_PIPER, + ) + .expect("test should be able to claim the slot first"); + + crate::openhuman::inference::local::voice_install_common::write_status( + crate::openhuman::inference::local::voice_install_common::VoiceInstallStatus { + engine: crate::openhuman::inference::local::voice_install_common::ENGINE_PIPER.to_string(), + state: crate::openhuman::inference::local::voice_install_common::VoiceInstallState::Installing, + progress: Some(0), + downloaded_bytes: None, + total_bytes: None, + stage: Some("queued".to_string()), + error_detail: None, + }, + ); + + let (r1, r2) = tokio::join!( + handle_local_ai_install_piper(Map::new()), + handle_local_ai_install_piper(Map::new()) + ); + + unsafe { + std::env::remove_var("OPENHUMAN_WORKSPACE"); + } + drop(slot); + crate::openhuman::inference::local::voice_install_common::reset_status( + crate::openhuman::inference::local::voice_install_common::ENGINE_PIPER, + ); + + let v1 = r1.expect("first call ok"); + let v2 = r2.expect("second call ok"); + for (label, v) in [("first", &v1), ("second", &v2)] { + let state = v.get("state").and_then(|s| s.as_str()); + assert_eq!( + state, + Some("installing"), + "{label} concurrent call should see Installing, got {v:?}" + ); + } +} diff --git a/src/openhuman/local_ai/service/assets.rs b/src/openhuman/inference/local/service/assets.rs similarity index 97% rename from src/openhuman/local_ai/service/assets.rs rename to src/openhuman/inference/local/service/assets.rs index 8b281cbc7c..b45fbcf2e3 100644 --- a/src/openhuman/local_ai/service/assets.rs +++ 
b/src/openhuman/inference/local/service/assets.rs @@ -3,15 +3,15 @@ use std::path::Path; use futures_util::TryStreamExt; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::model_ids; +use crate::openhuman::inference::model_ids; use tracing::{debug, trace}; -use crate::openhuman::local_ai::paths::{ +use crate::openhuman::inference::local::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::paths::{ resolve_stt_model_path, resolve_tts_voice_path, stt_model_target_path, tts_model_target_path, }; -use crate::openhuman::local_ai::presets::{self, VisionMode}; -use crate::openhuman::local_ai::provider::{provider_from_config, LocalAiProvider}; -use crate::openhuman::local_ai::types::{ +use crate::openhuman::inference::presets::{self, VisionMode}; +use crate::openhuman::inference::types::{ LocalAiAssetStatus, LocalAiAssetsStatus, LocalAiDownloadProgressItem, LocalAiDownloadsProgress, }; @@ -37,23 +37,21 @@ impl LocalAiService { "[local_ai:assets:provider_routing] entry" ); - // Pre-flight precondition: if no Ollama binary exists anywhere - // discoverable, every Ollama-backed `has_model` call will fail (or - // time out). LM Studio still delegates embeddings to Ollama in this - // first provider slice, so it needs the same pre-flight for the - // embedding branch. + // External-runtime precondition: OpenHuman no longer installs or + // starts Ollama itself, so the interesting question is whether the + // user-managed runtime is reachable right now. 
let uses_ollama_assets = matches!( provider, LocalAiProvider::Ollama | LocalAiProvider::LmStudio ); let ollama_available = if uses_ollama_assets { - let present = self.ollama_binary_present(config); + let present = self.ollama_healthy().await; debug!( target: "local_ai::assets", %correlation_id, provider = %provider.as_str(), ollama_available = present, - "[local_ai:assets:provider_routing] ollama binary check" + "[local_ai:assets:provider_routing] ollama runtime check" ); present } else { @@ -121,7 +119,7 @@ impl LocalAiService { %correlation_id, provider = "ollama", model = %embedding_model, - "[local_ai:assets:provider_routing] lm studio embedding check skipped; ollama binary missing" + "[local_ai:assets:provider_routing] lm studio embedding check skipped; ollama runtime unavailable" ); false }; @@ -216,7 +214,7 @@ impl LocalAiService { trace!( target: "local_ai::assets", %correlation_id, - branch = "ollama_missing_binary", + branch = "ollama_runtime_unavailable", "[local_ai:assets:provider_routing] selected provider branch" ); (false, false, false) @@ -728,7 +726,7 @@ impl LocalAiService { self.assets_status(config).await } - pub(in crate::openhuman::local_ai::service) async fn ensure_stt_asset_available( + pub(in crate::openhuman::inference::local::service) async fn ensure_stt_asset_available( &self, config: &Config, ) -> Result<(), String> { @@ -751,7 +749,7 @@ impl LocalAiService { Ok(()) } - pub(in crate::openhuman::local_ai::service) async fn ensure_tts_asset_available( + pub(in crate::openhuman::inference::local::service) async fn ensure_tts_asset_available( &self, config: &Config, ) -> Result<(), String> { diff --git a/src/openhuman/local_ai/service/bootstrap.rs b/src/openhuman/inference/local/service/bootstrap.rs similarity index 91% rename from src/openhuman/local_ai/service/bootstrap.rs rename to src/openhuman/inference/local/service/bootstrap.rs index 843f8d9477..943c8cff6f 100644 --- a/src/openhuman/local_ai/service/bootstrap.rs +++ 
b/src/openhuman/inference/local/service/bootstrap.rs @@ -1,9 +1,9 @@ use crate::openhuman::config::Config; -use crate::openhuman::local_ai::device::DeviceProfile; -use crate::openhuman::local_ai::model_ids; -use crate::openhuman::local_ai::presets::{self, VisionMode}; -use crate::openhuman::local_ai::provider::{provider_from_config, LocalAiProvider}; -use crate::openhuman::local_ai::types::LocalAiStatus; +use crate::openhuman::inference::device::DeviceProfile; +use crate::openhuman::inference::local::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::model_ids; +use crate::openhuman::inference::presets::{self, VisionMode}; +use crate::openhuman::inference::types::LocalAiStatus; use super::LocalAiService; @@ -129,7 +129,7 @@ impl LocalAiService { pub async fn bootstrap(&self, config: &Config) { let _guard = self.bootstrap_lock.lock().await; - let device = crate::openhuman::local_ai::device::detect_device_profile(); + let device = crate::openhuman::inference::device::detect_device_profile(); let effective_config = config_with_recommended_tier_if_unselected(config, &device); if !effective_config.local_ai.runtime_enabled { @@ -301,30 +301,15 @@ impl LocalAiService { return; } - if let Err(first_err) = self.ensure_ollama_server(&effective_config).await { + if let Err(err) = self.ensure_ollama_server(&effective_config).await { log::warn!( - "[local_ai] ensure_ollama_server failed, retrying with fresh install: {first_err}" + "[local_ai] bootstrap degraded: external runtime connectivity check failed: {err}" ); - // Force a fresh install attempt before giving up. 
- { - let mut status = self.status.lock(); - status.state = "installing".to_string(); - status.warning = Some("Retrying Ollama installation...".to_string()); - status.error_detail = None; - status.error_category = None; - } - if let Err(err) = self.ensure_ollama_server_fresh(&effective_config).await { - let mut status = self.status.lock(); - status.state = "degraded".to_string(); - let is_install_error = status.error_category.as_deref() == Some("install"); - if is_install_error { - status.warning = Some(err); - } else { - status.error_category = Some("server".to_string()); - status.warning = Some(format_degraded_warning(&err, &effective_config)); - } - return; - } + let mut status = self.status.lock(); + status.state = "degraded".to_string(); + status.error_category = Some("server".to_string()); + status.warning = Some(format_degraded_warning(&err, &effective_config)); + return; } if let Err(err) = self.ensure_models_available(&effective_config).await { @@ -339,7 +324,7 @@ impl LocalAiService { // Pass GPU info from the device profile so whisper can use hardware acceleration. if effective_config.local_ai.whisper_in_process { if let Ok(model_path) = - crate::openhuman::local_ai::paths::resolve_stt_model_path(&effective_config) + crate::openhuman::inference::paths::resolve_stt_model_path(&effective_config) { let model = std::path::PathBuf::from(&model_path); let handle = self.whisper.clone(); @@ -416,7 +401,7 @@ impl LocalAiService { fn config_with_recommended_tier_if_unselected(config: &Config, device: &DeviceProfile) -> Config { let current_tier = - crate::openhuman::local_ai::presets::current_tier_from_config(&config.local_ai); + crate::openhuman::inference::presets::current_tier_from_config(&config.local_ai); // Local AI is opt-in on every device. 
The only way to keep it enabled // across a restart is an explicit opt-in (`apply_preset` on a real tier), @@ -426,7 +411,7 @@ fn config_with_recommended_tier_if_unselected(config: &Config, device: &DevicePr if !config.local_ai.opt_in_confirmed { tracing::debug!( total_ram_gb = device.total_ram_gb(), - min_required_gb = crate::openhuman::local_ai::presets::MIN_RAM_GB_FOR_LOCAL_AI, + min_required_gb = crate::openhuman::inference::presets::MIN_RAM_GB_FOR_LOCAL_AI, ?current_tier, selected_tier = ?config.local_ai.selected_tier, "[local_ai] bootstrap: opt_in_confirmed=false, hard-overriding to disabled (cloud fallback)" @@ -446,21 +431,21 @@ fn config_with_recommended_tier_if_unselected(config: &Config, device: &DevicePr } fn format_degraded_warning(err: &str, config: &Config) -> String { - let current = crate::openhuman::local_ai::presets::current_tier_from_config(&config.local_ai); + let current = crate::openhuman::inference::presets::current_tier_from_config(&config.local_ai); match current { - crate::openhuman::local_ai::presets::ModelTier::Ram16PlusGb => { + crate::openhuman::inference::presets::ModelTier::Ram16PlusGb => { format!( "{err}. Hint: your device may not support the 16 GB+ tier model. \ Try switching to the 8-16 GB or 4-8 GB tier in Settings > Local AI Model." ) } - crate::openhuman::local_ai::presets::ModelTier::Ram8To16Gb => { + crate::openhuman::inference::presets::ModelTier::Ram8To16Gb => { format!( "{err}. Hint: your device may not support the 8-16 GB tier model. \ Try switching to the 4-8 GB or 2-4 GB tier in Settings > Local AI Model." ) } - crate::openhuman::local_ai::presets::ModelTier::Ram4To8Gb => format!( + crate::openhuman::inference::presets::ModelTier::Ram4To8Gb => format!( "{err}. Hint: your device may not support the 4-8 GB tier vision sidecar. \ Try switching to the 2-4 GB tier for text-only local AI." 
), @@ -546,9 +531,9 @@ mod tests { let mut config = Config::default(); config.local_ai.selected_tier = Some("ram_2_4gb".to_string()); config.local_ai.opt_in_confirmed = true; - crate::openhuman::local_ai::presets::apply_preset_to_config( + crate::openhuman::inference::presets::apply_preset_to_config( &mut config.local_ai, - crate::openhuman::local_ai::presets::ModelTier::Ram2To4Gb, + crate::openhuman::inference::presets::ModelTier::Ram2To4Gb, ); let device = test_device(4); @@ -565,9 +550,9 @@ mod tests { let mut config = Config::default(); config.local_ai.selected_tier = Some("ram_2_4gb".to_string()); config.local_ai.opt_in_confirmed = true; - crate::openhuman::local_ai::presets::apply_preset_to_config( + crate::openhuman::inference::presets::apply_preset_to_config( &mut config.local_ai, - crate::openhuman::local_ai::presets::ModelTier::Ram2To4Gb, + crate::openhuman::inference::presets::ModelTier::Ram2To4Gb, ); let device = test_device(16); diff --git a/src/openhuman/local_ai/service/lm_studio.rs b/src/openhuman/inference/local/service/lm_studio.rs similarity index 92% rename from src/openhuman/local_ai/service/lm_studio.rs rename to src/openhuman/inference/local/service/lm_studio.rs index 218d1b5d54..742d7c46b8 100644 --- a/src/openhuman/local_ai/service/lm_studio.rs +++ b/src/openhuman/inference/local/service/lm_studio.rs @@ -1,10 +1,10 @@ use crate::openhuman::config::Config; -use crate::openhuman::local_ai::lm_studio_api::{ +use crate::openhuman::inference::local::lm_studio::{ apply_lm_studio_auth, lm_studio_base_url, LmStudioChatCompletionRequest, LmStudioChatCompletionResponse, LmStudioChatMessage, LmStudioModelsResponse, }; -use crate::openhuman::local_ai::model_ids; -use crate::openhuman::local_ai::ollama_api::OllamaModelTag; +use crate::openhuman::inference::local::ollama::OllamaModelTag; +use crate::openhuman::inference::model_ids; use super::LocalAiService; @@ -17,14 +17,14 @@ fn diagnostic_body_snippet(body: &str) -> String { snippet } -pub(in 
crate::openhuman::local_ai::service) struct LmStudioCompletionOutcome { +pub(in crate::openhuman::inference::local::service) struct LmStudioCompletionOutcome { pub reply: String, pub prompt_tokens: Option, pub completion_tokens: Option, } impl LocalAiService { - pub(in crate::openhuman::local_ai::service) async fn ensure_lm_studio_available( + pub(in crate::openhuman::inference::local::service) async fn ensure_lm_studio_available( &self, config: &Config, ) -> Result<(), String> { @@ -37,7 +37,7 @@ impl LocalAiService { Ok(()) } - pub(in crate::openhuman::local_ai::service) async fn list_lm_studio_models( + pub(in crate::openhuman::inference::local::service) async fn list_lm_studio_models( &self, config: &Config, ) -> Result, String> { @@ -120,7 +120,7 @@ impl LocalAiService { .collect()) } - pub(in crate::openhuman::local_ai::service) async fn has_lm_studio_model( + pub(in crate::openhuman::inference::local::service) async fn has_lm_studio_model( &self, config: &Config, model: &str, @@ -133,7 +133,7 @@ impl LocalAiService { .any(|m| m.name.to_ascii_lowercase() == target)) } - pub(in crate::openhuman::local_ai::service) async fn lm_studio_chat_completion( + pub(in crate::openhuman::inference::local::service) async fn lm_studio_chat_completion( &self, config: &Config, messages: Vec, diff --git a/src/openhuman/local_ai/service/mod.rs b/src/openhuman/inference/local/service/mod.rs similarity index 94% rename from src/openhuman/local_ai/service/mod.rs rename to src/openhuman/inference/local/service/mod.rs index d9f03cd3ed..0da432d265 100644 --- a/src/openhuman/local_ai/service/mod.rs +++ b/src/openhuman/inference/local/service/mod.rs @@ -10,7 +10,7 @@ mod speech; mod vision_embed; pub(crate) mod whisper_engine; -use crate::openhuman::local_ai::types::LocalAiStatus; +use crate::openhuman::inference::types::LocalAiStatus; use parking_lot::Mutex; pub struct LocalAiService { diff --git a/src/openhuman/local_ai/service/ollama_admin.rs 
b/src/openhuman/inference/local/service/ollama_admin.rs similarity index 91% rename from src/openhuman/local_ai/service/ollama_admin.rs rename to src/openhuman/inference/local/service/ollama_admin.rs index 7479ceb1fe..3741b5a390 100644 --- a/src/openhuman/local_ai/service/ollama_admin.rs +++ b/src/openhuman/inference/local/service/ollama_admin.rs @@ -3,17 +3,19 @@ use std::path::{Path, PathBuf}; use futures_util::StreamExt; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::install::{find_system_ollama_binary, run_ollama_install_script}; -use crate::openhuman::local_ai::lm_studio_api::lm_studio_base_url; -use crate::openhuman::local_ai::model_ids; -use crate::openhuman::local_ai::ollama_api::{ +use crate::openhuman::inference::local::install::{ + find_system_ollama_binary, run_ollama_install_script, +}; +use crate::openhuman::inference::local::lm_studio::lm_studio_base_url; +use crate::openhuman::inference::local::ollama::{ ollama_base_url, OllamaModelTag, OllamaPullEvent, OllamaPullProgress, OllamaPullRequest, OllamaTagsResponse, }; -use crate::openhuman::local_ai::paths::{find_workspace_ollama_binary, workspace_ollama_binary}; -use crate::openhuman::local_ai::presets::{self, VisionMode}; -use crate::openhuman::local_ai::process_util::apply_no_window; -use crate::openhuman::local_ai::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::local::process_util::apply_no_window; +use crate::openhuman::inference::local::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::model_ids; +use crate::openhuman::inference::paths::{find_workspace_ollama_binary, workspace_ollama_binary}; +use crate::openhuman::inference::presets::{self, VisionMode}; use super::spawn_marker::{self, OllamaSpawnMarker}; use super::LocalAiService; @@ -23,57 +25,34 @@ fn lm_studio_models_error_means_unreachable(error: &str) -> bool { } impl LocalAiService { - pub(in crate::openhuman::local_ai::service) async fn 
ensure_ollama_server( + pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_server( &self, - config: &Config, + _config: &Config, ) -> Result<(), String> { - // If openhuman crashed last session and left a daemon running, the - // spawn marker lets us recognise it and reclaim it (kill + respawn - // under owned-child tracking) instead of either leaking it forever - // or hitting an external daemon that just happens to be on :11434. - self.reclaim_orphan_if_ours(config).await; - if self.ollama_healthy().await { - // Server is running — verify it can actually execute models by checking - // if the runner works. A stale server with a missing binary will 500. if self.ollama_runner_ok().await { return Ok(()); } - // Runner is broken (e.g. binary moved). log::warn!("[local_ai] Ollama server responds but runner is broken"); - // Only restart if we own it. Killing an external daemon's - // broken runner is the user's job, not ours — friendly-fire. - self.kill_ollama_server().await; - if self.ollama_healthy().await { - // Our kill was a no-op (or didn't take effect) — daemon is external. - return Err("An external Ollama daemon on :11434 has a broken runner. \ - Restart it manually (or stop it so openhuman can take over)." - .to_string()); - } + return Err( + "Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry." + .to_string(), + ); } - - let ollama_cmd = self.resolve_or_install_ollama_binary(config).await?; - self.start_and_wait_for_server(config, &ollama_cmd).await + let base_url = ollama_base_url(); + Err(format!( + "OpenHuman no longer starts or installs Ollama automatically. Start your inference runtime yourself and make sure it is reachable at {base_url}." + )) } - /// Like `ensure_ollama_server`, but forces a fresh install of the Ollama binary - /// (ignoring cached/workspace binaries). Used as a retry after the first attempt fails. 
- pub(in crate::openhuman::local_ai::service) async fn ensure_ollama_server_fresh( + /// Alias of `ensure_ollama_server` in external-runtime mode. + /// OpenHuman no longer installs or starts Ollama automatically; the + /// "fresh" retry path is a no-op that defers to the standard check. + pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_server_fresh( &self, config: &Config, ) -> Result<(), String> { - // Force a fresh download regardless of existing binaries. - self.download_and_install_ollama(config).await?; - - let Some(ollama_cmd) = find_workspace_ollama_binary(config) else { - // Also check system path after install. - let system_bin = find_system_ollama_binary() - .ok_or_else(|| "Ollama installed but binary not found on system".to_string())?; - // Try to use the system binary directly. - return self.start_and_wait_for_server(config, &system_bin).await; - }; - - self.start_and_wait_for_server(config, &ollama_cmd).await + self.ensure_ollama_server(config).await } /// Check if a healthy daemon on `:11434` is actually openhuman's own @@ -349,7 +328,7 @@ impl LocalAiService { } async fn download_and_install_ollama(&self, config: &Config) -> Result<(), String> { - let install_dir = crate::openhuman::local_ai::paths::workspace_ollama_dir(config); + let install_dir = crate::openhuman::inference::paths::workspace_ollama_dir(config); tokio::fs::create_dir_all(&install_dir) .await .map_err(|e| format!("failed to create Ollama install directory: {e}"))?; @@ -361,7 +340,7 @@ impl LocalAiService { // OllamaSetup.exe running, wait for it instead of starting a // second one — two concurrent installers race on the same dir // and corrupt the install. 
- if crate::openhuman::local_ai::install::is_ollama_installer_running() { + if crate::openhuman::inference::local::install::is_ollama_installer_running() { log::info!( "[local_ai] detected in-flight OllamaSetup.exe — \ waiting for it to finish before deciding whether to install" @@ -381,7 +360,7 @@ impl LocalAiService { const INSTALLER_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5 * 60); let mut timed_out = false; - while crate::openhuman::local_ai::install::is_ollama_installer_running() { + while crate::openhuman::inference::local::install::is_ollama_installer_running() { if wait_start.elapsed() >= INSTALLER_WAIT_TIMEOUT { timed_out = true; break; @@ -496,7 +475,7 @@ impl LocalAiService { Ok(()) } - async fn ollama_healthy(&self) -> bool { + pub(in crate::openhuman::inference::local::service) async fn ollama_healthy(&self) -> bool { self.http .get(format!("{}/api/tags", ollama_base_url())) .timeout(std::time::Duration::from_secs(2)) @@ -513,7 +492,7 @@ impl LocalAiService { /// to `/api/tags` should consult this first. Returning `false` here means /// the UI should drive the user to install Ollama instead of polling for /// model state that can never appear. 
- pub(in crate::openhuman::local_ai::service) fn ollama_binary_present( + pub(in crate::openhuman::inference::local::service) fn ollama_binary_present( &self, config: &Config, ) -> bool { @@ -536,7 +515,7 @@ impl LocalAiService { find_system_ollama_binary().is_some() } - pub(in crate::openhuman::local_ai::service) async fn ensure_models_available( + pub(in crate::openhuman::inference::local::service) async fn ensure_models_available( &self, config: &Config, ) -> Result<(), String> { @@ -577,7 +556,7 @@ impl LocalAiService { Ok(()) } - pub(in crate::openhuman::local_ai::service) async fn ensure_ollama_model_available( + pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_model_available( &self, model_id: &str, label: &str, @@ -883,38 +862,22 @@ impl LocalAiService { let binary_path = self.resolve_binary_path(config); let mut issues: Vec = Vec::new(); - let mut repair_actions: Vec = Vec::new(); + let repair_actions: Vec = Vec::new(); if !healthy { issues.push(format!( "Ollama server is not running or not reachable at {}", base_url )); - if binary_path.is_none() { - repair_actions.push(serde_json::json!({"action": "install_ollama"})); - } else { - repair_actions.push(serde_json::json!({ - "action": "start_server", - "binary_path": binary_path, - })); - } } if healthy && !chat_found { issues.push(format!("Chat model `{}` is not installed", expected_chat)); - repair_actions.push(serde_json::json!({ - "action": "pull_model", - "model": expected_chat, - })); } if healthy && config.local_ai.preload_embedding_model && !embedding_found { issues.push(format!( "Embedding model `{}` is not installed", expected_embedding )); - repair_actions.push(serde_json::json!({ - "action": "pull_model", - "model": expected_embedding, - })); } if healthy && matches!( @@ -927,10 +890,6 @@ impl LocalAiService { "Vision model `{}` is not installed", expected_vision )); - repair_actions.push(serde_json::json!({ - "action": "pull_model", - "model": expected_vision, - })); } 
if let Some(ref e) = tags_error { issues.push(format!("Failed to list models: {e}")); @@ -1064,7 +1023,7 @@ impl LocalAiService { .any(|name| name == &expected_chat.to_ascii_lowercase()); let mut issues: Vec = Vec::new(); - let mut repair_actions: Vec = Vec::new(); + let repair_actions: Vec = Vec::new(); if !healthy { let detail = models_error @@ -1075,25 +1034,14 @@ impl LocalAiService { "LM Studio server is not running or not reachable at {}{}", base_url, detail )); - repair_actions.push(serde_json::json!({ - "action": "start_lm_studio_server", - "base_url": base_url, - })); } if healthy && models_error.is_none() && models.is_empty() { issues.push("LM Studio is reachable but no models are loaded".to_string()); - repair_actions.push(serde_json::json!({ - "action": "load_lm_studio_model", - })); } else if healthy && models_error.is_none() && !chat_found { issues.push(format!( "Chat model `{}` is not loaded in LM Studio", expected_chat )); - repair_actions.push(serde_json::json!({ - "action": "load_lm_studio_model", - "model": expected_chat, - })); } if healthy { if let Some(ref err) = models_error { @@ -1191,7 +1139,7 @@ impl LocalAiService { } // 5. Platform-specific well-known locations (macOS bundles, Windows, Linux). 
- crate::openhuman::local_ai::install::find_system_ollama_binary() + crate::openhuman::inference::local::install::find_system_ollama_binary() .map(|p| p.display().to_string()) } @@ -1282,7 +1230,7 @@ impl LocalAiService { spawn_marker::clear_marker(config); } - pub(in crate::openhuman::local_ai::service) async fn has_model( + pub(in crate::openhuman::inference::local::service) async fn has_model( &self, model: &str, ) -> Result { diff --git a/src/openhuman/local_ai/service/ollama_admin_tests.rs b/src/openhuman/inference/local/service/ollama_admin_tests.rs similarity index 82% rename from src/openhuman/local_ai/service/ollama_admin_tests.rs rename to src/openhuman/inference/local/service/ollama_admin_tests.rs index e2a0511fa2..d85e9356f2 100644 --- a/src/openhuman/local_ai/service/ollama_admin_tests.rs +++ b/src/openhuman/inference/local/service/ollama_admin_tests.rs @@ -11,7 +11,7 @@ fn interrupted_pull_does_not_wait_before_any_progress() { } use crate::openhuman::config::Config; -use crate::openhuman::local_ai::service::LocalAiService; +use crate::openhuman::inference::local::service::LocalAiService; use axum::{routing::get, Json, Router}; use serde_json::json; @@ -35,7 +35,7 @@ fn lm_studio_config(base: &str) -> Config { #[tokio::test] async fn has_model_detects_exact_and_prefixed_tag() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/tags", @@ -67,7 +67,7 @@ async fn has_model_detects_exact_and_prefixed_tag() { #[tokio::test] async fn has_model_errors_on_non_success_tags_response() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/tags", @@ -90,7 +90,7 @@ async fn has_model_errors_on_non_success_tags_response() { #[tokio::test] async fn ollama_healthy_returns_true_on_200_tags_response() { - let _guard = 
crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route("/api/tags", get(|| async { Json(json!({ "models": [] })) })); let base = spawn_mock(app).await; @@ -109,7 +109,7 @@ async fn ollama_healthy_returns_true_on_200_tags_response() { #[tokio::test] async fn ollama_healthy_returns_false_on_unreachable_url() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); // Point at a port we never bind → connect fails → healthy = false. unsafe { @@ -123,9 +123,103 @@ async fn ollama_healthy_returns_false_on_unreachable_url() { } } +#[tokio::test] +async fn ensure_ollama_server_requires_external_runtime_when_unreachable() { + let _guard = crate::openhuman::inference::inference_test_guard(); + + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + } + + let config = Config::default(); + let service = LocalAiService::new(&config); + let err = service + .ensure_ollama_server(&config) + .await + .expect_err("unreachable runtime should fail"); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + assert!( + err.contains("no longer starts or installs Ollama automatically"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +async fn ensure_ollama_server_reports_broken_external_runner_without_restart_attempt() { + let _guard = crate::openhuman::inference::inference_test_guard(); + + let app = Router::new() + .route("/api/tags", get(|| async { Json(json!({ "models": [] })) })) + .route( + "/api/show", + axum::routing::post(|| async { + ( + axum::http::StatusCode::INTERNAL_SERVER_ERROR, + "fork/exec /broken/ollama: no such file or directory", + ) + }), + ); + let base = spawn_mock(app).await; + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", &base); + } + + let config = Config::default(); + let service = LocalAiService::new(&config); + let err = 
service + .ensure_ollama_server(&config) + .await + .expect_err("broken runner should fail"); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + assert!( + err.contains("cannot execute models") || err.contains("Restart the external runtime"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +async fn assets_status_marks_ollama_unavailable_when_runtime_is_down_even_if_binary_exists() { + let _guard = crate::openhuman::inference::inference_test_guard(); + + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + } + let fake_ollama = std::env::current_exe().expect("current exe"); + let prev_ollama_bin = std::env::var_os("OLLAMA_BIN"); + unsafe { + std::env::set_var("OLLAMA_BIN", &fake_ollama); + } + + let config = Config::default(); + let service = LocalAiService::new(&config); + let status = service.assets_status(&config).await.expect("assets status"); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + match prev_ollama_bin { + Some(value) => std::env::set_var("OLLAMA_BIN", value), + None => std::env::remove_var("OLLAMA_BIN"), + } + } + + assert!( + !status.ollama_available, + "runtime-down status must not be treated as available" + ); + assert_ne!(status.chat.state, "ready"); +} + #[tokio::test] async fn diagnostics_reports_server_unreachable_when_url_unbound() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); unsafe { std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); @@ -151,8 +245,8 @@ async fn diagnostics_reports_server_unreachable_when_url_unbound() { .cloned() .unwrap_or_default(); assert!( - !repair_actions.is_empty(), - "unreachable server must produce at least one repair action" + repair_actions.is_empty(), + "OpenHuman should not suggest app-managed repair actions anymore" ); unsafe { std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); @@ -161,7 +255,7 @@ async fn 
diagnostics_reports_server_unreachable_when_url_unbound() { #[tokio::test] async fn diagnostics_with_running_server_but_missing_models_flags_issues() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route("/api/tags", get(|| async { Json(json!({ "models": [] })) })); let base = spawn_mock(app).await; @@ -181,16 +275,13 @@ async fn diagnostics_with_running_server_but_missing_models_flags_issues() { // No models are installed → expected chat model issue surfaces. let issues = diag["issues"].as_array().cloned().unwrap_or_default(); assert!(!issues.is_empty()); - // Missing chat model should produce a pull_model repair action. let repair_actions = diag["repair_actions"] .as_array() .cloned() .unwrap_or_default(); assert!( - repair_actions - .iter() - .any(|a| a["action"].as_str() == Some("pull_model")), - "missing models must produce pull_model repair action" + repair_actions.is_empty(), + "missing models should no longer surface app-managed pull actions" ); unsafe { std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); @@ -199,11 +290,11 @@ async fn diagnostics_with_running_server_but_missing_models_flags_issues() { #[tokio::test] async fn diagnostics_ok_when_expected_models_are_present() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let config = Config::default(); - let chat = crate::openhuman::local_ai::model_ids::effective_chat_model_id(&config); - let embedding = crate::openhuman::local_ai::model_ids::effective_embedding_model_id(&config); + let chat = crate::openhuman::inference::model_ids::effective_chat_model_id(&config); + let embedding = crate::openhuman::inference::model_ids::effective_embedding_model_id(&config); let chat_tag = format!("{}:latest", chat); let embed_tag = format!("{}:latest", embedding); let app = Router::new().route( @@ -254,7 +345,7 @@ async fn 
diagnostics_ok_when_expected_models_are_present() { #[tokio::test] async fn resolve_binary_path_finds_binary_via_ollama_bin_env() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let tmp = tempfile::tempdir().unwrap(); let fake_bin = tmp.path().join(if cfg!(windows) { @@ -286,8 +377,8 @@ async fn resolve_binary_path_finds_binary_via_ollama_bin_env() { } #[tokio::test] -async fn diagnostics_repair_actions_include_start_server_when_binary_known() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); +async fn diagnostics_repair_actions_are_empty_when_binary_is_known_but_server_is_down() { + let _guard = crate::openhuman::inference::inference_test_guard(); let tmp = tempfile::tempdir().unwrap(); let fake_bin = tmp.path().join(if cfg!(windows) { @@ -312,10 +403,8 @@ async fn diagnostics_repair_actions_include_start_server_when_binary_known() { .cloned() .unwrap_or_default(); assert!( - repair_actions - .iter() - .any(|a| a["action"].as_str() == Some("start_server")), - "when binary is known but server is down, repair action should be start_server" + repair_actions.is_empty(), + "when server is down, diagnostics should not advertise app-managed start actions" ); unsafe { @@ -328,7 +417,7 @@ async fn diagnostics_repair_actions_include_start_server_when_binary_known() { async fn diagnostics_repair_actions_field_always_present() { // Verifies that the "repair_actions" key is always present in the diagnostics // JSON, regardless of the server state, so the UI can always iterate over it. 
- let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); unsafe { std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); @@ -349,7 +438,7 @@ async fn diagnostics_repair_actions_field_always_present() { #[tokio::test] async fn list_models_returns_parsed_payload() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/tags", @@ -380,7 +469,7 @@ async fn list_models_returns_parsed_payload() { #[tokio::test] async fn list_models_errors_on_non_success() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/tags", @@ -402,7 +491,7 @@ async fn list_models_errors_on_non_success() { #[tokio::test] async fn lm_studio_list_models_returns_loaded_models() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/v1/models", @@ -435,7 +524,7 @@ async fn lm_studio_list_models_returns_loaded_models() { #[tokio::test] async fn lm_studio_diagnostics_reports_loaded_chat_model() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/v1/models", @@ -461,7 +550,7 @@ async fn lm_studio_diagnostics_reports_loaded_chat_model() { #[tokio::test] async fn lm_studio_diagnostics_flags_missing_chat_model() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/v1/models", @@ -491,7 +580,7 @@ async fn lm_studio_diagnostics_flags_missing_chat_model() { #[tokio::test] async fn lm_studio_diagnostics_surfaces_reachable_model_list_errors() { - 
let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route("/v1/models", get(|| async { "not json" })); let base = spawn_mock(app).await; @@ -516,7 +605,7 @@ async fn lm_studio_diagnostics_surfaces_reachable_model_list_errors() { #[tokio::test] async fn lm_studio_assets_reports_embedding_as_ollama_managed() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/v1/models", @@ -570,9 +659,7 @@ async fn lm_studio_assets_reports_embedding_as_ollama_managed() { #[tokio::test] async fn kill_ollama_server_with_no_owned_child_is_noop() { - let _guard = crate::openhuman::local_ai::LOCAL_AI_TEST_MUTEX - .lock() - .expect("local ai mutex"); + let _guard = crate::openhuman::inference::inference_test_guard(); let config = Config::default(); let service = LocalAiService::new(&config); @@ -595,9 +682,7 @@ async fn kill_ollama_server_with_no_owned_child_is_noop() { #[tokio::test] async fn kill_ollama_server_kills_owned_child() { - let _guard = crate::openhuman::local_ai::LOCAL_AI_TEST_MUTEX - .lock() - .expect("local ai mutex"); + let _guard = crate::openhuman::inference::inference_test_guard(); let config = Config::default(); let service = LocalAiService::new(&config); @@ -622,7 +707,7 @@ async fn kill_ollama_server_kills_owned_child() { // Sanity: child should be alive immediately after spawn. assert!( - crate::openhuman::local_ai::service::spawn_marker::pid_is_alive(pid), + crate::openhuman::inference::local::service::spawn_marker::pid_is_alive(pid), "child pid {pid} should be alive right after spawn" ); @@ -638,7 +723,7 @@ async fn kill_ollama_server_kills_owned_child() { // update its process table — the kill is signalled but reap is async. 
let mut still_alive = true; for _ in 0..40 { - if !crate::openhuman::local_ai::service::spawn_marker::pid_is_alive(pid) { + if !crate::openhuman::inference::local::service::spawn_marker::pid_is_alive(pid) { still_alive = false; break; } @@ -652,9 +737,7 @@ async fn kill_ollama_server_kills_owned_child() { #[tokio::test] async fn shutdown_owned_ollama_clears_marker_and_kills_child() { - let _guard = crate::openhuman::local_ai::LOCAL_AI_TEST_MUTEX - .lock() - .expect("local ai mutex"); + let _guard = crate::openhuman::inference::inference_test_guard(); // Redirect the workspace root to a tempdir so the marker file doesn't // touch the real `~/.openhuman/`. Per `paths::shared_root_dir`, when @@ -692,12 +775,12 @@ async fn shutdown_owned_ollama_clears_marker_and_kills_child() { // resolves to a writable temp path, the write is exercised. On hosts // where `default_root_openhuman_dir()` succeeds against the real home // dir, we skip the marker assertion to avoid touching `~/.openhuman/`. - let marker_path = crate::openhuman::local_ai::paths::ollama_spawn_marker_path(&config); + let marker_path = crate::openhuman::inference::paths::ollama_spawn_marker_path(&config); let marker_writable = marker_path.starts_with(tmp.path()); if marker_writable { - crate::openhuman::local_ai::service::spawn_marker::write_marker_at( + crate::openhuman::inference::local::service::spawn_marker::write_marker_at( &marker_path, - &crate::openhuman::local_ai::service::spawn_marker::OllamaSpawnMarker::new( + &crate::openhuman::inference::local::service::spawn_marker::OllamaSpawnMarker::new( pid, std::path::Path::new("test-stub"), ), @@ -721,7 +804,7 @@ async fn shutdown_owned_ollama_clears_marker_and_kills_child() { // And the spawned process is dead. 
let mut still_alive = true; for _ in 0..40 { - if !crate::openhuman::local_ai::service::spawn_marker::pid_is_alive(pid) { + if !crate::openhuman::inference::local::service::spawn_marker::pid_is_alive(pid) { still_alive = false; break; } @@ -738,7 +821,7 @@ async fn shutdown_owned_ollama_clears_marker_and_kills_child() { /// `ollama_available: false` immediately. #[tokio::test] async fn assets_status_sets_ollama_available_false_when_binary_missing() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let tmp = tempfile::tempdir().unwrap(); let mut config = Config::default(); @@ -836,7 +919,7 @@ fn binary_present_uses_ollama_bin_env_var_when_set() { // When OLLAMA_BIN points to a real file, it must be preferred over the // workspace/system lookup. Use the current test binary itself as the // "fake ollama" — it's guaranteed to be a real file. - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let real_file = std::env::current_exe().expect("current test exe path"); let prev = std::env::var_os("OLLAMA_BIN"); diff --git a/src/openhuman/local_ai/service/public_infer.rs b/src/openhuman/inference/local/service/public_infer.rs similarity index 87% rename from src/openhuman/local_ai/service/public_infer.rs rename to src/openhuman/inference/local/service/public_infer.rs index 41a3420f94..2eb40d0f12 100644 --- a/src/openhuman/local_ai/service/public_infer.rs +++ b/src/openhuman/inference/local/service/public_infer.rs @@ -1,13 +1,61 @@ use crate::openhuman::config::Config; -use crate::openhuman::local_ai::model_ids; -use crate::openhuman::local_ai::ollama_api::{ +use crate::openhuman::inference::local::ollama::{ ns_to_tps, ollama_base_url, OllamaGenerateOptions, OllamaGenerateRequest, }; -use crate::openhuman::local_ai::parse::sanitize_inline_completion; -use 
crate::openhuman::local_ai::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::local::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::model_ids; +use crate::openhuman::inference::parse::sanitize_inline_completion; use super::LocalAiService; +fn redact_ollama_base_url(raw: &str) -> String { + // Strip userinfo, query, and fragment so error payloads + logs don't + // leak `user:pass@host` style credentials embedded in the endpoint. + reqwest::Url::parse(raw) + .map(|mut url| { + let _ = url.set_username(""); + let _ = url.set_password(None); + url.set_query(None); + url.set_fragment(None); + url.to_string() + }) + .unwrap_or_else(|_| "".to_string()) +} + +fn external_ollama_request_error(prefix: &str, error: &reqwest::Error) -> String { + let safe_base_url = redact_ollama_base_url(&ollama_base_url()); + format!( + "{prefix}: OpenHuman routes inference through an external Ollama endpoint. \ + Make sure Ollama is already running and reachable at {safe_base_url} ({error})" + ) +} + +#[cfg(test)] +mod redact_tests { + use super::redact_ollama_base_url; + + #[test] + fn redact_strips_userinfo_query_and_fragment() { + assert_eq!( + redact_ollama_base_url("http://user:pass@host:11434/api?token=abc#frag"), + "http://host:11434/api" + ); + } + + #[test] + fn redact_keeps_plain_url() { + assert_eq!( + redact_ollama_base_url("http://127.0.0.1:11434/"), + "http://127.0.0.1:11434/" + ); + } + + #[test] + fn redact_handles_invalid_url() { + assert_eq!(redact_ollama_base_url("not a url"), ""); + } +} + impl LocalAiService { pub async fn summarize( &self, @@ -174,7 +222,7 @@ impl LocalAiService { pub(crate) async fn chat_with_history( &self, config: &Config, - messages: Vec, + messages: Vec, max_tokens: Option, ) -> Result { if !config.local_ai.runtime_enabled { @@ -197,7 +245,7 @@ impl LocalAiService { let lm_messages = messages .into_iter() .map( - |message| 
crate::openhuman::local_ai::lm_studio_api::LmStudioChatMessage { + |message| crate::openhuman::inference::local::lm_studio::LmStudioChatMessage { role: message.role, content: message.content, }, @@ -233,18 +281,18 @@ impl LocalAiService { tracing::debug!( message_count = messages.len(), - model = %crate::openhuman::local_ai::model_ids::effective_chat_model_id(config), + model = %crate::openhuman::inference::model_ids::effective_chat_model_id(config), "[local_ai:chat] sending to ollama /api/chat" ); let started = std::time::Instant::now(); - let body = crate::openhuman::local_ai::ollama_api::OllamaChatRequest { - model: crate::openhuman::local_ai::model_ids::effective_chat_model_id(config), + let body = crate::openhuman::inference::local::ollama::OllamaChatRequest { + model: crate::openhuman::inference::model_ids::effective_chat_model_id(config), messages, stream: false, options: Some( - crate::openhuman::local_ai::ollama_api::OllamaGenerateOptions { + crate::openhuman::inference::local::ollama::OllamaGenerateOptions { temperature: Some(config.default_temperature as f32), top_k: Some(40), top_p: Some(0.9), @@ -259,7 +307,7 @@ impl LocalAiService { .json(&body) .send() .await - .map_err(|e| format!("ollama chat request failed: {e}"))?; + .map_err(|e| external_ollama_request_error("ollama chat request failed", &e))?; if !response.status().is_success() { let status = response.status(); @@ -276,7 +324,7 @@ impl LocalAiService { )); } - let payload: crate::openhuman::local_ai::ollama_api::OllamaChatResponse = response + let payload: crate::openhuman::inference::local::ollama::OllamaChatResponse = response .json() .await .map_err(|e| format!("ollama chat response parse failed: {e}"))?; @@ -458,11 +506,11 @@ impl LocalAiService { if provider_from_config(config) == LocalAiProvider::LmStudio { let messages = vec![ - crate::openhuman::local_ai::lm_studio_api::LmStudioChatMessage { + crate::openhuman::inference::local::lm_studio::LmStudioChatMessage { role: 
"system".to_string(), content: effective_system, }, - crate::openhuman::local_ai::lm_studio_api::LmStudioChatMessage { + crate::openhuman::inference::local::lm_studio::LmStudioChatMessage { role: "user".to_string(), content: prompt.to_string(), }, @@ -509,7 +557,7 @@ impl LocalAiService { .json(&body) .send() .await - .map_err(|e| format!("ollama request failed: {e}"))?; + .map_err(|e| external_ollama_request_error("ollama request failed", &e))?; if !response.status().is_success() { let status = response.status(); let body = response.text().await.unwrap_or_default(); @@ -525,7 +573,7 @@ impl LocalAiService { )); } - let payload: crate::openhuman::local_ai::ollama_api::OllamaGenerateResponse = response + let payload: crate::openhuman::inference::local::ollama::OllamaGenerateResponse = response .json() .await .map_err(|e| format!("ollama response parse failed: {e}"))?; diff --git a/src/openhuman/local_ai/service/public_infer_tests.rs b/src/openhuman/inference/local/service/public_infer_tests.rs similarity index 89% rename from src/openhuman/local_ai/service/public_infer_tests.rs rename to src/openhuman/inference/local/service/public_infer_tests.rs index 20c95ae683..8931438e2c 100644 --- a/src/openhuman/local_ai/service/public_infer_tests.rs +++ b/src/openhuman/inference/local/service/public_infer_tests.rs @@ -38,7 +38,7 @@ fn ready_service(config: &Config) -> LocalAiService { #[tokio::test] async fn inference_hits_ollama_generate_and_returns_response() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/generate", @@ -75,7 +75,7 @@ async fn inference_hits_ollama_generate_and_returns_response() { #[tokio::test] async fn inference_errors_on_non_success_status() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/generate", @@ -96,9 
+96,32 @@ async fn inference_errors_on_non_success_status() { } } +#[tokio::test] +async fn inference_connection_failure_mentions_external_ollama_runtime() { + let _guard = crate::openhuman::inference::inference_test_guard(); + + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + } + + let config = enabled_config(); + let service = ready_service(&config); + let err = service.prompt(&config, "hi", None, true).await.unwrap_err(); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + assert!( + err.contains("external Ollama endpoint"), + "unexpected error: {err}" + ); + assert!(err.contains("already running"), "unexpected error: {err}"); +} + #[tokio::test] async fn inference_errors_on_empty_response_when_allow_empty_false() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/generate", @@ -135,7 +158,7 @@ async fn inference_errors_on_empty_response_when_allow_empty_false() { #[tokio::test] async fn lm_studio_prompt_hits_openai_chat_completions() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/v1/chat/completions", @@ -174,7 +197,7 @@ async fn lm_studio_prompt_hits_openai_chat_completions() { #[tokio::test] async fn lm_studio_chat_with_history_returns_response() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/v1/chat/completions", @@ -196,11 +219,11 @@ async fn lm_studio_chat_with_history_returns_response() { .chat_with_history( &config, vec![ - crate::openhuman::local_ai::ollama_api::OllamaChatMessage { + crate::openhuman::inference::local::ollama::OllamaChatMessage { role: "system".to_string(), content: "be terse".to_string(), }, - 
crate::openhuman::local_ai::ollama_api::OllamaChatMessage { + crate::openhuman::inference::local::ollama::OllamaChatMessage { role: "user".to_string(), content: "hi".to_string(), }, @@ -215,7 +238,7 @@ async fn lm_studio_chat_with_history_returns_response() { #[tokio::test] async fn lm_studio_prompt_errors_on_non_success_status() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/v1/chat/completions", @@ -284,7 +307,7 @@ async fn inline_complete_interactive_disabled_returns_empty_string() { /// the permit it would deadlock or time out. #[tokio::test] async fn inline_complete_interactive_does_not_block_on_held_permit() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); // Hold the global LLM permit for the duration of the test. let _held = crate::openhuman::scheduler_gate::gate::try_acquire_llm_permit() @@ -341,7 +364,7 @@ async fn inline_complete_interactive_does_not_block_on_held_permit() { // safer trade-off. See PR #1524. 
#[ignore = "flaky timing under full-suite load — see PR #1524"] async fn gated_inline_complete_blocks_on_held_permit() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let held = crate::openhuman::scheduler_gate::gate::try_acquire_llm_permit() .expect("test must start with a free permit"); diff --git a/src/openhuman/local_ai/service/spawn_marker.rs b/src/openhuman/inference/local/service/spawn_marker.rs similarity index 99% rename from src/openhuman/local_ai/service/spawn_marker.rs rename to src/openhuman/inference/local/service/spawn_marker.rs index e8f5dd59e8..cc791d6e49 100644 --- a/src/openhuman/local_ai/service/spawn_marker.rs +++ b/src/openhuman/inference/local/service/spawn_marker.rs @@ -17,7 +17,7 @@ use std::path::Path; use serde::{Deserialize, Serialize}; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::paths::ollama_spawn_marker_path; +use crate::openhuman::inference::paths::ollama_spawn_marker_path; /// On-disk record of an openhuman-spawned `ollama serve` process. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] diff --git a/src/openhuman/local_ai/service/speech.rs b/src/openhuman/inference/local/service/speech.rs similarity index 97% rename from src/openhuman/local_ai/service/speech.rs rename to src/openhuman/inference/local/service/speech.rs index 5cbf1a0c4e..79b56a0a26 100644 --- a/src/openhuman/local_ai/service/speech.rs +++ b/src/openhuman/inference/local/service/speech.rs @@ -4,12 +4,12 @@ use std::time::Instant; use log::{debug, warn}; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::model_ids; -use crate::openhuman::local_ai::paths::{ +use crate::openhuman::inference::model_ids; +use crate::openhuman::inference::paths::{ config_root_dir, resolve_piper_binary, resolve_stt_model_path, resolve_tts_voice_path, resolve_whisper_binary, }; -use crate::openhuman::local_ai::types::{LocalAiSpeechResult, LocalAiTtsResult}; +use crate::openhuman::inference::types::{LocalAiSpeechResult, LocalAiTtsResult}; use super::whisper_engine; use super::LocalAiService; @@ -54,7 +54,7 @@ impl LocalAiService { "{LOG_PREFIX} whisper in-process enabled but unloaded; loading model lazily" ); // Detect GPU at lazy-load time so whisper can use acceleration. 
- let device = crate::openhuman::local_ai::device::detect_device_profile(); + let device = crate::openhuman::inference::device::detect_device_profile(); let gpu = device.has_gpu; let gpu_desc = device.gpu_description.clone(); let load_result = tokio::task::spawn_blocking(move || { diff --git a/src/openhuman/local_ai/service/vision_embed.rs b/src/openhuman/inference/local/service/vision_embed.rs similarity index 95% rename from src/openhuman/local_ai/service/vision_embed.rs rename to src/openhuman/inference/local/service/vision_embed.rs index 950f70b842..0ed010cfa1 100644 --- a/src/openhuman/local_ai/service/vision_embed.rs +++ b/src/openhuman/inference/local/service/vision_embed.rs @@ -1,12 +1,12 @@ use crate::openhuman::agent::multimodal; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::model_ids; -use crate::openhuman::local_ai::ollama_api::{ +use crate::openhuman::inference::local::ollama::{ ollama_base_url, OllamaEmbedRequest, OllamaEmbedResponse, OllamaGenerateOptions, OllamaGenerateRequest, }; -use crate::openhuman::local_ai::presets::{self, VisionMode}; -use crate::openhuman::local_ai::types::LocalAiEmbeddingResult; +use crate::openhuman::inference::model_ids; +use crate::openhuman::inference::presets::{self, VisionMode}; +use crate::openhuman::inference::types::LocalAiEmbeddingResult; use super::LocalAiService; @@ -118,7 +118,7 @@ impl LocalAiService { )); } - let payload: crate::openhuman::local_ai::ollama_api::OllamaGenerateResponse = response + let payload: crate::openhuman::inference::local::ollama::OllamaGenerateResponse = response .json() .await .map_err(|e| format!("ollama vision response parse failed: {e}"))?; @@ -250,7 +250,7 @@ mod tests { #[tokio::test] async fn embed_against_mock_returns_vectors_with_dimensions() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); let app = mock_with_tags_and( "/api/embed", @@ -281,7 +281,7 @@ mod tests { 
#[tokio::test] async fn embed_rejects_all_empty_inputs_before_network_call() { - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::inference_test_guard(); // Even without a working mock server, entirely-empty inputs must be // rejected before any HTTP call. diff --git a/src/openhuman/local_ai/service/whisper_engine.rs b/src/openhuman/inference/local/service/whisper_engine.rs similarity index 100% rename from src/openhuman/local_ai/service/whisper_engine.rs rename to src/openhuman/inference/local/service/whisper_engine.rs diff --git a/src/openhuman/local_ai/voice_install_common.rs b/src/openhuman/inference/local/voice_install_common.rs similarity index 100% rename from src/openhuman/local_ai/voice_install_common.rs rename to src/openhuman/inference/local/voice_install_common.rs diff --git a/src/openhuman/inference/mod.rs b/src/openhuman/inference/mod.rs new file mode 100644 index 0000000000..7fb5549e0f --- /dev/null +++ b/src/openhuman/inference/mod.rs @@ -0,0 +1,50 @@ +//! Unified inference domain. +//! +//! This module is the canonical home for all inference concerns: +//! - `local/` — Ollama / LM Studio / Whisper / Piper runtime management +//! (was `src/openhuman/local_ai/`) +//! - `provider/` — cloud + local provider trait, routing, reliability +//! (was `src/openhuman/providers/`) +//! - `voice/` — transcription (STT) and TTS inference implementations +//! (moved from `src/openhuman/voice/`) +//! - `http/` — OpenAI-compatible `/v1/chat/completions` endpoint +//! +//! The RPC surface remains under the `inference.*` and `local_ai.*` namespaces +//! for backwards compatibility. 
+ +pub mod device; +pub mod http; +pub mod local; +pub mod model_ids; +pub mod ops; +pub mod parse; +pub mod paths; +pub mod presets; +pub mod provider; +mod schemas; +pub mod sentiment; +pub mod types; +pub mod voice; + +pub use ops as rpc; +pub use schemas::{ + all_controller_schemas as all_inference_controller_schemas, + all_registered_controllers as all_inference_registered_controllers, +}; + +// Re-export the types that external callers (voice, agent, etc.) import from inference +pub use device::DeviceProfile; +pub use local::all_local_ai_controller_schemas; +pub use local::all_local_ai_registered_controllers; +pub use presets::{ModelPreset, ModelTier, VisionMode}; +pub use sentiment::SentimentResult; +pub use types::{ + LocalAiAssetStatus, LocalAiAssetsStatus, LocalAiDownloadProgressItem, LocalAiDownloadsProgress, + LocalAiEmbeddingResult, LocalAiSpeechResult, LocalAiStatus, LocalAiTtsResult, +}; + +// Test helpers (re-exported for sibling test files that use inference_test_guard) +#[cfg(test)] +pub(crate) fn inference_test_guard() -> std::sync::MutexGuard<'static, ()> { + local::inference_test_guard() +} diff --git a/src/openhuman/local_ai/model_ids.rs b/src/openhuman/inference/model_ids.rs similarity index 99% rename from src/openhuman/local_ai/model_ids.rs rename to src/openhuman/inference/model_ids.rs index 754a835ae9..a407df65d2 100644 --- a/src/openhuman/local_ai/model_ids.rs +++ b/src/openhuman/inference/model_ids.rs @@ -8,7 +8,7 @@ //! tier restriction for OpenHuman-managed Ollama assets. 
use crate::openhuman::config::Config; -use crate::openhuman::local_ai::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::local::provider::{provider_from_config, LocalAiProvider}; pub(crate) const DEFAULT_OLLAMA_MODEL: &str = "gemma3:1b-it-qat"; pub(crate) const DEFAULT_OLLAMA_VISION_MODEL: &str = ""; diff --git a/src/openhuman/inference/ops.rs b/src/openhuman/inference/ops.rs new file mode 100644 index 0000000000..06459931ce --- /dev/null +++ b/src/openhuman/inference/ops.rs @@ -0,0 +1,333 @@ +//! JSON-RPC controller surface for inference operations. + +use crate::openhuman::config::rpc as config_rpc; +use crate::openhuman::config::Config; +use crate::openhuman::inference::local as local_runtime; +use crate::openhuman::inference::local::ops::{LocalAiChatMessage, ReactionDecision}; +use crate::openhuman::inference::provider as providers; +use crate::openhuman::inference::{device, presets, sentiment, SentimentResult}; +use crate::openhuman::inference::{LocalAiEmbeddingResult, LocalAiStatus}; +use crate::rpc::RpcOutcome; +use serde_json::{json, Value}; +use tracing::{debug, error}; + +const LOG_PREFIX: &str = "[inference::ops]"; + +pub async fn inference_status(config: &Config) -> Result, String> { + debug!("{LOG_PREFIX} status:start"); + let result = local_runtime::rpc::local_ai_status(config).await; + match &result { + Ok(outcome) => debug!(state = %outcome.value.state, "{LOG_PREFIX} status:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} status:error"), + } + result +} + +pub async fn inference_summarize( + config: &Config, + text: &str, + max_tokens: Option, +) -> Result, String> { + debug!( + text_len = text.len(), + ?max_tokens, + "{LOG_PREFIX} summarize:start" + ); + let result = local_runtime::rpc::local_ai_summarize(config, text, max_tokens).await; + match &result { + Ok(outcome) => debug!( + output_len = outcome.value.len(), + "{LOG_PREFIX} summarize:ok" + ), + Err(err) => error!(error = %err, "{LOG_PREFIX} 
summarize:error"), + } + result +} + +pub async fn inference_prompt( + config: &Config, + prompt: &str, + max_tokens: Option, + no_think: Option, +) -> Result, String> { + debug!( + prompt_len = prompt.len(), + ?max_tokens, + ?no_think, + "{LOG_PREFIX} prompt:start" + ); + let result = local_runtime::rpc::local_ai_prompt(config, prompt, max_tokens, no_think).await; + match &result { + Ok(outcome) => debug!(output_len = outcome.value.len(), "{LOG_PREFIX} prompt:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} prompt:error"), + } + result +} + +pub async fn inference_vision_prompt( + config: &Config, + prompt: &str, + image_refs: &[String], + max_tokens: Option, +) -> Result, String> { + debug!( + prompt_len = prompt.len(), + image_count = image_refs.len(), + ?max_tokens, + "{LOG_PREFIX} vision_prompt:start" + ); + let result = + local_runtime::rpc::local_ai_vision_prompt(config, prompt, image_refs, max_tokens).await; + match &result { + Ok(outcome) => debug!( + output_len = outcome.value.len(), + "{LOG_PREFIX} vision_prompt:ok" + ), + Err(err) => error!(error = %err, "{LOG_PREFIX} vision_prompt:error"), + } + result +} + +pub async fn inference_embed( + config: &Config, + inputs: &[String], +) -> Result, String> { + debug!(input_count = inputs.len(), "{LOG_PREFIX} embed:start"); + let result = local_runtime::rpc::local_ai_embed(config, inputs).await; + match &result { + Ok(outcome) => debug!( + vector_count = outcome.value.vectors.len(), + dimensions = outcome.value.dimensions, + "{LOG_PREFIX} embed:ok" + ), + Err(err) => error!(error = %err, "{LOG_PREFIX} embed:error"), + } + result +} + +pub async fn inference_chat( + config: &Config, + messages: Vec, + max_tokens: Option, +) -> Result, String> { + debug!( + message_count = messages.len(), + ?max_tokens, + "{LOG_PREFIX} chat:start" + ); + let result = local_runtime::rpc::local_ai_chat(config, messages, max_tokens).await; + match &result { + Ok(outcome) => debug!(output_len = outcome.value.len(), 
"{LOG_PREFIX} chat:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} chat:error"), + } + result +} + +pub async fn inference_should_react( + config: &Config, + message: &str, + channel_type: &str, +) -> Result, String> { + debug!( + message_len = message.len(), + channel_type, "{LOG_PREFIX} should_react:start" + ); + let result = local_runtime::rpc::local_ai_should_react(config, message, channel_type).await; + match &result { + Ok(outcome) => debug!( + should_react = outcome.value.should_react, + "{LOG_PREFIX} should_react:ok" + ), + Err(err) => error!(error = %err, "{LOG_PREFIX} should_react:error"), + } + result +} + +pub async fn inference_analyze_sentiment( + config: &Config, + message: &str, +) -> Result, String> { + debug!( + message_len = message.len(), + "{LOG_PREFIX} analyze_sentiment:start" + ); + let result = sentiment::local_ai_analyze_sentiment(config, message).await; + match &result { + Ok(outcome) => { + debug!(valence = %outcome.value.valence, "{LOG_PREFIX} analyze_sentiment:ok") + } + Err(err) => error!(error = %err, "{LOG_PREFIX} analyze_sentiment:error"), + } + result +} + +pub async fn inference_get_client_config() -> Result, String> { + debug!("{LOG_PREFIX} get_client_config:start"); + let result = config_rpc::load_and_get_client_config_snapshot().await; + match &result { + Ok(_) => debug!("{LOG_PREFIX} get_client_config:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} get_client_config:error"), + } + result +} + +pub async fn inference_update_model_settings( + update: config_rpc::ModelSettingsPatch, +) -> Result, String> { + debug!("{LOG_PREFIX} update_model_settings:start"); + let result = config_rpc::load_and_apply_model_settings(update).await; + match &result { + Ok(_) => debug!("{LOG_PREFIX} update_model_settings:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} update_model_settings:error"), + } + result +} + +pub async fn inference_update_local_settings( + update: config_rpc::LocalAiSettingsPatch, +) -> Result, String> { 
+ debug!("{LOG_PREFIX} update_local_settings:start"); + let result = config_rpc::load_and_apply_local_ai_settings(update).await; + match &result { + Ok(_) => debug!("{LOG_PREFIX} update_local_settings:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} update_local_settings:error"), + } + result +} + +pub async fn inference_list_models(provider_id: &str) -> Result, String> { + debug!(provider_id, "{LOG_PREFIX} list_models:start"); + let result = providers::ops::list_configured_models(provider_id).await; + match &result { + Ok(_) => debug!("{LOG_PREFIX} list_models:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} list_models:error"), + } + result +} + +pub async fn inference_device_profile() -> Result, String> { + debug!("{LOG_PREFIX} device_profile:start"); + let profile = device::detect_device_profile(); + let result = Ok(RpcOutcome::single_log( + serde_json::to_value(profile).map_err(|e| format!("serialize: {e}"))?, + "inference device profile fetched", + )); + debug!("{LOG_PREFIX} device_profile:ok"); + result +} + +pub async fn inference_presets() -> Result, String> { + debug!("{LOG_PREFIX} presets:start"); + let config = config_rpc::load_config_with_timeout().await?; + let device = device::detect_device_profile(); + let recommended = presets::recommend_tier(&device); + let current = presets::current_tier_from_config(&config.local_ai); + let selected_tier = config.local_ai.selected_tier.as_ref().and_then(|value| { + let normalized = value.trim().to_ascii_lowercase(); + presets::ModelTier::from_str_opt(&normalized) + .map(|tier| tier.as_str().to_string()) + .or_else(|| (!normalized.is_empty()).then_some(normalized)) + }); + let presets = presets::mvp_presets(); + let recommend_disabled = presets::should_default_to_cloud_fallback(&device); + let result = Ok(RpcOutcome::single_log( + json!({ + "presets": presets, + "recommended_tier": recommended, + "current_tier": current, + "selected_tier": selected_tier, + "device": device, + "recommend_disabled": 
recommend_disabled, + "local_ai_enabled": config.local_ai.runtime_enabled, + }), + "inference presets fetched", + )); + debug!("{LOG_PREFIX} presets:ok"); + result +} + +pub async fn inference_apply_preset(tier: &str) -> Result, String> { + let tier_str = tier.trim().to_ascii_lowercase(); + debug!(tier = %tier_str, "{LOG_PREFIX} apply_preset:start"); + + if tier_str == "disabled" { + let mut config = config_rpc::load_config_with_timeout().await?; + config.local_ai.runtime_enabled = false; + config.local_ai.selected_tier = Some("disabled".to_string()); + config.local_ai.opt_in_confirmed = false; + config + .save() + .await + .map_err(|e| format!("save config: {e}"))?; + debug!("{LOG_PREFIX} apply_preset:disabled"); + return Ok(RpcOutcome::single_log( + json!({ + "applied_tier": "disabled", + "local_ai_enabled": false, + }), + "inference preset applied", + )); + } + + let tier = presets::ModelTier::from_str_opt(&tier_str).ok_or_else(|| { + format!( + "invalid tier '{}': expected one of disabled or ram_2_4gb", + tier_str + ) + })?; + + if tier == presets::ModelTier::Custom { + return Err("cannot apply 'custom' tier; set model IDs directly".to_string()); + } + if !tier.is_mvp_allowed() { + return Err(format!( + "tier '{}' is not available in this build; only the 1B local model preset is supported", + tier_str + )); + } + + let mut config = config_rpc::load_config_with_timeout().await?; + config.local_ai.runtime_enabled = true; + config.local_ai.opt_in_confirmed = true; + presets::apply_preset_to_config(&mut config.local_ai, tier); + config + .save() + .await + .map_err(|e| format!("save config: {e}"))?; + + debug!(tier = %tier_str, "{LOG_PREFIX} apply_preset:ok"); + Ok(RpcOutcome::single_log( + json!({ + "applied_tier": tier, + "chat_model_id": config.local_ai.chat_model_id, + "vision_model_id": config.local_ai.vision_model_id, + "embedding_model_id": config.local_ai.embedding_model_id, + "quantization": config.local_ai.quantization, + "vision_mode": 
presets::vision_mode_for_config(&config.local_ai), + "local_ai_enabled": true, + }), + "inference preset applied", + )) +} + +pub async fn inference_diagnostics(config: &Config) -> Result, String> { + debug!("{LOG_PREFIX} diagnostics:start"); + let service = local_runtime::global(config); + // Return the diagnostics payload directly (no `{result, logs}` wrap) so + // callers (UI + json_rpc_e2e tests) can read `provider`, `lm_studio_running`, + // etc. straight off the response — mirrors the legacy + // `local_ai_diagnostics` shape that the test asserts against. + let result = service + .diagnostics(config) + .await + .map(|value| RpcOutcome::new(value, Vec::new())); + match &result { + Ok(_) => debug!("{LOG_PREFIX} diagnostics:ok"), + Err(err) => error!(error = %err, "{LOG_PREFIX} diagnostics:error"), + } + result +} + +#[cfg(test)] +#[path = "ops_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/ops_tests.rs b/src/openhuman/inference/ops_tests.rs new file mode 100644 index 0000000000..7d76274b11 --- /dev/null +++ b/src/openhuman/inference/ops_tests.rs @@ -0,0 +1,111 @@ +use super::*; +use tempfile::tempdir; + +fn disabled_config() -> (Config, tempfile::TempDir) { + let tmp = tempdir().expect("tempdir"); + let mut config = Config::default(); + config.workspace_dir = tmp.path().join("workspace"); + config.config_path = tmp.path().join("config.toml"); + config.local_ai.runtime_enabled = false; + config.local_ai.opt_in_confirmed = false; + (config, tmp) +} + +#[tokio::test] +async fn inference_status_reports_disabled_state_when_runtime_disabled() { + let (config, _tmp) = disabled_config(); + let outcome = inference_status(&config).await.expect("status"); + assert!( + matches!(outcome.value.state.as_str(), "idle" | "disabled"), + "unexpected state: {}", + outcome.value.state + ); +} + +#[tokio::test] +async fn inference_prompt_reuses_local_ai_disabled_error() { + let (config, _tmp) = disabled_config(); + let err = inference_prompt(&config, "hello", None, 
Some(true)) + .await + .expect_err("prompt should fail"); + assert!(err.contains("local ai is disabled")); +} + +#[tokio::test] +async fn inference_summarize_reuses_local_ai_disabled_error() { + let (config, _tmp) = disabled_config(); + let err = inference_summarize(&config, "hello", None) + .await + .expect_err("summarize should fail"); + assert!(err.contains("local ai is disabled")); +} + +#[tokio::test] +async fn inference_embed_reuses_local_ai_disabled_error() { + let (config, _tmp) = disabled_config(); + let err = inference_embed(&config, &["hello".to_string()]) + .await + .expect_err("embed should fail"); + assert!(err.contains("local ai is disabled")); +} + +#[tokio::test] +async fn inference_chat_rejects_empty_messages() { + let (config, _tmp) = disabled_config(); + let err = inference_chat(&config, vec![], None) + .await + .expect_err("chat should fail"); + assert!(err.contains("must not be empty")); +} + +#[tokio::test] +async fn inference_should_react_short_circuits_for_empty_message() { + let (config, _tmp) = disabled_config(); + let outcome = inference_should_react(&config, " ", "web") + .await + .expect("reaction decision"); + assert!(!outcome.value.should_react); + assert!(outcome.value.emoji.is_none()); +} + +#[tokio::test] +async fn inference_analyze_sentiment_handles_empty_message() { + let (config, _tmp) = disabled_config(); + let outcome = inference_analyze_sentiment(&config, " ") + .await + .expect("sentiment"); + assert_eq!(outcome.value.valence, "neutral"); +} + +#[tokio::test] +async fn inference_get_client_config_returns_safe_snapshot() { + let (config, _tmp) = disabled_config(); + config.save().await.expect("save config"); + + let outcome = inference_get_client_config() + .await + .expect("client config snapshot"); + assert!(outcome.value.get("cloud_providers").is_some()); + assert!(outcome.value.get("api_key_set").is_some()); +} + +#[tokio::test] +async fn inference_apply_preset_rejects_invalid_tier() { + let (config, _tmp) = 
disabled_config(); + config.save().await.expect("save config"); + + let err = inference_apply_preset("ram_bogus") + .await + .expect_err("invalid tier should fail"); + assert!(err.contains("invalid tier")); +} + +#[tokio::test] +async fn inference_presets_returns_recommended_tier() { + let (config, _tmp) = disabled_config(); + config.save().await.expect("save config"); + + let outcome = inference_presets().await.expect("presets"); + assert!(outcome.value.get("recommended_tier").is_some()); + assert!(outcome.value.get("presets").is_some()); +} diff --git a/src/openhuman/local_ai/parse.rs b/src/openhuman/inference/parse.rs similarity index 100% rename from src/openhuman/local_ai/parse.rs rename to src/openhuman/inference/parse.rs diff --git a/src/openhuman/local_ai/paths.rs b/src/openhuman/inference/paths.rs similarity index 98% rename from src/openhuman/local_ai/paths.rs rename to src/openhuman/inference/paths.rs index 2ce7a50576..848cf9f24e 100644 --- a/src/openhuman/local_ai/paths.rs +++ b/src/openhuman/inference/paths.rs @@ -154,7 +154,9 @@ pub(crate) fn resolve_whisper_binary() -> Option { /// `Config` reference (e.g. the bare-process voice STT subprocess code) /// stay compiling without rewiring. pub(crate) fn resolve_whisper_binary_with_config(config: &Config) -> Option { - if let Some(workspace) = super::install_whisper::find_workspace_whisper_binary(config) { + if let Some(workspace) = + crate::openhuman::inference::local::install_whisper::find_workspace_whisper_binary(config) + { return Some(workspace); } resolve_whisper_binary() @@ -203,7 +205,9 @@ pub(crate) fn resolve_piper_binary() -> Option { /// `resolve_whisper_binary_with_config` — workspace install first, env /// second, PATH third. 
pub(crate) fn resolve_piper_binary_with_config(config: &Config) -> Option { - if let Some(workspace) = super::install_piper::find_workspace_piper_binary(config) { + if let Some(workspace) = + crate::openhuman::inference::local::install_piper::find_workspace_piper_binary(config) + { return Some(workspace); } resolve_piper_binary() @@ -620,7 +624,7 @@ mod tests { /// the existing module-wide guard so all readers/writers go through /// one critical section. fn shared_install_lock() -> std::sync::MutexGuard<'static, ()> { - crate::openhuman::local_ai::local_ai_test_guard() + crate::openhuman::inference::inference_test_guard() } #[test] diff --git a/src/openhuman/local_ai/presets.rs b/src/openhuman/inference/presets.rs similarity index 100% rename from src/openhuman/local_ai/presets.rs rename to src/openhuman/inference/presets.rs diff --git a/src/openhuman/local_ai/presets_tests.rs b/src/openhuman/inference/presets_tests.rs similarity index 100% rename from src/openhuman/local_ai/presets_tests.rs rename to src/openhuman/inference/presets_tests.rs diff --git a/src/openhuman/providers/billing_error.rs b/src/openhuman/inference/provider/billing_error.rs similarity index 100% rename from src/openhuman/providers/billing_error.rs rename to src/openhuman/inference/provider/billing_error.rs diff --git a/src/openhuman/providers/compatible.rs b/src/openhuman/inference/provider/compatible.rs similarity index 96% rename from src/openhuman/providers/compatible.rs rename to src/openhuman/inference/provider/compatible.rs index 1701855230..80d7711023 100644 --- a/src/openhuman/providers/compatible.rs +++ b/src/openhuman/inference/provider/compatible.rs @@ -18,7 +18,7 @@ pub(crate) use compatible_parse::{ #[cfg(test)] pub(crate) use compatible_types::ResponsesResponse; -use crate::openhuman::providers::traits::{ +use crate::openhuman::inference::provider::traits::{ ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, Provider, StreamChunk, StreamError, 
StreamOptions, StreamResult, ToolCall as ProviderToolCall, UsageInfo as ProviderUsageInfo, @@ -66,6 +66,11 @@ pub struct OpenAiCompatibleProvider { /// never see an unrecognized field that could trip strict input /// validation. emit_openhuman_thread_id: bool, + /// Shell-style glob patterns (`*` only) for model IDs that MUST NOT + /// receive a `temperature` field. Matches are done by + /// `temperature::glob_match`. Defaults to empty (all models support + /// temperature); populated by the factory when the config has entries. + pub(crate) temperature_unsupported_models: Vec, } /// How the provider expects the API key to be sent. @@ -165,6 +170,35 @@ impl OpenAiCompatibleProvider { user_agent: user_agent.map(ToString::to_string), merge_system_into_user, emit_openhuman_thread_id: false, + temperature_unsupported_models: Vec::new(), + } + } + + /// Set the list of model glob patterns for which temperature must be + /// omitted from request bodies. Called by the provider factory to + /// propagate `config.temperature_unsupported_models`. + pub fn with_temperature_unsupported_models(mut self, patterns: Vec) -> Self { + self.temperature_unsupported_models = patterns; + self + } + + /// Resolve the effective temperature for `model`. Returns `None` when the + /// model matches a pattern in `temperature_unsupported_models` (causing the + /// field to be omitted from the serialised request). 
+ fn effective_temperature(&self, model: &str, temperature: f64) -> Option { + if self + .temperature_unsupported_models + .iter() + .any(|pat| super::temperature::glob_match(pat, model)) + { + tracing::debug!( + "[provider:{}] model='{}' matched temperature_unsupported_models — omitting temperature", + self.name, + model + ); + None + } else { + Some(temperature) } } @@ -543,7 +577,8 @@ impl OpenAiCompatibleProvider { return messages.to_vec(); } - let instructions = crate::openhuman::providers::traits::build_tool_instructions_text(tools); + let instructions = + crate::openhuman::inference::provider::traits::build_tool_instructions_text(tools); let mut modified_messages = messages.to_vec(); if let Some(system_message) = modified_messages.iter_mut().find(|m| m.role == "system") { @@ -712,7 +747,7 @@ impl OpenAiCompatibleProvider { &self, credential: Option<&str>, native_request: &NativeChatRequest, - delta_tx: &tokio::sync::mpsc::Sender, + delta_tx: &tokio::sync::mpsc::Sender, dump_seq: u64, ) -> anyhow::Result { use futures_util::StreamExt; @@ -858,7 +893,7 @@ impl OpenAiCompatibleProvider { if !content.is_empty() { text_accum.push_str(content); let _ = delta_tx - .send(crate::openhuman::providers::ProviderDelta::TextDelta { + .send(crate::openhuman::inference::provider::ProviderDelta::TextDelta { delta: content.clone(), }) .await; @@ -870,7 +905,7 @@ impl OpenAiCompatibleProvider { thinking_accum.push_str(reasoning); let _ = delta_tx .send( - crate::openhuman::providers::ProviderDelta::ThinkingDelta { + crate::openhuman::inference::provider::ProviderDelta::ThinkingDelta { delta: reasoning.clone(), }, ) @@ -950,7 +985,7 @@ impl OpenAiCompatibleProvider { name, ); let _ = delta_tx - .send(crate::openhuman::providers::ProviderDelta::ToolCallStart { + .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallStart { call_id: id.clone(), tool_name: name.clone(), }) @@ -968,7 +1003,7 @@ impl OpenAiCompatibleProvider { ); let buffered = entry.arguments.clone(); 
let _ = delta_tx - .send(crate::openhuman::providers::ProviderDelta::ToolCallArgsDelta { + .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallArgsDelta { call_id: id.clone(), delta: buffered, }) @@ -984,7 +1019,7 @@ impl OpenAiCompatibleProvider { let fresh = entry.arguments[entry.emitted_chars..].to_string(); let _ = delta_tx - .send(crate::openhuman::providers::ProviderDelta::ToolCallArgsDelta { + .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallArgsDelta { call_id: id.clone(), delta: fresh, }) @@ -1102,8 +1137,8 @@ impl OpenAiCompatibleProvider { #[async_trait] impl Provider for OpenAiCompatibleProvider { - fn capabilities(&self) -> crate::openhuman::providers::traits::ProviderCapabilities { - crate::openhuman::providers::traits::ProviderCapabilities { + fn capabilities(&self) -> crate::openhuman::inference::provider::traits::ProviderCapabilities { + crate::openhuman::inference::provider::traits::ProviderCapabilities { native_tool_calling: true, vision: false, } @@ -1145,7 +1180,7 @@ impl Provider for OpenAiCompatibleProvider { let request = ApiChatRequest { model: model.to_string(), messages, - temperature, + temperature: self.effective_temperature(model, temperature), stream: Some(false), tools: None, tool_choice: None, @@ -1279,7 +1314,7 @@ impl Provider for OpenAiCompatibleProvider { let request = ApiChatRequest { model: model.to_string(), messages: api_messages, - temperature, + temperature: self.effective_temperature(model, temperature), stream: Some(false), tools: None, tool_choice: None, @@ -1379,7 +1414,7 @@ impl Provider for OpenAiCompatibleProvider { let request = ApiChatRequest { model: model.to_string(), messages: api_messages, - temperature, + temperature: self.effective_temperature(model, temperature), stream: Some(false), tools: if tools.is_empty() { None @@ -1476,7 +1511,7 @@ impl Provider for OpenAiCompatibleProvider { let native_request = NativeChatRequest { model: model.to_string(), messages: 
Self::convert_messages_for_native(&effective_messages), - temperature, + temperature: self.effective_temperature(model, temperature), stream: Some(true), tool_choice: tools.as_ref().map(|_| "auto".to_string()), tools: tools.clone(), @@ -1518,7 +1553,7 @@ impl Provider for OpenAiCompatibleProvider { let native_request = NativeChatRequest { model: model.to_string(), messages: Self::convert_messages_for_native(&effective_messages), - temperature, + temperature: self.effective_temperature(model, temperature), stream: Some(false), tool_choice: tools.as_ref().map(|_| "auto".to_string()), tools, @@ -1669,7 +1704,7 @@ impl Provider for OpenAiCompatibleProvider { let request = ApiChatRequest { model: model.to_string(), messages, - temperature, + temperature: self.effective_temperature(model, temperature), stream: Some(options.enabled), tools: None, tool_choice: None, diff --git a/src/openhuman/providers/compatible_dump.rs b/src/openhuman/inference/provider/compatible_dump.rs similarity index 100% rename from src/openhuman/providers/compatible_dump.rs rename to src/openhuman/inference/provider/compatible_dump.rs diff --git a/src/openhuman/providers/compatible_parse.rs b/src/openhuman/inference/provider/compatible_parse.rs similarity index 98% rename from src/openhuman/providers/compatible_parse.rs rename to src/openhuman/inference/provider/compatible_parse.rs index 1d8e070e45..086482e31e 100644 --- a/src/openhuman/providers/compatible_parse.rs +++ b/src/openhuman/inference/provider/compatible_parse.rs @@ -3,7 +3,7 @@ //! All functions here are stateless transforms — no I/O, no HTTP. They take //! raw strings or deserialized values and return structured results. 
-use crate::openhuman::providers::traits::{ +use crate::openhuman::inference::provider::traits::{ ChatMessage, StreamError, StreamResult, ToolCall as ProviderToolCall, }; @@ -82,7 +82,7 @@ pub(crate) fn parse_sse_line(line: &str) -> StreamResult> { pub(crate) fn compact_sanitized_body_snippet(body: &str) -> String { // super = compatible module; super::super = providers module (where sanitize_api_error lives) - super::super::sanitize_api_error(body) + crate::openhuman::inference::provider::sanitize_api_error(body) .split_whitespace() .collect::>() .join(" ") diff --git a/src/openhuman/providers/compatible_stream.rs b/src/openhuman/inference/provider/compatible_stream.rs similarity index 97% rename from src/openhuman/providers/compatible_stream.rs rename to src/openhuman/inference/provider/compatible_stream.rs index f19c53a0b7..4209e13571 100644 --- a/src/openhuman/providers/compatible_stream.rs +++ b/src/openhuman/inference/provider/compatible_stream.rs @@ -3,7 +3,7 @@ //! Converts a raw `reqwest::Response` byte stream into a typed //! `StreamChunk` stream via Server-Sent Events parsing. 
-use crate::openhuman::providers::traits::{StreamChunk, StreamError, StreamResult}; +use crate::openhuman::inference::provider::traits::{StreamChunk, StreamError, StreamResult}; use futures_util::{stream, StreamExt}; use super::compatible_parse::parse_sse_line; diff --git a/src/openhuman/providers/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs similarity index 99% rename from src/openhuman/providers/compatible_tests.rs rename to src/openhuman/inference/provider/compatible_tests.rs index a8e43d4e0f..fb242db30e 100644 --- a/src/openhuman/providers/compatible_tests.rs +++ b/src/openhuman/inference/provider/compatible_tests.rs @@ -55,7 +55,7 @@ fn native_request_emits_thread_id_when_present() { let req = super::NativeChatRequest { model: "sonnet".to_string(), messages: Vec::new(), - temperature: 0.7, + temperature: Some(0.7), stream: Some(false), tools: None, tool_choice: None, @@ -72,7 +72,7 @@ fn native_request_emits_thread_id_when_present() { let req_no_thread = super::NativeChatRequest { model: "sonnet".to_string(), messages: Vec::new(), - temperature: 0.7, + temperature: Some(0.7), stream: Some(false), tools: None, tool_choice: None, @@ -96,7 +96,7 @@ fn streaming_request_sets_stream_options_include_usage() { let req = super::NativeChatRequest { model: "sonnet".to_string(), messages: Vec::new(), - temperature: 0.0, + temperature: Some(0.0), stream: Some(true), tools: None, tool_choice: None, @@ -119,7 +119,7 @@ fn non_streaming_request_omits_stream_options() { let req = super::NativeChatRequest { model: "sonnet".to_string(), messages: Vec::new(), - temperature: 0.0, + temperature: Some(0.0), stream: Some(false), tools: None, tool_choice: None, @@ -135,7 +135,7 @@ fn non_streaming_request_omits_stream_options() { #[tokio::test] async fn outbound_thread_id_is_gated_per_provider() { - use crate::openhuman::providers::thread_context::with_thread_id; + use crate::openhuman::inference::provider::thread_context::with_thread_id; let third_party 
= make_provider("Venice", "https://api.venice.ai", None); let openhuman = @@ -171,7 +171,7 @@ fn request_serializes_correctly() { content: "hello".to_string(), }, ], - temperature: 0.4, + temperature: Some(0.4), stream: Some(false), tools: None, tool_choice: None, @@ -771,7 +771,7 @@ fn request_serializes_with_tools() { role: "user".to_string(), content: "What is the weather?".to_string(), }], - temperature: 0.7, + temperature: Some(0.7), stream: Some(false), tools: Some(tools), tool_choice: Some("auto".to_string()), diff --git a/src/openhuman/providers/compatible_types.rs b/src/openhuman/inference/provider/compatible_types.rs similarity index 97% rename from src/openhuman/providers/compatible_types.rs rename to src/openhuman/inference/provider/compatible_types.rs index 87f0ac0631..b956b1b83d 100644 --- a/src/openhuman/providers/compatible_types.rs +++ b/src/openhuman/inference/provider/compatible_types.rs @@ -12,7 +12,8 @@ use serde::{Deserialize, Serialize}; pub(crate) struct ApiChatRequest { pub(crate) model: String, pub(crate) messages: Vec, - pub(crate) temperature: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) temperature: Option, #[serde(skip_serializing_if = "Option::is_none")] pub(crate) stream: Option, #[serde(skip_serializing_if = "Option::is_none")] @@ -31,7 +32,8 @@ pub(crate) struct Message { pub(crate) struct NativeChatRequest { pub(crate) model: String, pub(crate) messages: Vec, - pub(crate) temperature: f64, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) temperature: Option, #[serde(skip_serializing_if = "Option::is_none")] pub(crate) stream: Option, #[serde(skip_serializing_if = "Option::is_none")] @@ -44,7 +46,7 @@ pub(crate) struct NativeChatRequest { /// when serialising for vanilla OpenAI-compatible providers that /// don't recognise it (most reject only unknown *required* fields, /// but emitting it here is gated on the ambient task-local being - /// set — see 
`crate::openhuman::providers::thread_context`). + /// set — see `crate::openhuman::inference::provider::thread_context`). #[serde(skip_serializing_if = "Option::is_none")] pub(crate) thread_id: Option, /// OpenAI streaming `stream_options`. Set to `{"include_usage": true}` @@ -59,7 +61,7 @@ pub(crate) struct NativeChatRequest { } /// OpenAI-spec `stream_options` payload (sent on the wire). Distinct from -/// `crate::openhuman::providers::traits::StreamOptions`, which is the +/// `crate::openhuman::inference::provider::traits::StreamOptions`, which is the /// caller-side knob set on `ChatRequest` to toggle agent streaming. #[derive(Debug, Serialize)] pub(crate) struct OpenAiStreamOptions { diff --git a/src/openhuman/providers/factory.rs b/src/openhuman/inference/provider/factory.rs similarity index 88% rename from src/openhuman/providers/factory.rs rename to src/openhuman/inference/provider/factory.rs index 7f10cb094d..08522bda03 100644 --- a/src/openhuman/providers/factory.rs +++ b/src/openhuman/inference/provider/factory.rs @@ -20,12 +20,12 @@ use crate::openhuman::config::schema::cloud_providers::AuthStyle; use crate::openhuman::config::Config; use crate::openhuman::credentials::AuthService; -use crate::openhuman::providers::compatible::{ +use crate::openhuman::inference::provider::compatible::{ AuthStyle as CompatAuthStyle, OpenAiCompatibleProvider, }; -use crate::openhuman::providers::openhuman_backend::OpenHumanBackendProvider; -use crate::openhuman::providers::traits::Provider; -use crate::openhuman::providers::ProviderRuntimeOptions; +use crate::openhuman::inference::provider::openhuman_backend::OpenHumanBackendProvider; +use crate::openhuman::inference::provider::traits::Provider; +use crate::openhuman::inference::provider::ProviderRuntimeOptions; /// Sentinel meaning "use the OpenHuman backend session JWT". 
pub const PROVIDER_OPENHUMAN: &str = "openhuman"; @@ -213,7 +213,12 @@ fn make_ollama_provider( model, redact_endpoint(&endpoint) ); - let p = make_openai_compatible_provider(&endpoint, "", CompatAuthStyle::None)?; + let p = make_openai_compatible_provider_with_config( + &endpoint, + "", + CompatAuthStyle::None, + &config.temperature_unsupported_models, + )?; Ok((p, model.to_string())) } @@ -260,10 +265,15 @@ fn make_cloud_provider_by_slug( let key = lookup_key_for_slug(slug, config)?; + let unsupported = &config.temperature_unsupported_models; match entry.auth_style { AuthStyle::Anthropic => { - let p = - make_openai_compatible_provider(&entry.endpoint, &key, CompatAuthStyle::Anthropic)?; + let p = make_openai_compatible_provider_with_config( + &entry.endpoint, + &key, + CompatAuthStyle::Anthropic, + unsupported, + )?; Ok((p, effective_model)) } AuthStyle::OpenhumanJwt => { @@ -276,12 +286,21 @@ fn make_cloud_provider_by_slug( make_openhuman_backend(config) } AuthStyle::None => { - let p = make_openai_compatible_provider(&entry.endpoint, "", CompatAuthStyle::None)?; + let p = make_openai_compatible_provider_with_config( + &entry.endpoint, + "", + CompatAuthStyle::None, + unsupported, + )?; Ok((p, effective_model)) } AuthStyle::Bearer => { - let p = - make_openai_compatible_provider(&entry.endpoint, &key, CompatAuthStyle::Bearer)?; + let p = make_openai_compatible_provider_with_config( + &entry.endpoint, + &key, + CompatAuthStyle::Bearer, + unsupported, + )?; Ok((p, effective_model)) } } @@ -331,15 +350,27 @@ fn make_openai_compatible_provider( endpoint: &str, api_key: &str, auth_style: CompatAuthStyle, +) -> anyhow::Result> { + make_openai_compatible_provider_with_config(endpoint, api_key, auth_style, &[]) +} + +/// Build an `OpenAiCompatibleProvider` with auth style and temperature +/// suppression list from config. 
+fn make_openai_compatible_provider_with_config( + endpoint: &str, + api_key: &str, + auth_style: CompatAuthStyle, + temperature_unsupported_models: &[String], ) -> anyhow::Result> { let key = if api_key.trim().is_empty() { None } else { Some(api_key) }; - Ok(Box::new(OpenAiCompatibleProvider::new( - "cloud", endpoint, key, auth_style, - ))) + Ok(Box::new( + OpenAiCompatibleProvider::new("cloud", endpoint, key, auth_style) + .with_temperature_unsupported_models(temperature_unsupported_models.to_vec()), + )) } /// Return a safe-to-log representation of a URL endpoint: `scheme://host` only. diff --git a/src/openhuman/providers/factory_test.rs b/src/openhuman/inference/provider/factory_test.rs similarity index 100% rename from src/openhuman/providers/factory_test.rs rename to src/openhuman/inference/provider/factory_test.rs diff --git a/src/openhuman/providers/mod.rs b/src/openhuman/inference/provider/mod.rs similarity index 55% rename from src/openhuman/providers/mod.rs rename to src/openhuman/inference/provider/mod.rs index bd18f3dad5..e98b51659b 100644 --- a/src/openhuman/providers/mod.rs +++ b/src/openhuman/inference/provider/mod.rs @@ -1,11 +1,22 @@ +//! Unified provider abstraction — cloud + local chat, embedding, and streaming. +//! +//! This module was previously `src/openhuman/providers/`. It now lives under +//! `inference/provider/` so all inference concerns (local runtime, cloud +//! providers, HTTP endpoint) share a single domain root. 
+ pub mod billing_error; pub mod compatible; +pub mod compatible_dump; +pub mod compatible_parse; +pub mod compatible_stream; +pub mod compatible_types; pub mod factory; pub mod openhuman_backend; pub mod ops; pub mod reliable; pub mod router; pub mod schemas; +pub mod temperature; pub mod thread_context; pub mod traits; @@ -18,7 +29,3 @@ pub use traits::{ pub use billing_error::is_budget_exhausted_message; pub use factory::{create_chat_provider, provider_for_role}; pub use ops::*; -pub use schemas::{ - all_controller_schemas as all_providers_controller_schemas, - all_registered_controllers as all_providers_registered_controllers, -}; diff --git a/src/openhuman/providers/openhuman_backend.rs b/src/openhuman/inference/provider/openhuman_backend.rs similarity index 100% rename from src/openhuman/providers/openhuman_backend.rs rename to src/openhuman/inference/provider/openhuman_backend.rs diff --git a/src/openhuman/providers/ops.rs b/src/openhuman/inference/provider/ops.rs similarity index 83% rename from src/openhuman/providers/ops.rs rename to src/openhuman/inference/provider/ops.rs index 4a06d50c87..cb3533de8a 100644 --- a/src/openhuman/providers/ops.rs +++ b/src/openhuman/inference/provider/ops.rs @@ -1,5 +1,6 @@ use super::*; +use serde::Serialize; use std::path::PathBuf; const MAX_API_ERROR_CHARS: usize = 200; @@ -15,6 +16,143 @@ pub struct ProviderRuntimeOptions { pub reasoning_enabled: Option, } +#[derive(Debug, Serialize)] +pub struct ModelInfo { + pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub owned_by: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub context_window: Option, +} + +pub async fn list_configured_models( + provider_id: &str, +) -> Result, String> { + let provider_id = provider_id.trim().to_string(); + if provider_id.is_empty() { + return Err("provider_id must not be empty".to_string()); + } + + log::debug!("[providers][list_models] provider_id={}", provider_id); + + let config = 
crate::openhuman::config::Config::load_or_init() + .await + .map_err(|e| e.to_string())?; + + let entry = config + .cloud_providers + .iter() + .find(|e| e.id == provider_id) + .cloned() + .ok_or_else(|| format!("no cloud provider with id '{}' found", provider_id))?; + + let base = entry.endpoint.trim_end_matches('/'); + let models_url = format!("{}/models", base); + + log::debug!( + "[providers][list_models] fetching url={} slug={}", + models_url, + entry.slug + ); + + let api_key = + crate::openhuman::inference::provider::factory::lookup_key_for_slug(&entry.slug, &config) + .unwrap_or_default(); + + let client = crate::openhuman::config::build_runtime_proxy_client_with_timeouts( + "providers.list_models", + 30, + 10, + ); + + let mut request = client.get(&models_url); + + use crate::openhuman::config::schema::cloud_providers::AuthStyle; + request = match entry.auth_style { + AuthStyle::Bearer => { + if !api_key.is_empty() { + request.header("Authorization", format!("Bearer {}", api_key)) + } else { + request + } + } + AuthStyle::Anthropic => { + let mut r = request.header("anthropic-version", "2023-06-01"); + if !api_key.is_empty() { + r = r.header("x-api-key", &api_key); + } + r + } + AuthStyle::OpenhumanJwt => { + if !api_key.is_empty() { + request.header("Authorization", format!("Bearer {}", api_key)) + } else { + request + } + } + AuthStyle::None => request, + }; + + let response = request + .send() + .await + .map_err(|e| format!("[providers][list_models] HTTP request failed: {}", e))?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.unwrap_or_default(); + let sanitized = sanitize_api_error(&body); + let truncated = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); + return Err(format!( + "provider returned {}: {}", + status.as_u16(), + truncated + )); + } + + let body: serde_json::Value = response + .json() + .await + .map_err(|e| format!("[providers][list_models] failed to parse JSON: {}", 
e))?; + + let data = body + .get("data") + .and_then(|d| d.as_array()) + .cloned() + .unwrap_or_default(); + + let models: Vec = data + .iter() + .filter_map(|item| { + let id = item.get("id")?.as_str()?.to_string(); + let owned_by = item + .get("owned_by") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let context_window = item + .get("context_length") + .or_else(|| item.get("context_window")) + .and_then(|v| v.as_u64()); + Some(ModelInfo { + id, + owned_by, + context_window, + }) + }) + .collect(); + + log::info!( + "[providers][list_models] slug={} fetched {} models", + entry.slug, + models.len() + ); + + Ok(crate::rpc::RpcOutcome::new( + serde_json::json!({ "models": models }), + vec![format!("fetched {} models", models.len())], + )) +} + impl Default for ProviderRuntimeOptions { fn default() -> Self { Self { @@ -246,11 +384,11 @@ pub fn create_backend_inference_provider( key.len() ); Ok(Box::new( - crate::openhuman::providers::compatible::OpenAiCompatibleProvider::new( + crate::openhuman::inference::provider::compatible::OpenAiCompatibleProvider::new( "custom_openai", url, Some(key), - crate::openhuman::providers::compatible::AuthStyle::Bearer, + crate::openhuman::inference::provider::compatible::AuthStyle::Bearer, ), )) } else { diff --git a/src/openhuman/providers/reliable.rs b/src/openhuman/inference/provider/reliable.rs similarity index 100% rename from src/openhuman/providers/reliable.rs rename to src/openhuman/inference/provider/reliable.rs diff --git a/src/openhuman/providers/reliable_tests.rs b/src/openhuman/inference/provider/reliable_tests.rs similarity index 100% rename from src/openhuman/providers/reliable_tests.rs rename to src/openhuman/inference/provider/reliable_tests.rs diff --git a/src/openhuman/providers/router.rs b/src/openhuman/inference/provider/router.rs similarity index 100% rename from src/openhuman/providers/router.rs rename to src/openhuman/inference/provider/router.rs diff --git a/src/openhuman/providers/router_test.rs 
b/src/openhuman/inference/provider/router_test.rs similarity index 100% rename from src/openhuman/providers/router_test.rs rename to src/openhuman/inference/provider/router_test.rs diff --git a/src/openhuman/inference/provider/schemas.rs b/src/openhuman/inference/provider/schemas.rs new file mode 100644 index 0000000000..fd81d65b53 --- /dev/null +++ b/src/openhuman/inference/provider/schemas.rs @@ -0,0 +1,77 @@ +//! RPC controller schemas for the providers domain. +//! +//! Exposes `openhuman.providers_list_models` — fetches the `/models` endpoint +//! of a configured cloud provider and returns the list. + +use crate::core::all::{ControllerFuture, RegisteredController}; +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; +use serde::Deserialize; +use serde_json::{Map, Value}; + +// ── Helpers ────────────────────────────────────────────────────────────────── + +fn to_json(outcome: crate::rpc::RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} + +fn deserialize_params Deserialize<'de>>( + params: Map, +) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| e.to_string()) +} + +// ── Schema catalog ──────────────────────────────────────────────────────────── + +pub fn all_controller_schemas() -> Vec { + vec![list_models_schema()] +} + +pub fn all_registered_controllers() -> Vec { + vec![RegisteredController { + schema: list_models_schema(), + handler: handle_list_models, + }] +} + +fn list_models_schema() -> ControllerSchema { + ControllerSchema { + namespace: "providers", + function: "list_models", + description: "Fetch the available model list from a configured cloud provider's /models API.", + inputs: vec![ + FieldSchema { + name: "provider_id", + ty: TypeSchema::String, + comment: "Opaque id of the cloud_providers entry to query.", + required: true, + }, + ], + outputs: vec![ + FieldSchema { + name: "models", + ty: TypeSchema::Json, + comment: "Array of { id, owned_by?, context_window? 
} model descriptors returned by the provider.", + required: true, + }, + ], + } +} + +// ── Request / response types ────────────────────────────────────────────────── + +#[derive(Debug, Deserialize)] +struct ListModelsRequest { + provider_id: String, +} + +// ── Handler ─────────────────────────────────────────────────────────────────── + +fn handle_list_models(params: Map) -> ControllerFuture { + Box::pin(async move { + let req: ListModelsRequest = deserialize_params(params)?; + to_json( + crate::openhuman::inference::provider::ops::list_configured_models(&req.provider_id) + .await?, + ) + }) +} diff --git a/src/openhuman/inference/provider/temperature.rs b/src/openhuman/inference/provider/temperature.rs new file mode 100644 index 0000000000..40b088fda1 --- /dev/null +++ b/src/openhuman/inference/provider/temperature.rs @@ -0,0 +1,200 @@ +//! Per-model temperature suppression helpers. +//! +//! Some models (OpenAI o-series, GPT-5 reasoning variants) reject the +//! `temperature` field in the request body and return an error when it is +//! present. `temperature_for_model` consults the config's +//! `temperature_unsupported_models` list (which accepts shell-style `*` +//! globs) and returns `None` when the model matches, causing the +//! serialisation layer to omit the field via `skip_serializing_if`. + +use crate::openhuman::config::Config; + +/// Returns the effective temperature for `model`, or `None` if the model +/// is listed in `config.temperature_unsupported_models`. +/// +/// The list entries support shell-style `*` wildcard matching (no `?` or +/// `[]`). Matching is case-sensitive and done against the full model ID. 
+/// +/// # Examples +/// +/// ``` +/// // model "o1-preview" matches pattern "o1*" → None +/// // model "gpt-4o-mini" matches no pattern → Some(0.7) +/// ``` +pub fn temperature_for_model(model: &str, default: f64, config: &Config) -> Option { + if config + .temperature_unsupported_models + .iter() + .any(|pat| glob_match(pat, model)) + { + tracing::debug!( + "[inference][temperature] model='{}' matched unsupported-temperature list — omitting temperature field", + model + ); + None + } else { + Some(default) + } +} + +/// Minimal shell-style glob matcher supporting only `*` (match any sequence +/// of characters, including empty). Does not support `?` or `[...]`. +/// +/// This avoids pulling in the `glob` crate for what is effectively a +/// starts-with / ends-with / contains check. +pub fn glob_match(pattern: &str, text: &str) -> bool { + // Split on `*` and consume the text segment by segment. + let parts: Vec<&str> = pattern.split('*').collect(); + + if parts.is_empty() { + // Pattern is purely `*` — matches everything. + return true; + } + + let mut remaining = text; + + for (i, part) in parts.iter().enumerate() { + if part.is_empty() { + // Consecutive stars or leading/trailing star — skip. + continue; + } + + if i == 0 { + // First segment: must match the start of `text`. + if !remaining.starts_with(part) { + return false; + } + remaining = &remaining[part.len()..]; + } else { + // Middle or last segment: find first occurrence in `remaining`. + match remaining.find(part) { + Some(pos) => { + remaining = &remaining[pos + part.len()..]; + } + None => return false, + } + } + } + + // If the pattern did NOT end with `*`, the remaining text must be empty. 
+ if !pattern.ends_with('*') && !remaining.is_empty() { + return false; + } + + true +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::openhuman::config::Config; + + // ── glob_match unit tests ───────────────────────────────────────────────── + + #[test] + fn glob_exact_match() { + assert!(glob_match("o1-preview", "o1-preview")); + } + + #[test] + fn glob_prefix_star() { + assert!(glob_match("o1*", "o1-preview")); + assert!(glob_match("o1*", "o1-mini")); + assert!(glob_match("o1*", "o1")); + assert!(!glob_match("o1*", "gpt-4o")); + } + + #[test] + fn glob_suffix_star() { + assert!(glob_match("*mini", "gpt-4o-mini")); + assert!(!glob_match("*mini", "gpt-4o-large")); + } + + #[test] + fn glob_contains_star() { + assert!(glob_match("gpt*mini", "gpt-4o-mini")); + assert!(!glob_match("gpt*mini", "gpt-4o-large")); + } + + #[test] + fn glob_pure_star() { + assert!(glob_match("*", "anything")); + assert!(glob_match("*", "")); + } + + #[test] + fn glob_no_star_mismatch() { + assert!(!glob_match("o1", "o1-preview")); + assert!(glob_match("o1", "o1")); + } + + #[test] + fn glob_gpt5_pattern() { + assert!(glob_match("gpt-5*", "gpt-5")); + assert!(glob_match("gpt-5*", "gpt-5-turbo")); + assert!(!glob_match("gpt-5*", "gpt-4o")); + } + + // ── temperature_for_model tests ─────────────────────────────────────────── + + fn config_with_unsupported(patterns: Vec) -> Config { + let mut config = Config::default(); + config.temperature_unsupported_models = patterns; + config + } + + #[test] + fn temperature_returned_for_normal_model() { + let config = Config::default(); // has ["o1*","o3*","o4*","gpt-5*"] by default + assert_eq!( + temperature_for_model("gpt-4o-mini", 0.7, &config), + Some(0.7) + ); + assert_eq!( + temperature_for_model("claude-3-opus", 0.5, &config), + Some(0.5) + ); + } + + #[test] + fn temperature_suppressed_for_o1_model() { + let config = Config::default(); + assert_eq!(temperature_for_model("o1-preview", 0.7, &config), None); + 
assert_eq!(temperature_for_model("o1-mini", 0.7, &config), None); + assert_eq!(temperature_for_model("o1", 0.7, &config), None); + } + + #[test] + fn temperature_suppressed_for_o3_o4() { + let config = Config::default(); + assert_eq!(temperature_for_model("o3", 0.7, &config), None); + assert_eq!(temperature_for_model("o3-mini", 0.7, &config), None); + assert_eq!(temperature_for_model("o4-mini", 0.7, &config), None); + } + + #[test] + fn temperature_suppressed_for_gpt5() { + let config = Config::default(); + assert_eq!(temperature_for_model("gpt-5", 0.7, &config), None); + assert_eq!(temperature_for_model("gpt-5-turbo", 0.7, &config), None); + } + + #[test] + fn temperature_uses_custom_unsupported_list() { + let config = config_with_unsupported(vec!["custom-*".to_string()]); + assert_eq!(temperature_for_model("custom-model", 0.7, &config), None); + assert_eq!( + temperature_for_model("gpt-4o-mini", 0.7, &config), + Some(0.7) + ); + // Default patterns no longer apply when list is replaced. + assert_eq!(temperature_for_model("o1-preview", 0.7, &config), Some(0.7)); + } + + #[test] + fn temperature_empty_list_always_returns_some() { + let config = config_with_unsupported(vec![]); + assert_eq!(temperature_for_model("o1-preview", 0.7, &config), Some(0.7)); + assert_eq!(temperature_for_model("gpt-5", 0.3, &config), Some(0.3)); + } +} diff --git a/src/openhuman/providers/thread_context.rs b/src/openhuman/inference/provider/thread_context.rs similarity index 97% rename from src/openhuman/providers/thread_context.rs rename to src/openhuman/inference/provider/thread_context.rs index b70b25b4fb..c8e23088e3 100644 --- a/src/openhuman/providers/thread_context.rs +++ b/src/openhuman/inference/provider/thread_context.rs @@ -14,7 +14,7 @@ //! it. //! //! ```ignore -//! use crate::openhuman::providers::thread_context::{with_thread_id, current_thread_id}; +//! use crate::openhuman::inference::provider::thread_context::{with_thread_id, current_thread_id}; //! //! 
with_thread_id("abc123", async { //! // any provider.chat() call inside this future sees thread_id=Some("abc123") diff --git a/src/openhuman/providers/traits.rs b/src/openhuman/inference/provider/traits.rs similarity index 100% rename from src/openhuman/providers/traits.rs rename to src/openhuman/inference/provider/traits.rs diff --git a/src/openhuman/providers/traits_tests.rs b/src/openhuman/inference/provider/traits_tests.rs similarity index 100% rename from src/openhuman/providers/traits_tests.rs rename to src/openhuman/inference/provider/traits_tests.rs diff --git a/src/openhuman/inference/schemas.rs b/src/openhuman/inference/schemas.rs new file mode 100644 index 0000000000..7db5f85f71 --- /dev/null +++ b/src/openhuman/inference/schemas.rs @@ -0,0 +1,720 @@ +use serde::de::DeserializeOwned; +use serde::Deserialize; +use serde_json::{Map, Value}; + +use crate::core::all::{ControllerFuture, RegisteredController}; +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; +use crate::openhuman::config::rpc as config_rpc; +use crate::rpc::RpcOutcome; + +#[derive(Debug, Deserialize)] +struct InferenceSummarizeParams { + text: String, + max_tokens: Option, +} + +#[derive(Debug, Deserialize)] +struct InferencePromptParams { + prompt: String, + max_tokens: Option, + no_think: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceVisionPromptParams { + prompt: String, + image_refs: Vec, + max_tokens: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceEmbedParams { + inputs: Vec, +} + +#[derive(Debug, Deserialize)] +struct InferenceChatMessageParam { + role: String, + content: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceChatParams { + messages: Vec, + max_tokens: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceShouldReactParams { + message: String, + channel_type: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceAnalyzeSentimentParams { + message: String, +} + +#[derive(Debug, Deserialize)] +struct 
InferenceModelRouteUpdate { + hint: String, + model: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceCloudProviderUpdate { + id: Option, + slug: String, + #[serde(default)] + label: Option, + endpoint: String, + #[serde(default)] + auth_style: Option, + #[serde(rename = "type", default)] + legacy_type: Option, + #[serde(default)] + default_model: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceUpdateModelSettingsParams { + api_url: Option, + inference_url: Option, + api_key: Option, + default_model: Option, + default_temperature: Option, + model_routes: Option>, + cloud_providers: Option>, + primary_cloud: Option, + reasoning_provider: Option, + agentic_provider: Option, + coding_provider: Option, + memory_provider: Option, + embeddings_provider: Option, + heartbeat_provider: Option, + learning_provider: Option, + subconscious_provider: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceUpdateLocalSettingsParams { + runtime_enabled: Option, + opt_in_confirmed: Option, + provider: Option, + base_url: Option, + model_id: Option, + chat_model_id: Option, + usage_embeddings: Option, + usage_heartbeat: Option, + usage_learning_reflection: Option, + usage_subconscious: Option, +} + +#[derive(Debug, Deserialize)] +struct InferenceListModelsParams { + provider_id: String, +} + +#[derive(Debug, Deserialize)] +struct InferenceApplyPresetParams { + tier: String, +} + +pub fn all_controller_schemas() -> Vec { + vec![ + schemas("status"), + schemas("get_client_config"), + schemas("update_model_settings"), + schemas("update_local_settings"), + schemas("list_models"), + schemas("device_profile"), + schemas("presets"), + schemas("apply_preset"), + schemas("diagnostics"), + schemas("summarize"), + schemas("prompt"), + schemas("vision_prompt"), + schemas("embed"), + schemas("chat"), + schemas("should_react"), + schemas("analyze_sentiment"), + ] +} + +pub fn all_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: 
schemas("status"), + handler: handle_inference_status, + }, + RegisteredController { + schema: schemas("get_client_config"), + handler: handle_inference_get_client_config, + }, + RegisteredController { + schema: schemas("update_model_settings"), + handler: handle_inference_update_model_settings, + }, + RegisteredController { + schema: schemas("update_local_settings"), + handler: handle_inference_update_local_settings, + }, + RegisteredController { + schema: schemas("list_models"), + handler: handle_inference_list_models, + }, + RegisteredController { + schema: schemas("device_profile"), + handler: handle_inference_device_profile, + }, + RegisteredController { + schema: schemas("presets"), + handler: handle_inference_presets, + }, + RegisteredController { + schema: schemas("apply_preset"), + handler: handle_inference_apply_preset, + }, + RegisteredController { + schema: schemas("diagnostics"), + handler: handle_inference_diagnostics, + }, + RegisteredController { + schema: schemas("summarize"), + handler: handle_inference_summarize, + }, + RegisteredController { + schema: schemas("prompt"), + handler: handle_inference_prompt, + }, + RegisteredController { + schema: schemas("vision_prompt"), + handler: handle_inference_vision_prompt, + }, + RegisteredController { + schema: schemas("embed"), + handler: handle_inference_embed, + }, + RegisteredController { + schema: schemas("chat"), + handler: handle_inference_chat, + }, + RegisteredController { + schema: schemas("should_react"), + handler: handle_inference_should_react, + }, + RegisteredController { + schema: schemas("analyze_sentiment"), + handler: handle_inference_analyze_sentiment, + }, + ] +} + +pub fn schemas(function: &str) -> ControllerSchema { + match function { + "status" => ControllerSchema { + namespace: "inference", + function: "status", + description: "Read inference service status.", + inputs: vec![], + outputs: vec![json_output("status", "Inference status payload.")], + }, + "get_client_config" => 
ControllerSchema { + namespace: "inference", + function: "get_client_config", + description: "Read the client-facing inference/provider config used by the AI settings UI.", + inputs: vec![], + outputs: vec![json_output("config", "Client-facing inference config payload.")], + }, + "update_model_settings" => ControllerSchema { + namespace: "inference", + function: "update_model_settings", + description: "Persist cloud-provider routing, custom inference endpoint, and per-workload provider settings.", + inputs: vec![ + optional_string("api_url", "Optional OpenHuman product backend URL."), + optional_string("inference_url", "Optional custom inference base URL."), + optional_string("api_key", "Optional API key for a custom inference endpoint."), + optional_string("default_model", "Optional default model override."), + optional_f64("default_temperature", "Optional default temperature override."), + optional_json("model_routes", "Optional full replacement for legacy model routes."), + optional_json("cloud_providers", "Optional full replacement for configured cloud providers."), + optional_string("primary_cloud", "Optional primary cloud provider id."), + optional_string("reasoning_provider", "Optional reasoning workload provider string."), + optional_string("agentic_provider", "Optional agentic workload provider string."), + optional_string("coding_provider", "Optional coding workload provider string."), + optional_string("memory_provider", "Optional memory workload provider string."), + optional_string("embeddings_provider", "Optional embeddings workload provider string."), + optional_string("heartbeat_provider", "Optional heartbeat workload provider string."), + optional_string("learning_provider", "Optional learning workload provider string."), + optional_string("subconscious_provider", "Optional subconscious workload provider string."), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "update_local_settings" => ControllerSchema { + 
namespace: "inference", + function: "update_local_settings", + description: "Persist local inference provider selection, endpoint URL, and local-runtime routing flags.", + inputs: vec![ + optional_bool("runtime_enabled", "Enable or disable local inference runtime routing."), + optional_bool("opt_in_confirmed", "Persist the local inference opt-in flag."), + optional_string("provider", "Optional local provider slug, e.g. ollama or lm_studio."), + optional_string("base_url", "Optional local provider base URL."), + optional_string("model_id", "Optional generic model id override."), + optional_string("chat_model_id", "Optional chat model id override."), + optional_bool("usage_embeddings", "Whether embeddings workload may use the local provider."), + optional_bool("usage_heartbeat", "Whether heartbeat workload may use the local provider."), + optional_bool("usage_learning_reflection", "Whether learning reflection workload may use the local provider."), + optional_bool("usage_subconscious", "Whether subconscious workload may use the local provider."), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "list_models" => ControllerSchema { + namespace: "inference", + function: "list_models", + description: "Fetch the available model list from a configured inference provider's /models API.", + inputs: vec![required_string("provider_id", "Opaque id of the cloud provider entry to query.")], + outputs: vec![json_output("models", "Provider model list payload.")], + }, + "device_profile" => ControllerSchema { + namespace: "inference", + function: "device_profile", + description: "Detect the local hardware profile used for local inference recommendations.", + inputs: vec![], + outputs: vec![json_output("profile", "Device hardware profile.")], + }, + "presets" => ControllerSchema { + namespace: "inference", + function: "presets", + description: "List local inference model presets with recommendation and current selection.", + inputs: vec![], + outputs: 
vec![json_output("presets", "Inference preset payload.")], + }, + "apply_preset" => ControllerSchema { + namespace: "inference", + function: "apply_preset", + description: "Apply a local inference preset to the persisted config.", + inputs: vec![required_string("tier", "Tier to apply: ram_2_4gb or disabled.")], + outputs: vec![json_output("result", "Applied preset payload.")], + }, + "diagnostics" => ControllerSchema { + namespace: "inference", + function: "diagnostics", + description: "Run diagnostics for the configured local inference provider endpoint and expected models.", + inputs: vec![], + outputs: vec![json_output("diagnostics", "Inference diagnostics payload.")], + }, + "summarize" => ControllerSchema { + namespace: "inference", + function: "summarize", + description: "Summarize text with the configured inference provider.", + inputs: vec![ + required_string("text", "Input text."), + optional_u64("max_tokens", "Optional max output tokens."), + ], + outputs: vec![json_output("summary", "Summary text.")], + }, + "prompt" => ControllerSchema { + namespace: "inference", + function: "prompt", + description: "Run a direct inference prompt.", + inputs: vec![ + required_string("prompt", "Prompt text."), + optional_u64("max_tokens", "Optional max output tokens."), + optional_bool("no_think", "Disable thinking mode."), + ], + outputs: vec![json_output("output", "Prompt output text.")], + }, + "vision_prompt" => ControllerSchema { + namespace: "inference", + function: "vision_prompt", + description: "Run a multimodal inference prompt with image refs.", + inputs: vec![ + required_string("prompt", "Prompt text."), + FieldSchema { + name: "image_refs", + ty: TypeSchema::Array(Box::new(TypeSchema::String)), + comment: "Image references to include.", + required: true, + }, + optional_u64("max_tokens", "Optional max output tokens."), + ], + outputs: vec![json_output("output", "Prompt output text.")], + }, + "embed" => ControllerSchema { + namespace: "inference", + 
function: "embed", + description: "Generate embeddings for text inputs.", + inputs: vec![FieldSchema { + name: "inputs", + ty: TypeSchema::Array(Box::new(TypeSchema::String)), + comment: "Texts to embed.", + required: true, + }], + outputs: vec![json_output("embedding", "Embedding result payload.")], + }, + "chat" => ControllerSchema { + namespace: "inference", + function: "chat", + description: "Multi-turn chat completion via the configured inference provider.", + inputs: vec![ + FieldSchema { + name: "messages", + ty: TypeSchema::Array(Box::new(TypeSchema::Json)), + comment: "Chat message history [{role, content}]. Last entry is the user turn.", + required: true, + }, + optional_u64("max_tokens", "Optional max output tokens."), + ], + outputs: vec![json_output("reply", "Assistant reply text.")], + }, + "should_react" => ControllerSchema { + namespace: "inference", + function: "should_react", + description: "Ask the inference provider whether the assistant should add an emoji reaction to a user message, based on channel type.", + inputs: vec![ + required_string("message", "User message content to evaluate."), + required_string("channel_type", "Channel type: web, telegram, discord, slack, etc."), + ], + outputs: vec![json_output("decision", "Reaction decision: {should_react, emoji}.")], + }, + "analyze_sentiment" => ControllerSchema { + namespace: "inference", + function: "analyze_sentiment", + description: "Classify the emotion and valence of a user message with the inference provider.", + inputs: vec![required_string("message", "User message content to classify.")], + outputs: vec![json_output("sentiment", "Sentiment analysis payload.")], + }, + other => panic!("unknown inference schema: {other}"), + } +} + +fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::String, + comment, + required: true, + } +} + +fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema 
{ + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + +fn optional_u64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment, + required: false, + } +} + +fn optional_f64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment, + required: false, + } +} + +fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment, + required: false, + } +} + +fn optional_json(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment, + required: false, + } +} + +fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Json, + comment, + required: true, + } +} + +fn handle_inference_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(crate::openhuman::inference::rpc::inference_status(&config).await?) + }) +} + +fn handle_inference_get_client_config(_params: Map) -> ControllerFuture { + Box::pin(async move { + to_json(crate::openhuman::inference::rpc::inference_get_client_config().await?) 
+ }) +} + +fn handle_inference_update_model_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::ModelSettingsPatch { + api_url: update.api_url, + inference_url: update.inference_url, + api_key: update.api_key, + default_model: update.default_model, + default_temperature: update.default_temperature, + model_routes: update.model_routes.map(|routes| { + routes + .into_iter() + .map(|route| crate::openhuman::config::ModelRouteConfig { + hint: route.hint, + model: route.model, + }) + .collect() + }), + cloud_providers: update + .cloud_providers + .map(|entries| { + use crate::openhuman::config::schema::cloud_providers::{ + generate_provider_id, is_slug_reserved, migrate_legacy_fields, AuthStyle, + CloudProviderCreds, + }; + entries + .into_iter() + .map(|entry| { + let slug = entry.slug.trim().to_string(); + if slug.is_empty() { + return Err("cloud provider slug must not be empty".to_string()); + } + if is_slug_reserved(&slug) { + return Err(format!( + "slug '{}' is reserved and cannot be used for a custom provider", + slug + )); + } + let auth_style = match entry + .auth_style + .as_deref() + .unwrap_or("bearer") + .to_ascii_lowercase() + .as_str() + { + "bearer" => AuthStyle::Bearer, + "anthropic" => AuthStyle::Anthropic, + "openhuman_jwt" | "openhumanjwt" => AuthStyle::OpenhumanJwt, + "none" => AuthStyle::None, + other => { + return Err(format!( + "unknown auth_style '{}'; valid: bearer, anthropic, openhuman_jwt, none", + other + )) + } + }; + let id = entry + .id + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| generate_provider_id(&slug)); + let label = entry + .label + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| slug.clone()); + let mut provider = CloudProviderCreds { + id, + slug, + label, + endpoint: entry.endpoint, + auth_style, + legacy_type: entry.legacy_type, + default_model: entry.default_model, + }; + migrate_legacy_fields(&mut provider); + 
Ok(provider) + }) + .collect::, String>>() + }) + .transpose()?, + primary_cloud: update.primary_cloud, + reasoning_provider: update.reasoning_provider, + agentic_provider: update.agentic_provider, + coding_provider: update.coding_provider, + memory_provider: update.memory_provider, + embeddings_provider: update.embeddings_provider, + heartbeat_provider: update.heartbeat_provider, + learning_provider: update.learning_provider, + subconscious_provider: update.subconscious_provider, + }; + to_json(crate::openhuman::inference::rpc::inference_update_model_settings(patch).await?) + }) +} + +fn handle_inference_update_local_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::LocalAiSettingsPatch { + runtime_enabled: update.runtime_enabled, + opt_in_confirmed: update.opt_in_confirmed, + provider: update.provider, + base_url: update.base_url, + model_id: update.model_id, + chat_model_id: update.chat_model_id, + usage_embeddings: update.usage_embeddings, + usage_heartbeat: update.usage_heartbeat, + usage_learning_reflection: update.usage_learning_reflection, + usage_subconscious: update.usage_subconscious, + }; + to_json(crate::openhuman::inference::rpc::inference_update_local_settings(patch).await?) + }) +} + +fn handle_inference_list_models(params: Map) -> ControllerFuture { + Box::pin(async move { + let request = deserialize_params::(params)?; + to_json( + crate::openhuman::inference::rpc::inference_list_models(&request.provider_id).await?, + ) + }) +} + +fn handle_inference_device_profile(_params: Map) -> ControllerFuture { + Box::pin( + async move { to_json(crate::openhuman::inference::rpc::inference_device_profile().await?) }, + ) +} + +fn handle_inference_presets(_params: Map) -> ControllerFuture { + Box::pin(async move { to_json(crate::openhuman::inference::rpc::inference_presets().await?) 
}) +} + +fn handle_inference_apply_preset(params: Map) -> ControllerFuture { + Box::pin(async move { + let request = deserialize_params::(params)?; + to_json(crate::openhuman::inference::rpc::inference_apply_preset(&request.tier).await?) + }) +} + +fn handle_inference_diagnostics(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(crate::openhuman::inference::rpc::inference_diagnostics(&config).await?) + }) +} + +fn handle_inference_summarize(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_summarize(&config, &p.text, p.max_tokens) + .await?, + ) + }) +} + +fn handle_inference_prompt(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_prompt( + &config, + &p.prompt, + p.max_tokens, + p.no_think, + ) + .await?, + ) + }) +} + +fn handle_inference_vision_prompt(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_vision_prompt( + &config, + &p.prompt, + &p.image_refs, + p.max_tokens, + ) + .await?, + ) + }) +} + +fn handle_inference_embed(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json(crate::openhuman::inference::rpc::inference_embed(&config, &p.inputs).await?) 
+ }) +} + +fn handle_inference_chat(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + let messages = p + .messages + .into_iter() + .map( + |message| crate::openhuman::inference::local::ops::LocalAiChatMessage { + role: message.role, + content: message.content, + }, + ) + .collect(); + to_json( + crate::openhuman::inference::rpc::inference_chat(&config, messages, p.max_tokens) + .await?, + ) + }) +} + +fn handle_inference_should_react(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_should_react( + &config, + &p.message, + &p.channel_type, + ) + .await?, + ) + }) +} + +fn handle_inference_analyze_sentiment(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + to_json( + crate::openhuman::inference::rpc::inference_analyze_sentiment(&config, &p.message) + .await?, + ) + }) +} + +fn deserialize_params(params: Map) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) +} + +fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} + +#[cfg(test)] +#[path = "schemas_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/schemas_tests.rs b/src/openhuman/inference/schemas_tests.rs new file mode 100644 index 0000000000..576504701f --- /dev/null +++ b/src/openhuman/inference/schemas_tests.rs @@ -0,0 +1,88 @@ +use super::*; + +#[test] +fn inference_catalog_counts_match_and_nonempty() { + let declared = all_controller_schemas(); + let registered = all_registered_controllers(); + assert_eq!(declared.len(), registered.len()); + assert!(declared.len() >= 16); +} + +#[test] +fn 
inference_schemas_use_inference_namespace() { + for schema in all_controller_schemas() { + assert_eq!( + schema.namespace, "inference", + "function {}", + schema.function + ); + assert!(!schema.description.is_empty()); + assert!(!schema.outputs.is_empty()); + } +} + +#[test] +fn inference_schema_function_names_are_stable() { + let functions: Vec<&str> = all_controller_schemas() + .into_iter() + .map(|schema| schema.function) + .collect(); + assert!(functions.contains(&"status")); + assert!(functions.contains(&"get_client_config")); + assert!(functions.contains(&"update_model_settings")); + assert!(functions.contains(&"update_local_settings")); + assert!(functions.contains(&"list_models")); + assert!(functions.contains(&"device_profile")); + assert!(functions.contains(&"presets")); + assert!(functions.contains(&"apply_preset")); + assert!(functions.contains(&"diagnostics")); + assert!(functions.contains(&"prompt")); + assert!(functions.contains(&"vision_prompt")); + assert!(functions.contains(&"embed")); + assert!(functions.contains(&"chat")); + assert!(!functions.contains(&"should_send_gif")); + assert!(!functions.contains(&"tenor_search")); +} + +#[test] +fn inference_prompt_schema_reuses_local_ai_shape_with_new_namespace() { + let schema = schemas("prompt"); + assert_eq!(schema.namespace, "inference"); + assert_eq!(schema.function, "prompt"); + assert!(schema.inputs.iter().any(|field| field.name == "prompt")); + assert!(schema.inputs.iter().any(|field| field.name == "max_tokens")); +} + +#[test] +fn inference_chat_schema_requires_messages() { + let schema = schemas("chat"); + assert_eq!(schema.namespace, "inference"); + assert_eq!(schema.function, "chat"); + assert!(schema + .inputs + .iter() + .any(|field| field.name == "messages" && field.required)); +} + +#[test] +fn inference_unknown_schema_panics() { + let panic = std::panic::catch_unwind(|| schemas("no_such_function")); + assert!(panic.is_err()); +} + +#[tokio::test] +async fn 
inference_status_handler_returns_cli_json() { + let value = handle_inference_status(Map::new()) + .await + .expect("handler value"); + assert!(value.get("result").is_some() || value.get("logs").is_some()); +} + +#[tokio::test] +async fn inference_prompt_handler_rejects_invalid_shape() { + let params = Map::from_iter([("prompt".to_string(), Value::Bool(true))]); + let err = handle_inference_prompt(params) + .await + .expect_err("invalid params"); + assert!(err.contains("invalid params")); +} diff --git a/src/openhuman/local_ai/sentiment.rs b/src/openhuman/inference/sentiment.rs similarity index 99% rename from src/openhuman/local_ai/sentiment.rs rename to src/openhuman/inference/sentiment.rs index c94bd8411c..7d15e6d8b3 100644 --- a/src/openhuman/local_ai/sentiment.rs +++ b/src/openhuman/inference/sentiment.rs @@ -1,7 +1,7 @@ //! Emotion / sentiment analysis via the bundled local AI model. use crate::openhuman::config::Config; -use crate::openhuman::local_ai; +use crate::openhuman::inference::local as local_ai; use crate::rpc::RpcOutcome; /// Result of sentiment / emotion analysis on a user message. diff --git a/src/openhuman/local_ai/types.rs b/src/openhuman/inference/types.rs similarity index 93% rename from src/openhuman/local_ai/types.rs rename to src/openhuman/inference/types.rs index 5a814fbba9..b1cfdbf39e 100644 --- a/src/openhuman/local_ai/types.rs +++ b/src/openhuman/inference/types.rs @@ -3,9 +3,9 @@ use crate::openhuman::config::Config; use serde::{Deserialize, Serialize}; +use super::local::provider::provider_from_config; use super::model_ids; use super::presets; -use super::provider::provider_from_config; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LocalAiStatus { @@ -95,11 +95,9 @@ pub struct LocalAiAssetsStatus { pub stt: LocalAiAssetStatus, pub tts: LocalAiAssetStatus, pub quantization: String, - /// True when an Ollama binary is discoverable on disk (workspace install, - /// system install, or via `OLLAMA_BIN`/configured path). 
When false, the - /// frontend should render an "Install Ollama" CTA instead of model state — - /// querying `/api/tags` against a missing server otherwise lets a 30s - /// connect timeout cascade through `has_model`. + /// True when the configured Ollama endpoint is reachable enough for model + /// checks. When false, the frontend should render external-runtime + /// guidance rather than app-managed install/start affordances. pub ollama_available: bool, } @@ -175,7 +173,7 @@ mod tests { #[test] fn disabled_status_reflects_lm_studio_provider() { - use crate::openhuman::local_ai::provider::LocalAiProvider; + use crate::openhuman::inference::local::provider::LocalAiProvider; let mut config = Config::default(); config.local_ai.provider = LocalAiProvider::LmStudio.as_str().to_string(); diff --git a/src/openhuman/voice/cloud_transcribe.rs b/src/openhuman/inference/voice/cloud_transcribe.rs similarity index 100% rename from src/openhuman/voice/cloud_transcribe.rs rename to src/openhuman/inference/voice/cloud_transcribe.rs diff --git a/src/openhuman/voice/hallucination.rs b/src/openhuman/inference/voice/hallucination.rs similarity index 100% rename from src/openhuman/voice/hallucination.rs rename to src/openhuman/inference/voice/hallucination.rs diff --git a/src/openhuman/voice/local_speech.rs b/src/openhuman/inference/voice/local_speech.rs similarity index 97% rename from src/openhuman/voice/local_speech.rs rename to src/openhuman/inference/voice/local_speech.rs index 716f32d95d..ed7bc75ba7 100644 --- a/src/openhuman/voice/local_speech.rs +++ b/src/openhuman/inference/voice/local_speech.rs @@ -26,7 +26,7 @@ //! 2. `piper` / `piper.exe` on `$PATH` //! //! Both branches share the same resolution helper as the legacy voice -//! pipeline ([`crate::openhuman::local_ai::paths::resolve_piper_binary`]), +//! pipeline ([`crate::openhuman::inference::paths::resolve_piper_binary`]), //! so STT availability checks, the installer UI, and the factory dispatch //! 
all agree on what counts as "installed". //! @@ -34,7 +34,7 @@ //! //! **Easy path:** click "Install Piper" in `Settings → Voice → Voice //! Providers`. That triggers -//! [`crate::openhuman::local_ai::install_piper`] which downloads the +//! [`crate::openhuman::inference::local::install_piper`] which downloads the //! Piper binary archive (`.zip` on Windows, `.tar.gz` on macOS / Linux) //! into `~/.openhuman/bin/piper/`, extracts it, and stages the bundled //! `en_US-lessac-medium` voice (`.onnx` + `.onnx.json`) alongside via a @@ -67,10 +67,12 @@ use base64::{engine::general_purpose::STANDARD as BASE64, Engine}; use log::debug; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::paths::{resolve_piper_binary_with_config, resolve_tts_voice_path}; +use crate::openhuman::inference::paths::{ + resolve_piper_binary_with_config, resolve_tts_voice_path, +}; use crate::rpc::RpcOutcome; -use super::reply_speech::{ReplySpeechResult, VisemeFrame}; +use crate::openhuman::voice::reply_speech::{ReplySpeechResult, VisemeFrame}; const LOG_PREFIX: &str = "[voice-tts]"; diff --git a/src/openhuman/voice/local_transcribe.rs b/src/openhuman/inference/voice/local_transcribe.rs similarity index 97% rename from src/openhuman/voice/local_transcribe.rs rename to src/openhuman/inference/voice/local_transcribe.rs index c2a9106a27..7822b4f742 100644 --- a/src/openhuman/voice/local_transcribe.rs +++ b/src/openhuman/inference/voice/local_transcribe.rs @@ -8,14 +8,14 @@ //! //! When neither resolves, transcription fails with a clear, actionable //! error pointing the user at the install path. Resolution lives in -//! [`crate::openhuman::local_ai::paths::resolve_whisper_binary`] — kept in +//! [`crate::openhuman::inference::paths::resolve_whisper_binary`] — kept in //! one place so STT, voice-status, and the installer all agree. //! //! ## Where to get the binary //! //! **Easy path:** click "Install Whisper" in `Settings → Voice → Voice //! Providers`. That triggers -//! 
[`crate::openhuman::local_ai::install_whisper`] which streams the +//! [`crate::openhuman::inference::local::install_whisper`] which streams the //! GGML model file (`ggml-.bin`) into //! `~/.openhuman/bin/whisper/` via a `.part` file + atomic rename, plus //! the `whisper-cli` binary on Windows where upstream ships a release @@ -55,7 +55,7 @@ use log::{debug, warn}; use serde::{Deserialize, Serialize}; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::paths::resolve_whisper_binary_with_config; +use crate::openhuman::inference::paths::resolve_whisper_binary_with_config; use crate::rpc::RpcOutcome; const LOG_PREFIX: &str = "[voice-stt]"; @@ -169,7 +169,7 @@ pub async fn transcribe_whisper( // config default is, producing a mismatch between the returned model_id // and the model actually used for transcription. let model_path = - crate::openhuman::local_ai::paths::resolve_stt_model_path_by_id(&model_id, config) + crate::openhuman::inference::paths::resolve_stt_model_path_by_id(&model_id, config) .map_err(|e| format!("{LOG_PREFIX} {e}"))?; debug!("{LOG_PREFIX} resolved STT model path={model_path}"); diff --git a/src/openhuman/inference/voice/mod.rs b/src/openhuman/inference/voice/mod.rs new file mode 100644 index 0000000000..b436408640 --- /dev/null +++ b/src/openhuman/inference/voice/mod.rs @@ -0,0 +1,12 @@ +//! Inference-side voice: local/cloud transcription (STT) and local TTS. +//! +//! Audio I/O, hotkeys, dictation, and the voice RPC surface remain in +//! `crate::openhuman::voice`. The files here are the actual inference +//! implementations that `voice/` imports. 
+ +pub mod cloud_transcribe; +pub mod hallucination; +pub mod local_speech; +pub mod local_transcribe; +pub mod postprocess; +pub mod streaming; diff --git a/src/openhuman/voice/postprocess.rs b/src/openhuman/inference/voice/postprocess.rs similarity index 96% rename from src/openhuman/voice/postprocess.rs rename to src/openhuman/inference/voice/postprocess.rs index 52b17ab353..3e6934f359 100644 --- a/src/openhuman/voice/postprocess.rs +++ b/src/openhuman/inference/voice/postprocess.rs @@ -8,7 +8,7 @@ use log::{debug, info, warn}; use std::time::Instant; use crate::openhuman::config::Config; -use crate::openhuman::local_ai; +use crate::openhuman::inference::local as local_ai; const LOG_PREFIX: &str = "[voice_postprocess]"; @@ -225,7 +225,7 @@ mod tests { #[tokio::test] async fn disabled_cleanup_returns_raw_text() { - let _g = crate::openhuman::local_ai::local_ai_test_guard(); + let _g = crate::openhuman::inference::inference_test_guard(); let mut config = Config::default(); config.local_ai.voice_llm_cleanup_enabled = false; let service = local_ai::global(&config); @@ -241,7 +241,7 @@ mod tests { // Covers the branch where cleanup is enabled in config but the // local LLM hasn't reached the ready/degraded state yet — // cleanup must gracefully fall back to the raw Whisper output. 
- let _g = crate::openhuman::local_ai::local_ai_test_guard(); + let _g = crate::openhuman::inference::inference_test_guard(); let config = Config::default(); // voice_llm_cleanup_enabled = true by default let service = local_ai::global(&config); let previous = service.status.lock().state.clone(); @@ -282,7 +282,7 @@ mod tests { #[tokio::test] async fn ready_llm_returns_trimmed_cleanup_or_falls_back() { - let _g = crate::openhuman::local_ai::local_ai_test_guard(); + let _g = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/generate", post(|| async { @@ -305,7 +305,7 @@ mod tests { #[tokio::test] async fn ready_llm_empty_response_falls_back_to_raw_text() { - let _g = crate::openhuman::local_ai::local_ai_test_guard(); + let _g = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/generate", post(|| async { Json(json!({"model":"test","response":" ","done": true})) }), @@ -323,7 +323,7 @@ mod tests { #[tokio::test] async fn ready_llm_error_response_falls_back_to_raw_text() { - let _g = crate::openhuman::local_ai::local_ai_test_guard(); + let _g = crate::openhuman::inference::inference_test_guard(); let app = Router::new().route( "/api/generate", post(|| async { @@ -349,7 +349,7 @@ mod tests { // glued the conversation context in front of the raw text when // the LLM ran. If the global state raced away from "ready" the // call short-circuits to raw — still valid, just the other branch. - let _g = crate::openhuman::local_ai::local_ai_test_guard(); + let _g = crate::openhuman::inference::inference_test_guard(); #[derive(serde::Deserialize)] struct Body { prompt: String, @@ -386,7 +386,7 @@ mod tests { // "Conversation context:" header regardless of which branch // runs — the LLM path uses the raw-text-only prompt, and the // short-circuit path never builds a prompt at all. 
- let _g = crate::openhuman::local_ai::local_ai_test_guard(); + let _g = crate::openhuman::inference::inference_test_guard(); #[derive(serde::Deserialize)] struct Body { prompt: String, diff --git a/src/openhuman/voice/streaming.rs b/src/openhuman/inference/voice/streaming.rs similarity index 98% rename from src/openhuman/voice/streaming.rs rename to src/openhuman/inference/voice/streaming.rs index df7e6861b0..266341d6c5 100644 --- a/src/openhuman/voice/streaming.rs +++ b/src/openhuman/inference/voice/streaming.rs @@ -22,8 +22,8 @@ use tokio::sync::Mutex; use super::postprocess; use crate::openhuman::config::Config; -use crate::openhuman::local_ai; -use crate::openhuman::local_ai::whisper_engine; +use crate::openhuman::inference::local as local_ai; +use crate::openhuman::inference::local::service::whisper_engine; use crate::openhuman::util::utf8_safe_prefix_at_byte_boundary; const LOG_PREFIX: &str = "[voice-stream]"; diff --git a/src/openhuman/learning/linkedin_enrichment.rs b/src/openhuman/learning/linkedin_enrichment.rs index 081b768b4a..3d8dd6c063 100644 --- a/src/openhuman/learning/linkedin_enrichment.rs +++ b/src/openhuman/learning/linkedin_enrichment.rs @@ -309,7 +309,7 @@ async fn write_profile_md( /// Ask the backend LLM to distil the raw LinkedIn Markdown into a /// concise, high-signal profile document suitable for agent context. 
pub async fn summarise_profile_with_llm(config: &Config, raw_md: &str) -> anyhow::Result { - use crate::openhuman::providers::ops::{ + use crate::openhuman::inference::provider::ops::{ create_backend_inference_provider, ProviderRuntimeOptions, }; diff --git a/src/openhuman/learning/reflection.rs b/src/openhuman/learning/reflection.rs index a9ec12a27f..7ad8f11ffd 100644 --- a/src/openhuman/learning/reflection.rs +++ b/src/openhuman/learning/reflection.rs @@ -44,7 +44,7 @@ pub struct ReflectionHook { config: LearningConfig, full_config: Arc, memory: Arc, - provider: Option>, + provider: Option>, /// Per-session reflection counts for throttling. Key is session_id (or "__global__"). session_counts: Mutex>, } @@ -54,7 +54,7 @@ impl ReflectionHook { config: LearningConfig, full_config: Arc, memory: Arc, - provider: Option>, + provider: Option>, ) -> Self { Self { config, @@ -186,7 +186,7 @@ impl ReflectionHook { log::debug!( "[learning::reflection] local route — gate permit acquired via LocalAiService" ); - let service = crate::openhuman::local_ai::global(&self.full_config); + let service = crate::openhuman::inference::local::global(&self.full_config); service .prompt(&self.full_config, prompt, Some(512), true) .await diff --git a/src/openhuman/learning/reflection_tests.rs b/src/openhuman/learning/reflection_tests.rs index 9d9d65f850..08c84dbfd7 100644 --- a/src/openhuman/learning/reflection_tests.rs +++ b/src/openhuman/learning/reflection_tests.rs @@ -288,7 +288,7 @@ async fn persist_reflection_writes_to_dedicated_namespace_and_category() { #[tokio::test] async fn on_turn_complete_dedupes_reflections_across_heuristic_and_llm_paths() { - use crate::openhuman::providers::Provider; + use crate::openhuman::inference::provider::Provider; use async_trait::async_trait; // Stub provider returning a reflection LLM response whose @@ -480,8 +480,8 @@ async fn on_turn_complete_emits_candidates_to_buffer_for_heuristic_cues() { #[tokio::test] async fn 
on_turn_complete_emits_style_candidates_from_llm_preferences() { + use crate::openhuman::inference::provider::Provider; use crate::openhuman::learning::candidate::{self, FacetClass}; - use crate::openhuman::providers::Provider; struct StubPrefProvider; #[async_trait] diff --git a/src/openhuman/learning/transcript_ingest/extract.rs b/src/openhuman/learning/transcript_ingest/extract.rs index bf1f57dd9d..0d456ad46e 100644 --- a/src/openhuman/learning/transcript_ingest/extract.rs +++ b/src/openhuman/learning/transcript_ingest/extract.rs @@ -18,7 +18,7 @@ //! - Tool messages are never mined — they're high-noise and fully //! reconstructable from the transcript itself. -use crate::openhuman::providers::ChatMessage; +use crate::openhuman::inference::provider::ChatMessage; use super::types::{CandidateKind, ConversationReflection, Importance, MemoryCandidate}; diff --git a/src/openhuman/learning/transcript_ingest/tests.rs b/src/openhuman/learning/transcript_ingest/tests.rs index 7536ac3bb7..03197653e4 100644 --- a/src/openhuman/learning/transcript_ingest/tests.rs +++ b/src/openhuman/learning/transcript_ingest/tests.rs @@ -5,8 +5,8 @@ use super::*; use crate::openhuman::agent::harness::session::transcript::{SessionTranscript, TranscriptMeta}; +use crate::openhuman::inference::provider::ChatMessage; use crate::openhuman::memory::{Memory, MemoryCategory, MemoryEntry, NamespaceSummary, RecallOpts}; -use crate::openhuman::providers::ChatMessage; use async_trait::async_trait; use std::path::PathBuf; use std::sync::Mutex; diff --git a/src/openhuman/local_ai/README.md b/src/openhuman/local_ai/README.md deleted file mode 100644 index 2490c5463a..0000000000 --- a/src/openhuman/local_ai/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# Local AI - -On-device inference stack. 
Owns the bundled Ollama runtime, LM Studio local-server integration, whisper.cpp speech-to-text, Piper text-to-speech, sentiment scoring, vision-embedding routing, the model preset / device-profile chooser, asset download + install management, the GIF-decision heuristic, and the per-session `LocalAiService` singleton. Does NOT own remote-provider HTTP transport (`providers/`) or the agent tool loop (`agent/`). - -## Public surface - -- `pub struct LocalAiService` — `service/mod.rs` — singleton holding Ollama / LM Studio / whisper / Piper handles. -- `pub fn global(config: &Config) -> Arc` — `core.rs` — singleton accessor. -- `pub fn model_artifact_path(config: &Config) -> PathBuf` — `core.rs` — resolve on-disk model path. -- `pub struct DeviceProfile` — `device.rs` — RAM / VRAM / CPU classification used for preset selection. -- `pub struct ModelPreset` / `pub enum ModelTier` / `pub enum VisionMode` — `presets.rs` — bundled preset matrix. -- `pub struct SentimentResult` — `sentiment.rs` — polarity + magnitude scoring. -- `pub struct GifDecision` / `pub struct TenorGifResult` / `pub struct TenorSearchResult` — `gif_decision.rs`. -- Status / progress / result types: `pub struct LocalAiStatus`, `LocalAiAssetStatus`, `LocalAiAssetsStatus`, `LocalAiDownloadProgressItem`, `LocalAiDownloadsProgress`, `LocalAiEmbeddingResult`, `LocalAiSpeechResult`, `LocalAiTtsResult` — `types.rs`. -- `pub mod ops` (re-exported as `rpc`) — `ops.rs` — typed Rust wrappers around each capability (`agent_chat`, `agent_chat_simple`, `summarize`, `prompt`, `vision_prompt`, `embed`, `transcribe`, `tts`, `should_react`, `analyze_sentiment`, `should_send_gif`, `tenor_search`). 
-- RPC `local_ai.{agent_chat, agent_chat_simple, local_ai_status, local_ai_download, local_ai_download_all_assets, local_ai_summarize, local_ai_prompt, local_ai_vision_prompt, local_ai_embed, local_ai_transcribe, local_ai_transcribe_bytes, local_ai_tts, local_ai_assets_status, local_ai_downloads_progress, local_ai_download_asset, local_ai_device_profile, local_ai_presets, local_ai_apply_preset, local_ai_diagnostics, local_ai_set_ollama_path, local_ai_chat, local_ai_should_react, local_ai_analyze_sentiment, local_ai_should_send_gif, local_ai_tenor_search}` — `schemas.rs`. - -## Calls into - -- `src/openhuman/config/` — provider selection, model IDs, local server URL override, device-profile inputs. -- `src/openhuman/encryption/` — Tenor / asset keys at rest. -- Bundled binaries: Ollama (HTTP `OLLAMA_BASE_URL`), whisper.cpp, Piper. -- LM Studio local server via OpenAI-compatible `GET /v1/models` and `POST /v1/chat/completions`. -- HTTP for Tenor GIF search. -- Filesystem under `~/.openhuman/local-ai/` for downloaded model artifacts. - -## Called by - -- `src/openhuman/agent/` — `local_ai::rpc::agent_chat` / `agent_chat_simple` are the primary chat backends; triage uses `agent::triage::routing` to decide local vs remote. -- `src/openhuman/voice/{streaming,postprocess,ops,types}.rs` — speech-to-text + text-to-speech. -- `src/openhuman/screen_intelligence/processing_worker.rs` — vision embedding + summarisation. -- `src/openhuman/autocomplete/core/engine.rs` — local-AI completions. -- `src/openhuman/tree_summarizer/ops.rs` — summarisation backend. -- `src/openhuman/app_state/ops.rs` — `LocalAiStatus` snapshot. -- `src/core/all.rs` — registers `all_local_ai_*`. - -## Tests - -- Unit: `ops_tests.rs`, `schemas_tests.rs`, plus `service/ollama_admin_tests.rs`, `service/public_infer_tests.rs`. -- Domain mutex: `LOCAL_AI_TEST_MUTEX` (`mod.rs:4`) serializes tests that mutate the singleton or env vars. 
-- Routing: `agent/triage/routing_tests.rs` covers local-vs-remote escalation. - -## LM Studio - -Set `local_ai.provider = "lm_studio"`, `local_ai.runtime_enabled = true`, and `local_ai.opt_in_confirmed = true`, then run LM Studio's local server with the OpenAI-compatible API enabled. The default base URL is `http://localhost:1234/v1`; override it with `local_ai.base_url`, `OPENHUMAN_LM_STUDIO_BASE_URL`, or `LM_STUDIO_BASE_URL`. - -This first provider slice covers connection validation, model discovery, diagnostics, direct local chat/prompt requests, and intelligent-routing local chat through LM Studio. LM Studio manages its own model downloads and loading; OpenHuman reports missing chat models as actionable status instead of trying to pull them. Vision and embeddings stay on the existing Ollama-specific paths until those provider surfaces are split. diff --git a/src/openhuman/local_ai/gif_decision.rs b/src/openhuman/local_ai/gif_decision.rs deleted file mode 100644 index 2820f36634..0000000000 --- a/src/openhuman/local_ai/gif_decision.rs +++ /dev/null @@ -1,317 +0,0 @@ -//! GIF decision via local AI model + Tenor search via the backend API. - -use serde_json::Value; - -use crate::api::config::effective_backend_api_url; -use crate::api::jwt::get_session_token; -use crate::api::rest::BackendOAuthClient; -use crate::openhuman::config::Config; -use crate::openhuman::local_ai; -use crate::rpc::RpcOutcome; - -// --------------------------------------------------------------------------- -// GIF decision — local model decides whether a GIF response is appropriate -// --------------------------------------------------------------------------- - -/// Result of the GIF-decision prompt. -#[derive(Debug, serde::Serialize)] -pub struct GifDecision { - /// Whether the model thinks sending a GIF is appropriate right now. - pub should_send_gif: bool, - /// Tenor search query (only meaningful when `should_send_gif` is true). 
- pub search_query: Option, -} - -/// Ask the local model whether the assistant should respond with a GIF, -/// based on channel type and message content. Designed to be called every -/// ~5-10 messages, not on every message. Lightweight: ~12 output tokens. -pub async fn local_ai_should_send_gif( - config: &Config, - message: &str, - channel_type: &str, -) -> Result, String> { - tracing::debug!( - channel_type, - msg_len = message.len(), - "[local_ai:gif] evaluating gif decision" - ); - - if message.trim().is_empty() { - return Ok(RpcOutcome::single_log( - GifDecision { - should_send_gif: false, - search_query: None, - }, - "empty message — no gif", - )); - } - - let service = local_ai::global(config); - let status = service.status(); - if !matches!(status.state.as_str(), "ready") { - tracing::debug!("[local_ai:gif] local model not ready, skipping"); - return Ok(RpcOutcome::single_log( - GifDecision { - should_send_gif: false, - search_query: None, - }, - "local model not ready", - )); - } - - let prompt = format!( - "You decide whether an AI assistant should respond with a GIF.\n\ - GIFs are appropriate for: humor, celebration, empathy, reactions to exciting news, \ - casual banter in friendly channels.\n\ - GIFs are NOT appropriate for: technical questions, serious topics, first messages, \ - professional channels (slack, email), or when the user seems upset or frustrated.\n\n\ - Channel: {channel_type}\nUser message: {message}\n\n\ - Reply with EXACTLY one line:\n\ - NONE (no GIF) OR a 2-4 word Tenor search query for a fitting GIF." 
- ); - - let output = service.prompt(config, &prompt, Some(12), true).await; - - let decision = match output { - Ok(raw) => { - let trimmed = raw.trim(); - tracing::debug!( - response = %trimmed, - "[local_ai:gif] model response" - ); - parse_gif_response(trimmed) - } - Err(e) => { - tracing::debug!(error = %e, "[local_ai:gif] inference failed, skipping"); - GifDecision { - should_send_gif: false, - search_query: None, - } - } - }; - - tracing::debug!( - should_send = decision.should_send_gif, - query = ?decision.search_query, - "[local_ai:gif] decision" - ); - Ok(RpcOutcome::single_log(decision, "gif decision completed")) -} - -/// Parse the model's response into a `GifDecision`. -fn parse_gif_response(text: &str) -> GifDecision { - let trimmed = text.trim(); - - if trimmed.is_empty() - || trimmed.eq_ignore_ascii_case("NONE") - || trimmed.eq_ignore_ascii_case("no gif") - { - return GifDecision { - should_send_gif: false, - search_query: None, - }; - } - - // The model should return a short search query. Sanity-check length: - // reject anything too long (probably the model rambled) or too short. - let word_count = trimmed.split_whitespace().count(); - if word_count > 8 || trimmed.len() > 80 { - tracing::debug!( - words = word_count, - len = trimmed.len(), - "[local_ai:gif] response too long, treating as NONE" - ); - return GifDecision { - should_send_gif: false, - search_query: None, - }; - } - - GifDecision { - should_send_gif: true, - search_query: Some(trimmed.to_string()), - } -} - -// --------------------------------------------------------------------------- -// Tenor search — proxy through the backend API -// --------------------------------------------------------------------------- - -/// A single GIF result from Tenor. 
-#[derive(Debug, serde::Serialize, serde::Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct TenorGifResult { - pub id: String, - pub title: String, - #[serde(default)] - pub content_description: String, - pub url: String, - #[serde(default)] - pub media: Value, - #[serde(default)] - pub created: i64, -} - -/// Wrapper for the Tenor search response. -#[derive(Debug, serde::Serialize, serde::Deserialize)] -pub struct TenorSearchResult { - pub results: Vec, - #[serde(default)] - pub next: String, -} - -/// Search for GIFs via the backend's Tenor proxy endpoint. -/// Requires a valid session JWT (the backend charges against user budget). -pub async fn tenor_search( - config: &Config, - query: &str, - limit: Option, -) -> Result, String> { - tracing::debug!( - query, - limit = ?limit, - "[local_ai:gif] searching tenor" - ); - - if query.trim().is_empty() { - return Err("query is required".to_string()); - } - - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? - .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let raw = client - .search_tenor_gifs(&jwt, query, limit) - .await - .map_err(|e| format!("tenor search failed: {e}"))?; - - tracing::debug!( - result_keys = ?raw.as_object().map(|o| o.keys().collect::>()), - "[local_ai:gif] tenor search response received" - ); - - // The backend wraps results in { success, data: { results, next, costUsd } }. - // Extract the inner data. 
- let data = raw.get("data").cloned().unwrap_or_else(|| raw.clone()); - - let result: TenorSearchResult = serde_json::from_value(data).map_err(|e| { - tracing::debug!(error = %e, "[local_ai:gif] failed to parse tenor response"); - format!("parse tenor response: {e}") - })?; - - tracing::debug!( - count = result.results.len(), - "[local_ai:gif] tenor returned {} results", - result.results.len() - ); - - Ok(RpcOutcome::single_log(result, "tenor search completed")) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_none_response() { - let d = parse_gif_response("NONE"); - assert!(!d.should_send_gif); - assert!(d.search_query.is_none()); - } - - #[test] - fn parse_none_case_insensitive() { - let d = parse_gif_response("none"); - assert!(!d.should_send_gif); - } - - #[test] - fn parse_empty_response() { - let d = parse_gif_response(""); - assert!(!d.should_send_gif); - } - - #[test] - fn parse_valid_query() { - let d = parse_gif_response("happy dance celebration"); - assert!(d.should_send_gif); - assert_eq!(d.search_query.as_deref(), Some("happy dance celebration")); - } - - #[test] - fn parse_short_query() { - let d = parse_gif_response("thumbs up"); - assert!(d.should_send_gif); - assert_eq!(d.search_query.as_deref(), Some("thumbs up")); - } - - #[test] - fn parse_too_long_response() { - let long = "this is a very long response that the model should not have generated because it rambled on and on"; - let d = parse_gif_response(long); - assert!(!d.should_send_gif); - } - - #[test] - fn parse_no_gif_variant() { - let d = parse_gif_response("no gif"); - assert!(!d.should_send_gif); - } - - #[test] - fn parse_trims_surrounding_whitespace() { - let d = parse_gif_response(" NONE "); - assert!(!d.should_send_gif); - - let d = parse_gif_response(" hello wave "); - assert!(d.should_send_gif); - assert_eq!(d.search_query.as_deref(), Some("hello wave")); - } - - #[test] - fn parse_reject_over_eighty_chars_even_if_word_count_small() { - // 8 words but ≥ 80 
chars is still rejected — protects against - // words that are URL-like or extremely long. - let long_word = "x".repeat(90); - let d = parse_gif_response(&long_word); - assert!(!d.should_send_gif); - } - - #[test] - fn parse_reject_more_than_eight_words() { - let nine_words = "one two three four five six seven eight nine"; - let d = parse_gif_response(nine_words); - assert!(!d.should_send_gif); - } - - #[test] - fn parse_accepts_boundary_eight_words() { - // Exactly 8 words: accepted. - let eight = "one two three four five six seven eight"; - let d = parse_gif_response(eight); - assert!(d.should_send_gif); - } - - // ── tenor_search guard paths ───────────────────────────────── - - #[tokio::test] - async fn tenor_search_rejects_empty_query() { - let config = crate::openhuman::config::Config::default(); - let err = tenor_search(&config, " ", Some(5)).await.unwrap_err(); - assert!(err.contains("query is required")); - } - - // ── local_ai_should_send_gif early-returns ────────────────── - - #[tokio::test] - async fn should_send_gif_returns_false_for_empty_message() { - let config = crate::openhuman::config::Config::default(); - let outcome = local_ai_should_send_gif(&config, " ", "slack") - .await - .unwrap(); - assert!(!outcome.value.should_send_gif); - assert!(outcome.logs.iter().any(|l| l.contains("empty message"))); - } -} diff --git a/src/openhuman/local_ai/mod.rs b/src/openhuman/local_ai/mod.rs deleted file mode 100644 index 596e983973..0000000000 --- a/src/openhuman/local_ai/mod.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! Bundled local AI stack (Ollama / LM Studio, whisper.cpp, Piper). 
- -#[cfg(test)] -pub(crate) static LOCAL_AI_TEST_MUTEX: once_cell::sync::Lazy> = - once_cell::sync::Lazy::new(|| std::sync::Mutex::new(())); - -#[cfg(test)] -pub(crate) fn local_ai_test_guard() -> std::sync::MutexGuard<'static, ()> { - LOCAL_AI_TEST_MUTEX - .lock() - .unwrap_or_else(|p| p.into_inner()) -} - -mod core; -pub mod device; -pub mod gif_decision; -pub mod ops; -pub mod presets; -mod schemas; -pub mod sentiment; - -mod install; -pub(crate) mod install_piper; -pub(crate) mod install_whisper; -pub(crate) mod lm_studio_api; -pub(crate) mod model_ids; -mod ollama_api; -mod process_util; -pub(crate) use ollama_api::{ollama_base_url, OLLAMA_BASE_URL}; -mod parse; -pub(crate) mod paths; -pub(crate) mod provider; -mod service; -mod types; -pub(crate) mod voice_install_common; - -pub use core::*; -pub use device::DeviceProfile; -pub use gif_decision::{GifDecision, TenorGifResult, TenorSearchResult}; -pub use ops as rpc; -pub use ops::*; -pub use presets::{ModelPreset, ModelTier, VisionMode}; -pub use schemas::{ - all_controller_schemas as all_local_ai_controller_schemas, - all_registered_controllers as all_local_ai_registered_controllers, -}; -pub use sentiment::SentimentResult; -pub(crate) use service::whisper_engine; -pub use service::LocalAiService; -pub use types::{ - LocalAiAssetStatus, LocalAiAssetsStatus, LocalAiDownloadProgressItem, LocalAiDownloadsProgress, - LocalAiEmbeddingResult, LocalAiSpeechResult, LocalAiStatus, LocalAiTtsResult, -}; diff --git a/src/openhuman/local_ai/schemas.rs b/src/openhuman/local_ai/schemas.rs deleted file mode 100644 index cf8c56f45e..0000000000 --- a/src/openhuman/local_ai/schemas.rs +++ /dev/null @@ -1,1271 +0,0 @@ -use serde::de::DeserializeOwned; -use serde::Deserialize; -use serde_json::{Map, Value}; - -use crate::core::all::{ControllerFuture, RegisteredController}; -use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; -use crate::openhuman::config::rpc as config_rpc; -use crate::rpc::RpcOutcome; - 
-#[derive(Debug, Deserialize)] -struct AgentChatParams { - message: String, - model_override: Option, - temperature: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiDownloadParams { - force: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiSummarizeParams { - text: String, - max_tokens: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiPromptParams { - prompt: String, - max_tokens: Option, - no_think: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiVisionPromptParams { - prompt: String, - image_refs: Vec, - max_tokens: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiEmbedParams { - inputs: Vec, -} - -#[derive(Debug, Deserialize)] -struct LocalAiTranscribeParams { - audio_path: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiTranscribeBytesParams { - audio_bytes: Vec, - extension: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiTtsParams { - text: String, - output_path: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiDownloadAssetParams { - capability: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiApplyPresetParams { - tier: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiSetOllamaPathParams { - path: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiChatMessageParam { - role: String, - content: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiChatParams { - messages: Vec, - max_tokens: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiShouldReactParams { - message: String, - channel_type: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiAnalyzeSentimentParams { - message: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiShouldSendGifParams { - message: String, - channel_type: String, -} - -#[derive(Debug, Deserialize)] -struct LocalAiTenorSearchParams { - query: String, - limit: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiInstallWhisperParams { - /// Optional model size (`tiny`, `base`, `small`, 
`medium`, - /// `large-v3-turbo`). Defaults to `large-v3-turbo`. - #[serde(default)] - model_size: Option, - /// When true, blow away any existing model file and re-download. - #[serde(default)] - force: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiInstallPiperParams { - /// Optional Piper voice id (e.g. `en_US-lessac-medium`). Defaults to - /// the bundled US-English Lessac voice. - #[serde(default)] - voice_id: Option, - /// When true, blow away any existing voice file and re-download. - #[serde(default)] - force: Option, -} - -pub fn all_controller_schemas() -> Vec { - vec![ - schemas("agent_chat"), - schemas("agent_chat_simple"), - schemas("local_ai_status"), - schemas("local_ai_shutdown_owned"), - schemas("local_ai_download"), - schemas("local_ai_download_all_assets"), - schemas("local_ai_summarize"), - schemas("local_ai_prompt"), - schemas("local_ai_vision_prompt"), - schemas("local_ai_embed"), - schemas("local_ai_transcribe"), - schemas("local_ai_transcribe_bytes"), - schemas("local_ai_tts"), - schemas("local_ai_assets_status"), - schemas("local_ai_downloads_progress"), - schemas("local_ai_download_asset"), - schemas("local_ai_device_profile"), - schemas("local_ai_presets"), - schemas("local_ai_apply_preset"), - schemas("local_ai_set_ollama_path"), - schemas("local_ai_diagnostics"), - schemas("local_ai_chat"), - schemas("local_ai_should_react"), - schemas("local_ai_analyze_sentiment"), - schemas("local_ai_should_send_gif"), - schemas("local_ai_tenor_search"), - schemas("local_ai_install_whisper"), - schemas("local_ai_install_piper"), - schemas("local_ai_whisper_install_status"), - schemas("local_ai_piper_install_status"), - ] -} - -pub fn all_registered_controllers() -> Vec { - vec![ - RegisteredController { - schema: schemas("agent_chat"), - handler: handle_agent_chat, - }, - RegisteredController { - schema: schemas("agent_chat_simple"), - handler: handle_agent_chat_simple, - }, - RegisteredController { - schema: schemas("local_ai_status"), - 
handler: handle_local_ai_status, - }, - RegisteredController { - schema: schemas("local_ai_shutdown_owned"), - handler: handle_local_ai_shutdown_owned, - }, - RegisteredController { - schema: schemas("local_ai_download"), - handler: handle_local_ai_download, - }, - RegisteredController { - schema: schemas("local_ai_download_all_assets"), - handler: handle_local_ai_download_all_assets, - }, - RegisteredController { - schema: schemas("local_ai_summarize"), - handler: handle_local_ai_summarize, - }, - RegisteredController { - schema: schemas("local_ai_prompt"), - handler: handle_local_ai_prompt, - }, - RegisteredController { - schema: schemas("local_ai_vision_prompt"), - handler: handle_local_ai_vision_prompt, - }, - RegisteredController { - schema: schemas("local_ai_embed"), - handler: handle_local_ai_embed, - }, - RegisteredController { - schema: schemas("local_ai_transcribe"), - handler: handle_local_ai_transcribe, - }, - RegisteredController { - schema: schemas("local_ai_transcribe_bytes"), - handler: handle_local_ai_transcribe_bytes, - }, - RegisteredController { - schema: schemas("local_ai_tts"), - handler: handle_local_ai_tts, - }, - RegisteredController { - schema: schemas("local_ai_assets_status"), - handler: handle_local_ai_assets_status, - }, - RegisteredController { - schema: schemas("local_ai_downloads_progress"), - handler: handle_local_ai_downloads_progress, - }, - RegisteredController { - schema: schemas("local_ai_download_asset"), - handler: handle_local_ai_download_asset, - }, - RegisteredController { - schema: schemas("local_ai_device_profile"), - handler: handle_local_ai_device_profile, - }, - RegisteredController { - schema: schemas("local_ai_presets"), - handler: handle_local_ai_presets, - }, - RegisteredController { - schema: schemas("local_ai_apply_preset"), - handler: handle_local_ai_apply_preset, - }, - RegisteredController { - schema: schemas("local_ai_set_ollama_path"), - handler: handle_local_ai_set_ollama_path, - }, - RegisteredController 
{ - schema: schemas("local_ai_diagnostics"), - handler: handle_local_ai_diagnostics, - }, - RegisteredController { - schema: schemas("local_ai_chat"), - handler: handle_local_ai_chat, - }, - RegisteredController { - schema: schemas("local_ai_should_react"), - handler: handle_local_ai_should_react, - }, - RegisteredController { - schema: schemas("local_ai_analyze_sentiment"), - handler: handle_local_ai_analyze_sentiment, - }, - RegisteredController { - schema: schemas("local_ai_should_send_gif"), - handler: handle_local_ai_should_send_gif, - }, - RegisteredController { - schema: schemas("local_ai_tenor_search"), - handler: handle_local_ai_tenor_search, - }, - RegisteredController { - schema: schemas("local_ai_install_whisper"), - handler: handle_local_ai_install_whisper, - }, - RegisteredController { - schema: schemas("local_ai_install_piper"), - handler: handle_local_ai_install_piper, - }, - RegisteredController { - schema: schemas("local_ai_whisper_install_status"), - handler: handle_local_ai_whisper_install_status, - }, - RegisteredController { - schema: schemas("local_ai_piper_install_status"), - handler: handle_local_ai_piper_install_status, - }, - ] -} - -pub fn schemas(function: &str) -> ControllerSchema { - match function { - "agent_chat" => ControllerSchema { - namespace: "local_ai", - function: "agent_chat", - description: "Run one-shot agent chat with optional model overrides.", - inputs: vec![ - required_string("message", "User message."), - optional_string("model_override", "Optional model override."), - optional_f64("temperature", "Optional temperature override."), - ], - outputs: vec![json_output("response", "Agent response payload.")], - }, - "agent_chat_simple" => ControllerSchema { - namespace: "local_ai", - function: "agent_chat_simple", - description: "Run one-shot lightweight provider chat.", - inputs: vec![ - required_string("message", "User message."), - optional_string("model_override", "Optional model override."), - 
optional_f64("temperature", "Optional temperature override."), - ], - outputs: vec![json_output("response", "Agent response payload.")], - }, - "local_ai_status" => ControllerSchema { - namespace: "local_ai", - function: "status", - description: "Read local AI service status.", - inputs: vec![], - outputs: vec![json_output("status", "Local AI status payload.")], - }, - "local_ai_shutdown_owned" => ControllerSchema { - namespace: "local_ai", - function: "shutdown_owned", - description: - "Gate off the local AI runtime. Kills the Ollama daemon only \ - if OpenHuman spawned it (external daemons are left running). \ - Forces status to \"disabled\" so the UI flips immediately.", - inputs: vec![], - outputs: vec![json_output("status", "Local AI status after shutdown.")], - }, - "local_ai_download" => ControllerSchema { - namespace: "local_ai", - function: "download", - description: "Trigger local AI model download bootstrap.", - inputs: vec![optional_bool("force", "Reset state before download.")], - outputs: vec![json_output("status", "Local AI status payload.")], - }, - "local_ai_download_all_assets" => ControllerSchema { - namespace: "local_ai", - function: "download_all_assets", - description: "Trigger full local AI asset download.", - inputs: vec![optional_bool("force", "Reset state before download.")], - outputs: vec![json_output("progress", "Download progress payload.")], - }, - "local_ai_summarize" => ControllerSchema { - namespace: "local_ai", - function: "summarize", - description: "Summarize text with local AI model.", - inputs: vec![ - required_string("text", "Input text."), - optional_u64("max_tokens", "Optional max output tokens."), - ], - outputs: vec![json_output("summary", "Summary text.")], - }, - "local_ai_prompt" => ControllerSchema { - namespace: "local_ai", - function: "prompt", - description: "Run direct local AI prompt.", - inputs: vec![ - required_string("prompt", "Prompt text."), - optional_u64("max_tokens", "Optional max output tokens."), - 
optional_bool("no_think", "Disable thinking mode."), - ], - outputs: vec![json_output("output", "Prompt output text.")], - }, - "local_ai_vision_prompt" => ControllerSchema { - namespace: "local_ai", - function: "vision_prompt", - description: "Run multimodal local AI prompt with image refs.", - inputs: vec![ - required_string("prompt", "Prompt text."), - FieldSchema { - name: "image_refs", - ty: TypeSchema::Array(Box::new(TypeSchema::String)), - comment: "Image references to include.", - required: true, - }, - optional_u64("max_tokens", "Optional max output tokens."), - ], - outputs: vec![json_output("output", "Prompt output text.")], - }, - "local_ai_embed" => ControllerSchema { - namespace: "local_ai", - function: "embed", - description: "Generate embeddings for text inputs.", - inputs: vec![FieldSchema { - name: "inputs", - ty: TypeSchema::Array(Box::new(TypeSchema::String)), - comment: "Texts to embed.", - required: true, - }], - outputs: vec![json_output("embedding", "Embedding result payload.")], - }, - "local_ai_transcribe" => ControllerSchema { - namespace: "local_ai", - function: "transcribe", - description: "Transcribe audio from file path.", - inputs: vec![required_string("audio_path", "Input audio path.")], - outputs: vec![json_output("speech", "Transcription payload.")], - }, - "local_ai_transcribe_bytes" => ControllerSchema { - namespace: "local_ai", - function: "transcribe_bytes", - description: "Transcribe audio from raw bytes.", - inputs: vec![ - FieldSchema { - name: "audio_bytes", - ty: TypeSchema::Bytes, - comment: "Raw audio bytes.", - required: true, - }, - optional_string("extension", "Optional audio extension."), - ], - outputs: vec![json_output("speech", "Transcription payload.")], - }, - "local_ai_tts" => ControllerSchema { - namespace: "local_ai", - function: "tts", - description: "Synthesize speech from text.", - inputs: vec![ - required_string("text", "Input text."), - optional_string("output_path", "Optional output path."), - ], - 
outputs: vec![json_output("tts", "TTS result payload.")], - }, - "local_ai_assets_status" => ControllerSchema { - namespace: "local_ai", - function: "assets_status", - description: "Get local AI asset installation status.", - inputs: vec![], - outputs: vec![json_output("status", "Assets status payload.")], - }, - "local_ai_downloads_progress" => ControllerSchema { - namespace: "local_ai", - function: "downloads_progress", - description: "Get local AI download progress.", - inputs: vec![], - outputs: vec![json_output("progress", "Download progress payload.")], - }, - "local_ai_download_asset" => ControllerSchema { - namespace: "local_ai", - function: "download_asset", - description: "Trigger download for one local AI asset capability.", - inputs: vec![required_string("capability", "Asset capability id.")], - outputs: vec![json_output("status", "Assets status payload.")], - }, - "local_ai_device_profile" => ControllerSchema { - namespace: "local_ai", - function: "device_profile", - description: "Detect local device hardware profile (RAM, CPU, GPU).", - inputs: vec![], - outputs: vec![json_output("profile", "Device hardware profile.")], - }, - "local_ai_presets" => ControllerSchema { - namespace: "local_ai", - function: "presets", - description: "List model tier presets with recommendation and current selection.", - inputs: vec![], - outputs: vec![json_output( - "presets", - "Object containing: presets (array of ModelPreset), recommended_tier (string), \ - current_tier (string), selected_tier (string | null), device (DeviceProfile), \ - recommend_disabled (boolean — true when the device is below the RAM floor and \ - cloud fallback is the recommended default), local_ai_enabled (boolean — mirrors \ - config.local_ai.runtime_enabled so the UI can render the active state when disabled).", - )], - }, - "local_ai_apply_preset" => ControllerSchema { - namespace: "local_ai", - function: "apply_preset", - description: "Apply a model tier preset to local AI config and 
persist.", - inputs: vec![required_string( - "tier", - "Tier to apply: ram_2_4gb, or disabled to use cloud fallback.", - )], - outputs: vec![json_output("result", "Applied tier status.")], - }, - "local_ai_diagnostics" => ControllerSchema { - namespace: "local_ai", - function: "diagnostics", - description: "Run Ollama diagnostics: check server health, list installed models, verify expected models.", - inputs: vec![], - outputs: vec![json_output("diagnostics", "Diagnostic report.")], - }, - "local_ai_set_ollama_path" => ControllerSchema { - namespace: "local_ai", - function: "set_ollama_path", - description: "Set a custom Ollama binary path, persist to config, and trigger re-bootstrap.", - inputs: vec![required_string("path", "Absolute path to Ollama binary. Empty string to clear.")], - outputs: vec![json_output("result", "Updated status.")], - }, - "local_ai_chat" => ControllerSchema { - namespace: "local_ai", - function: "chat", - description: "Multi-turn chat completion via local Ollama model. Does not call the cloud API.", - inputs: vec![ - FieldSchema { - name: "messages", - ty: TypeSchema::Array(Box::new(TypeSchema::Json)), - comment: "Chat message history [{role, content}]. 
Last entry is the user turn.", - required: true, - }, - optional_u64("max_tokens", "Optional max output tokens."), - ], - outputs: vec![json_output("reply", "Assistant reply text.")], - }, - "local_ai_should_react" => ControllerSchema { - namespace: "local_ai", - function: "should_react", - description: "Ask the local model whether the assistant should add an emoji reaction to a user message, based on channel type.", - inputs: vec![ - required_string("message", "User message content to evaluate."), - required_string("channel_type", "Channel type: web, telegram, discord, slack, etc."), - ], - outputs: vec![json_output("decision", "Reaction decision: {should_react, emoji}.")], - }, - "local_ai_analyze_sentiment" => ControllerSchema { - namespace: "local_ai", - function: "analyze_sentiment", - description: "Classify the emotion and sentiment of a user message. Returns emotion label, valence, and confidence.", - inputs: vec![ - required_string("message", "User message content to analyze."), - ], - outputs: vec![json_output("sentiment", "Sentiment result: {emotion, valence, confidence}.")], - }, - "local_ai_should_send_gif" => ControllerSchema { - namespace: "local_ai", - function: "should_send_gif", - description: "Ask the local model whether a GIF response is appropriate, and if so return a Tenor search query.", - inputs: vec![ - required_string("message", "User message content to evaluate."), - required_string("channel_type", "Channel type: web, telegram, discord, slack, etc."), - ], - outputs: vec![json_output("decision", "GIF decision: {should_send_gif, search_query}.")], - }, - "local_ai_tenor_search" => ControllerSchema { - namespace: "local_ai", - function: "tenor_search", - description: "Search for GIFs via the backend Tenor proxy. 
Requires a valid session.", - inputs: vec![ - required_string("query", "Tenor search query."), - optional_u64("limit", "Max results to return (default 5, max 50)."), - ], - outputs: vec![json_output("result", "Tenor search result: {results, next}.")], - }, - "local_ai_install_whisper" => ControllerSchema { - namespace: "local_ai", - function: "install_whisper", - description: "Download whisper.cpp's GGML model (and on Windows the whisper-cli binary) into the workspace so the local STT factory has everything it needs to run.", - inputs: vec![ - optional_string( - "model_size", - "Whisper model size (tiny, base, small, medium, large-v3-turbo). Defaults to large-v3-turbo.", - ), - optional_bool( - "force", - "When true, re-download even if the workspace already has a matching model.", - ), - ], - outputs: vec![json_output("status", "Whisper install status payload.")], - }, - "local_ai_install_piper" => ControllerSchema { - namespace: "local_ai", - function: "install_piper", - description: "Download the Piper binary archive and the bundled en_US-lessac-medium voice files into the workspace.", - inputs: vec![ - optional_string( - "voice_id", - "Piper voice id (e.g. en_US-lessac-medium). 
Defaults to en_US-lessac-medium.", - ), - optional_bool( - "force", - "When true, re-download even if the workspace already has the voice files.", - ), - ], - outputs: vec![json_output("status", "Piper install status payload.")], - }, - "local_ai_whisper_install_status" => ControllerSchema { - namespace: "local_ai", - function: "whisper_install_status", - description: "Query the Whisper install state (missing / installing / installed / broken / error) plus per-stage download progress.", - inputs: vec![], - outputs: vec![json_output("status", "Whisper install status payload.")], - }, - "local_ai_piper_install_status" => ControllerSchema { - namespace: "local_ai", - function: "piper_install_status", - description: "Query the Piper install state (missing / installing / installed / broken / error) plus per-stage download progress.", - inputs: vec![], - outputs: vec![json_output("status", "Piper install status payload.")], - }, - _ => ControllerSchema { - namespace: "local_ai", - function: "unknown", - description: "Unknown local_ai controller function.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "error", - ty: TypeSchema::String, - comment: "Lookup error details.", - required: true, - }], - }, - } -} - -fn handle_agent_chat(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let mut config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::agent_chat( - &mut config, - &p.message, - p.model_override, - p.temperature, - ) - .await?, - ) - }) -} - -fn handle_agent_chat_simple(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::agent_chat_simple( - &config, - &p.message, - p.model_override, - p.temperature, - ) - .await?, - ) - }) -} - -fn handle_local_ai_status(_params: Map) -> ControllerFuture { - Box::pin(async 
move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(crate::openhuman::local_ai::rpc::local_ai_status(&config).await?) - }) -} - -fn handle_local_ai_shutdown_owned(_params: Map) -> ControllerFuture { - Box::pin(async move { - let mut config = config_rpc::load_config_with_timeout().await?; - to_json(crate::openhuman::local_ai::rpc::local_ai_shutdown_owned(&mut config).await?) - }) -} - -fn handle_local_ai_download(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_download(&config, p.force.unwrap_or(false)) - .await?, - ) - }) -} - -fn handle_local_ai_download_all_assets(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_download_all_assets( - &config, - p.force.unwrap_or(false), - ) - .await?, - ) - }) -} - -fn handle_local_ai_summarize(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_summarize(&config, &p.text, p.max_tokens) - .await?, - ) - }) -} - -fn handle_local_ai_prompt(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_prompt( - &config, - &p.prompt, - p.max_tokens, - p.no_think, - ) - .await?, - ) - }) -} - -fn handle_local_ai_vision_prompt(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_vision_prompt( - 
&config, - &p.prompt, - &p.image_refs, - p.max_tokens, - ) - .await?, - ) - }) -} - -fn handle_local_ai_embed(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json(crate::openhuman::local_ai::rpc::local_ai_embed(&config, &p.inputs).await?) - }) -} - -fn handle_local_ai_transcribe(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_transcribe(&config, p.audio_path.trim()) - .await?, - ) - }) -} - -fn handle_local_ai_transcribe_bytes(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_transcribe_bytes( - &config, - &p.audio_bytes, - p.extension, - ) - .await?, - ) - }) -} - -fn handle_local_ai_tts(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_tts( - &config, - &p.text, - p.output_path.as_deref(), - ) - .await?, - ) - }) -} - -fn handle_local_ai_assets_status(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(crate::openhuman::local_ai::rpc::local_ai_assets_status(&config).await?) - }) -} - -fn handle_local_ai_downloads_progress(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(crate::openhuman::local_ai::rpc::local_ai_downloads_progress(&config).await?) 
- }) -} - -fn handle_local_ai_download_asset(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_download_asset(&config, p.capability.trim()) - .await?, - ) - }) -} - -fn handle_local_ai_device_profile(_params: Map) -> ControllerFuture { - Box::pin(async move { - tracing::debug!("[local_ai] device_profile: detecting hardware"); - let profile = crate::openhuman::local_ai::device::detect_device_profile(); - tracing::debug!("[local_ai] device_profile: done"); - let value = serde_json::to_value(&profile).map_err(|e| format!("serialize: {e}"))?; - Ok(value) - }) -} - -fn handle_local_ai_presets(_params: Map) -> ControllerFuture { - Box::pin(async move { - tracing::debug!("[local_ai] presets: loading config and computing tiers"); - let config = config_rpc::load_config_with_timeout().await?; - let device = crate::openhuman::local_ai::device::detect_device_profile(); - let recommended = crate::openhuman::local_ai::presets::recommend_tier(&device); - let current = - crate::openhuman::local_ai::presets::current_tier_from_config(&config.local_ai); - let selected_tier = config.local_ai.selected_tier.as_ref().and_then(|value| { - let normalized = value.trim().to_ascii_lowercase(); - crate::openhuman::local_ai::presets::ModelTier::from_str_opt(&normalized) - .map(|tier| tier.as_str().to_string()) - .or_else(|| (!normalized.is_empty()).then_some(normalized)) - }); - let presets = crate::openhuman::local_ai::presets::mvp_presets(); - tracing::debug!( - ?recommended, - ?current, - selected_tier = ?selected_tier, - preset_count = presets.len(), - "[local_ai] presets: returning" - ); - let recommend_disabled = - crate::openhuman::local_ai::presets::should_default_to_cloud_fallback(&device); - let value = serde_json::json!({ - "presets": presets, - "recommended_tier": recommended, - "current_tier": current, - 
"selected_tier": selected_tier, - "device": device, - "recommend_disabled": recommend_disabled, - "local_ai_enabled": config.local_ai.runtime_enabled, - }); - Ok(value) - }) -} - -fn handle_local_ai_apply_preset(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let tier_str = p.tier.trim().to_ascii_lowercase(); - tracing::debug!(tier = %tier_str, "[local_ai] apply_preset: parsing tier"); - - // Special "disabled" tier: turn local_ai off and route AI to cloud. - if tier_str == "disabled" { - let mut config = config_rpc::load_config_with_timeout().await?; - config.local_ai.runtime_enabled = false; - config.local_ai.selected_tier = Some("disabled".to_string()); - // Explicit opt-out also clears the MVP opt-in marker so bootstrap - // keeps local AI off across restarts. - config.local_ai.opt_in_confirmed = false; - config - .save() - .await - .map_err(|e| format!("save config: {e}"))?; - tracing::debug!("[local_ai] apply_preset: local_ai disabled (cloud fallback)"); - return Ok(serde_json::json!({ - "applied_tier": "disabled", - "local_ai_enabled": false, - })); - } - - let tier = crate::openhuman::local_ai::presets::ModelTier::from_str_opt(&tier_str) - .ok_or_else(|| { - format!( - "invalid tier '{}': expected one of disabled or ram_2_4gb", - tier_str - ) - })?; - - if tier == crate::openhuman::local_ai::presets::ModelTier::Custom { - return Err("cannot apply 'custom' tier; set model IDs directly".to_string()); - } - if !tier.is_mvp_allowed() { - return Err(format!( - "tier '{}' is not available in this build; only the 1B local model preset is supported", - tier_str - )); - } - - let mut config = config_rpc::load_config_with_timeout().await?; - // Re-enable local AI in case it was previously disabled via the - // "disabled" tier, so the user can switch back to local inference. 
- config.local_ai.runtime_enabled = true; - // Explicit tier selection is the MVP opt-in — flip the marker so - // `config_with_recommended_tier_if_unselected` stops hard-overriding - // to disabled on subsequent boots. - config.local_ai.opt_in_confirmed = true; - crate::openhuman::local_ai::presets::apply_preset_to_config(&mut config.local_ai, tier); - config - .save() - .await - .map_err(|e| format!("save config: {e}"))?; - tracing::debug!(tier = %tier_str, "[local_ai] apply_preset: config saved"); - - Ok(serde_json::json!({ - "applied_tier": tier, - "chat_model_id": config.local_ai.chat_model_id, - "vision_model_id": config.local_ai.vision_model_id, - "embedding_model_id": config.local_ai.embedding_model_id, - "quantization": config.local_ai.quantization, - "vision_mode": crate::openhuman::local_ai::presets::vision_mode_for_config(&config.local_ai), - "local_ai_enabled": true, - })) - }) -} - -fn handle_local_ai_diagnostics(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let service = crate::openhuman::local_ai::global(&config); - service.diagnostics(&config).await - }) -} - -fn handle_local_ai_set_ollama_path(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let path_str = p.path.trim().to_string(); - tracing::debug!(path = %path_str, "[local_ai] set_ollama_path: validating"); - - let new_value = if path_str.is_empty() { - None - } else { - let path = std::path::Path::new(&path_str); - if !path.is_file() { - return Err(format!( - "Ollama binary not found at '{}'. 
Provide a valid path to the ollama executable.", - path_str - )); - } - Some(path_str.clone()) - }; - - let mut config = config_rpc::load_config_with_timeout().await?; - config.local_ai.ollama_binary_path = new_value.clone(); - config - .save() - .await - .map_err(|e| format!("save config: {e}"))?; - tracing::debug!(path = ?new_value, "[local_ai] set_ollama_path: config saved, triggering re-bootstrap"); - - let service = crate::openhuman::local_ai::global(&config); - service.reset_to_idle(&config); - let service_clone = service.clone(); - let config_clone = config.clone(); - tokio::spawn(async move { - service_clone.bootstrap(&config_clone).await; - }); - - let current_status = - serde_json::to_value(service.status()).map_err(|e| format!("serialize: {e}"))?; - Ok(serde_json::json!({ - "ollama_binary_path": new_value, - "status": current_status, - })) - }) -} - -fn handle_local_ai_should_react(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::rpc::local_ai_should_react( - &config, - &p.message, - &p.channel_type, - ) - .await?, - ) - }) -} - -fn handle_local_ai_analyze_sentiment(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::sentiment::local_ai_analyze_sentiment(&config, &p.message) - .await?, - ) - }) -} - -fn handle_local_ai_should_send_gif(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::gif_decision::local_ai_should_send_gif( - &config, - &p.message, - &p.channel_type, - ) - .await?, - ) - }) -} - -fn handle_local_ai_tenor_search(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = 
deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - to_json( - crate::openhuman::local_ai::gif_decision::tenor_search(&config, &p.query, p.limit) - .await?, - ) - }) -} - -fn handle_local_ai_chat(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - let messages: Vec = p - .messages - .into_iter() - .map(|m| crate::openhuman::local_ai::rpc::LocalAiChatMessage { - role: m.role, - content: m.content, - }) - .collect(); - to_json( - crate::openhuman::local_ai::rpc::local_ai_chat(&config, messages, p.max_tokens).await?, - ) - }) -} - -// The install RPCs are intentionally fire-and-forget: a binary+model -// download can take minutes (1.6 GB GGML model, ~5 MB Piper binary -// archive) but the core JSON-RPC client times out at -// VITE_CORE_RPC_TIMEOUT_MS (default 30s). Blocking the handler on the -// full download would force the UI into a retry loop that deletes the -// in-flight .part on each retry, looping forever. -// -// Shape: mark the engine as `installing(0%)` in the shared status table, -// spawn the real install on a background tokio task, return the -// just-written status immediately. The UI's status-polling RPC -// (handle_local_ai_*_install_status) reads from the same table and -// renders real-time progress. The eventual `installed` / `error` -// transition lands on the table when the background task finishes; -// no caller awaits it. - -fn handle_local_ai_install_whisper(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - let force = p.force.unwrap_or(false); - - // Atomic install-start guard. 
A duplicate click while an install - // is already in flight (or a parallel auto-install firing - // alongside a manual click) must be a no-op — not a second - // concurrent download racing on the same `.part` file inside - // `download_to_file`. The previous read_status -> check -> - // write_status sequence was non-atomic and let two callers slip - // through; `try_acquire_install_slot` does the check-and-claim - // under a single mutex acquisition. - let slot = match crate::openhuman::local_ai::voice_install_common::try_acquire_install_slot( - crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER, - ) { - Some(slot) => slot, - None => { - tracing::debug!( - "[voice-install:whisper] slot already held — returning current status" - ); - let current = crate::openhuman::local_ai::voice_install_common::read_status( - crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER, - ); - return serde_json::to_value(current) - .map_err(|e| format!("serialize whisper status: {e}")); - } - }; - - // Mark "installing" before the spawn so the very next status poll - // (≤ 2s away) reflects the new state without a stale read. - crate::openhuman::local_ai::voice_install_common::write_status( - crate::openhuman::local_ai::voice_install_common::VoiceInstallStatus { - engine: crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER - .to_string(), - state: - crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing, - progress: Some(0), - downloaded_bytes: None, - total_bytes: None, - stage: Some("queued".to_string()), - error_detail: None, - }, - ); - - tracing::debug!( - model_size = ?p.model_size, - force, - "[voice-install:whisper] spawning background install" - ); - let model_size = p.model_size.clone(); - // Move the slot into the spawned task so it lives for the actual - // install duration (download + extract + validate), not just the - // RPC handler's lifetime. 
The slot's Drop releases the - // single-writer guard on task exit, including via panic. - tokio::spawn(async move { - let _slot = slot; - if let Err(e) = crate::openhuman::local_ai::install_whisper::install_whisper( - &config, model_size, force, - ) - .await - { - log::warn!("[voice-install:whisper] background install failed: {e}"); - } - }); - - let status = crate::openhuman::local_ai::voice_install_common::read_status( - crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER, - ); - serde_json::to_value(status).map_err(|e| format!("serialize whisper status: {e}")) - }) -} - -fn handle_local_ai_install_piper(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - let force = p.force.unwrap_or(false); - - // See the whisper handler above for why this is an atomic slot - // acquisition rather than a read_status / write_status pair. - let slot = match crate::openhuman::local_ai::voice_install_common::try_acquire_install_slot( - crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER, - ) { - Some(slot) => slot, - None => { - tracing::debug!( - "[voice-install:piper] slot already held — returning current status" - ); - let current = crate::openhuman::local_ai::voice_install_common::read_status( - crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER, - ); - return serde_json::to_value(current) - .map_err(|e| format!("serialize piper status: {e}")); - } - }; - - crate::openhuman::local_ai::voice_install_common::write_status( - crate::openhuman::local_ai::voice_install_common::VoiceInstallStatus { - engine: crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER.to_string(), - state: - crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing, - progress: Some(0), - downloaded_bytes: None, - total_bytes: None, - stage: Some("queued".to_string()), - error_detail: None, - }, - ); - - tracing::debug!( - voice_id 
= ?p.voice_id, - force, - "[voice-install:piper] spawning background install" - ); - let voice_id = p.voice_id.clone(); - // Move the slot into the spawned task — same rationale as the - // whisper handler. - tokio::spawn(async move { - let _slot = slot; - if let Err(e) = - crate::openhuman::local_ai::install_piper::install_piper(&config, voice_id, force) - .await - { - log::warn!("[voice-install:piper] background install failed: {e}"); - } - }); - - let status = crate::openhuman::local_ai::voice_install_common::read_status( - crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER, - ); - serde_json::to_value(status).map_err(|e| format!("serialize piper status: {e}")) - }) -} - -fn handle_local_ai_whisper_install_status(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let status = crate::openhuman::local_ai::install_whisper::status(&config); - serde_json::to_value(status).map_err(|e| format!("serialize whisper status: {e}")) - }) -} - -fn handle_local_ai_piper_install_status(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let status = crate::openhuman::local_ai::install_piper::status(&config); - serde_json::to_value(status).map_err(|e| format!("serialize piper status: {e}")) - }) -} - -fn deserialize_params(params: Map) -> Result { - serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) -} - -fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::String, - comment, - required: true, - } -} - -fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment, - required: false, - } -} - -fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: 
TypeSchema::Option(Box::new(TypeSchema::Bool)), - comment, - required: false, - } -} - -fn optional_f64(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::F64)), - comment, - required: false, - } -} - -fn optional_u64(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment, - required: false, - } -} - -fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Json, - comment, - required: true, - } -} - -fn to_json(outcome: RpcOutcome) -> Result { - outcome.into_cli_compatible_json() -} - -#[cfg(test)] -#[path = "schemas_tests.rs"] -mod tests; diff --git a/src/openhuman/local_ai/schemas_tests.rs b/src/openhuman/local_ai/schemas_tests.rs deleted file mode 100644 index d898f1f562..0000000000 --- a/src/openhuman/local_ai/schemas_tests.rs +++ /dev/null @@ -1,398 +0,0 @@ -use super::*; - -#[test] -fn catalog_counts_match_and_nonempty() { - let s = all_controller_schemas(); - let h = all_registered_controllers(); - assert_eq!(s.len(), h.len()); - assert!(s.len() >= 20, "local_ai should expose >=20 controller fns"); -} - -#[test] -fn all_schemas_use_local_ai_namespace_and_have_descriptions() { - for s in all_controller_schemas() { - assert_eq!(s.namespace, "local_ai", "function {}", s.function); - assert!(!s.description.is_empty(), "function {} desc", s.function); - assert!(!s.outputs.is_empty(), "function {} outputs", s.function); - } -} - -#[test] -fn unknown_function_returns_unknown_schema() { - let s = schemas("no_such_fn"); - assert_eq!(s.function, "unknown"); - assert_eq!(s.namespace, "local_ai"); -} - -#[test] -fn every_registered_key_resolves_to_non_unknown_schema() { - let keys = [ - "agent_chat", - "agent_chat_simple", - "local_ai_status", - "local_ai_download", - "local_ai_download_all_assets", - "local_ai_summarize", - 
"local_ai_prompt", - "local_ai_vision_prompt", - "local_ai_embed", - "local_ai_transcribe", - "local_ai_transcribe_bytes", - "local_ai_tts", - "local_ai_assets_status", - "local_ai_downloads_progress", - "local_ai_download_asset", - "local_ai_device_profile", - "local_ai_presets", - "local_ai_apply_preset", - "local_ai_set_ollama_path", - "local_ai_diagnostics", - "local_ai_chat", - "local_ai_should_react", - "local_ai_analyze_sentiment", - "local_ai_should_send_gif", - "local_ai_tenor_search", - ]; - for k in keys { - let s = schemas(k); - assert_eq!(s.namespace, "local_ai"); - assert_ne!(s.function, "unknown", "key `{k}` fell through"); - } -} - -#[test] -fn registered_controllers_all_in_local_ai_namespace() { - for h in all_registered_controllers() { - assert_eq!(h.schema.namespace, "local_ai"); - assert!(!h.schema.function.is_empty()); - } -} - -#[test] -fn field_builder_helpers_are_correct_shape() { - let r = required_string("k", "c"); - assert!(r.required); - assert!(matches!(r.ty, TypeSchema::String)); - - let o = optional_string("k", "c"); - assert!(!o.required); - - let ou = optional_u64("k", "c"); - assert!(!ou.required); - - let j = json_output("result", "c"); - assert!(j.required); - assert!(matches!(j.ty, TypeSchema::Json)); -} - -#[test] -fn to_json_wraps_rpc_outcome() { - let v = - to_json(RpcOutcome::single_log(serde_json::json!({"ok": true}), "l")).expect("serialize"); - assert!(v.get("logs").is_some() || v.get("result").is_some() || v.get("ok").is_some()); -} - -#[test] -fn deserialize_params_parses_valid_object() { - let mut m = Map::new(); - m.insert("message".into(), Value::String("hi".into())); - let p: AgentChatParams = deserialize_params(m).expect("parse"); - assert_eq!(p.message, "hi"); -} - -#[test] -fn deserialize_params_errors_on_invalid_shape() { - let mut m = Map::new(); - m.insert("message".into(), Value::Bool(true)); - let err = deserialize_params::(m).unwrap_err(); - assert!(err.contains("invalid params")); -} - -#[test] -fn 
prompt_schema_has_inputs() { - let s = schemas("local_ai_prompt"); - assert!(!s.inputs.is_empty()); -} - -#[test] -fn apply_preset_schema_has_inputs() { - let s = schemas("local_ai_apply_preset"); - assert!(!s.inputs.is_empty()); -} - -#[test] -fn download_schema_optional_force_flag() { - let s = schemas("local_ai_download"); - let force = s.inputs.iter().find(|f| f.name == "force"); - assert!(force.is_some_and(|f| !f.required)); -} - -#[test] -fn summarize_schema_requires_text_or_equivalent() { - let s = schemas("local_ai_summarize"); - assert!(s.inputs.iter().any(|f| f.required)); -} - -// ── Handler-level tests that don't need Ollama ──────────────── - -use crate::openhuman::config::TEST_ENV_LOCK as ENV_LOCK; -use tempfile::TempDir; - -#[tokio::test] -async fn handle_device_profile_returns_device_shape() { - let v = handle_local_ai_device_profile(Map::new()) - .await - .expect("ok"); - // device profile exposes at least a few expected fields. - assert!(v.is_object()); -} - -#[tokio::test] -async fn handle_presets_returns_presets_list_and_recommended_tier() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let v = handle_local_ai_presets(Map::new()).await.expect("ok"); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(v.get("presets").is_some()); - assert!(v.get("recommended_tier").is_some()); - assert!(v.get("device").is_some()); - let presets = v - .get("presets") - .and_then(|value| value.as_array()) - .expect("presets array"); - assert_eq!(presets.len(), 1, "only the 1B preset should be exposed"); - assert_eq!( - presets[0] - .get("chat_model_id") - .and_then(|value| value.as_str()), - Some("gemma3:1b-it-qat") - ); -} - -#[tokio::test] -async fn handle_apply_preset_rejects_invalid_tier() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - 
std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([("tier".to_string(), serde_json::json!("ram_bogus"))]); - let err = handle_local_ai_apply_preset(params).await.unwrap_err(); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(err.contains("invalid tier")); -} - -#[tokio::test] -async fn handle_apply_preset_rejects_custom_tier() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([("tier".to_string(), serde_json::json!("custom"))]); - let err = handle_local_ai_apply_preset(params).await.unwrap_err(); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(err.contains("cannot apply 'custom'")); -} - -#[tokio::test] -async fn handle_apply_preset_rejects_unsupported_large_tier() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([("tier".to_string(), serde_json::json!("ram_8_16gb"))]); - let err = handle_local_ai_apply_preset(params).await.unwrap_err(); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(err.contains("only the 1B local model preset is supported")); -} - -#[tokio::test] -async fn handle_apply_preset_accepts_valid_tier_and_persists() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([("tier".to_string(), serde_json::json!("ram_2_4gb"))]); - let result = handle_local_ai_apply_preset(params) - .await - .expect("apply ok"); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(result.get("applied_tier").is_some()); - assert!(result.get("chat_model_id").is_some()); -} - -#[tokio::test] -async fn 
handle_set_ollama_path_rejects_nonexistent_path() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([( - "path".to_string(), - serde_json::json!("/this/path/should/not/exist/ollama"), - )]); - let err = handle_local_ai_set_ollama_path(params).await.unwrap_err(); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - assert!(err.contains("Ollama binary not found")); -} - -#[tokio::test] -async fn handle_set_ollama_path_accepts_empty_string_to_clear() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - let params = Map::from_iter([("path".to_string(), serde_json::json!(""))]); - // Empty path clears the setting — must not error. - let _ = handle_local_ai_set_ollama_path(params).await.expect("ok"); - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } -} - -/// Regression test for the CodeRabbit #7 race on PR #1755: when two -/// concurrent RPC calls (e.g. a double-click, or the auto-install firing -/// alongside a manual click) hit `handle_local_ai_install_whisper` at -/// the same time, only one of them must spawn a real install task. The -/// other must short-circuit and return the in-flight status without -/// starting a second download that would race on the same `.part` file. -/// -/// We exercise the actual handler — not just the slot primitive — so -/// the wiring at the call site is also covered. -#[tokio::test] -async fn install_whisper_handler_serializes_concurrent_calls() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - - // Pre-acquire the install slot from the test so we're guaranteed to - // observe the "already in flight" code path. 
Holding the slot here - // also means the handler under test will short-circuit immediately - // rather than spawning a real install task that would try to hit - // the network in CI. - let slot = crate::openhuman::local_ai::voice_install_common::try_acquire_install_slot( - crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER, - ) - .expect("test should be able to claim the slot first"); - - // Mark the status table as `Installing` so the handler's - // short-circuit branch (which reads current status to return) sees - // a coherent snapshot. - crate::openhuman::local_ai::voice_install_common::write_status( - crate::openhuman::local_ai::voice_install_common::VoiceInstallStatus { - engine: crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER.to_string(), - state: crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing, - progress: Some(0), - downloaded_bytes: None, - total_bytes: None, - stage: Some("queued".to_string()), - error_detail: None, - }, - ); - - // Fire two handler calls in parallel. Both must succeed and both - // must return the existing `Installing` status — neither must - // mutate or re-spawn. This is exactly the double-click / auto-fire - // shape described in CodeRabbit #7. - let (r1, r2) = tokio::join!( - handle_local_ai_install_whisper(Map::new()), - handle_local_ai_install_whisper(Map::new()) - ); - - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - drop(slot); - // Clean up so other tests see Missing. - crate::openhuman::local_ai::voice_install_common::reset_status( - crate::openhuman::local_ai::voice_install_common::ENGINE_WHISPER, - ); - - let v1 = r1.expect("first call ok"); - let v2 = r2.expect("second call ok"); - // Both calls must report the engine is already installing — proving - // the handler short-circuited rather than running the spawn path. 
- for (label, v) in [("first", &v1), ("second", &v2)] { - let state = v.get("state").and_then(|s| s.as_str()); - assert_eq!( - state, - Some("installing"), - "{label} concurrent call should see Installing, got {v:?}" - ); - } -} - -/// Same regression for Piper. The two handlers share the slot -/// infrastructure but live in separate code paths, so the wiring needs -/// independent coverage. -#[tokio::test] -async fn install_piper_handler_serializes_concurrent_calls() { - let _g = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); - let tmp = TempDir::new().unwrap(); - unsafe { - std::env::set_var("OPENHUMAN_WORKSPACE", tmp.path()); - } - - let slot = crate::openhuman::local_ai::voice_install_common::try_acquire_install_slot( - crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER, - ) - .expect("test should be able to claim the slot first"); - - crate::openhuman::local_ai::voice_install_common::write_status( - crate::openhuman::local_ai::voice_install_common::VoiceInstallStatus { - engine: crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER.to_string(), - state: crate::openhuman::local_ai::voice_install_common::VoiceInstallState::Installing, - progress: Some(0), - downloaded_bytes: None, - total_bytes: None, - stage: Some("queued".to_string()), - error_detail: None, - }, - ); - - let (r1, r2) = tokio::join!( - handle_local_ai_install_piper(Map::new()), - handle_local_ai_install_piper(Map::new()) - ); - - unsafe { - std::env::remove_var("OPENHUMAN_WORKSPACE"); - } - drop(slot); - crate::openhuman::local_ai::voice_install_common::reset_status( - crate::openhuman::local_ai::voice_install_common::ENGINE_PIPER, - ); - - let v1 = r1.expect("first call ok"); - let v2 = r2.expect("second call ok"); - for (label, v) in [("first", &v1), ("second", &v2)] { - let state = v.get("state").and_then(|s| s.as_str()); - assert_eq!( - state, - Some("installing"), - "{label} concurrent call should see Installing, got {v:?}" - ); - } -} diff --git 
a/src/openhuman/mcp_server/tools.rs b/src/openhuman/mcp_server/tools.rs index e893cefcc8..6dc2263f8b 100644 --- a/src/openhuman/mcp_server/tools.rs +++ b/src/openhuman/mcp_server/tools.rs @@ -4,7 +4,7 @@ use crate::core::all; use crate::openhuman::agent::harness::AgentDefinitionRegistry; use crate::openhuman::agent::Agent; use crate::openhuman::config::rpc as config_rpc; -use crate::openhuman::providers::traits::build_tool_instructions_text; +use crate::openhuman::inference::provider::traits::build_tool_instructions_text; use crate::openhuman::security::{SecurityPolicy, ToolOperation}; const DEFAULT_LIMIT: u64 = 10; diff --git a/src/openhuman/memory/store/factories.rs b/src/openhuman/memory/store/factories.rs index 7f889ed30b..f2d42bc6ae 100644 --- a/src/openhuman/memory/store/factories.rs +++ b/src/openhuman/memory/store/factories.rs @@ -85,12 +85,12 @@ fn reset_health_gate_for_test() { /// Effective Ollama base URL. /// -/// Delegates to [`crate::openhuman::local_ai::ollama_base_url`] so the probe +/// Delegates to [`crate::openhuman::inference::local::ollama_base_url`] so the probe /// always agrees with the rest of the Ollama machinery on the daemon address. /// If a future change adds another env-var override or shifts precedence, the /// memory health-gate picks it up automatically. 
fn ollama_base_url_for_probe() -> String { - crate::openhuman::local_ai::ollama_base_url() + crate::openhuman::inference::local::ollama_base_url() } /// Canonical `(provider, model, dimensions)` tuple used everywhere the @@ -465,7 +465,7 @@ mod tests { impl EnvGuard { fn set(value: &str) -> Self { - let lock = crate::openhuman::local_ai::local_ai_test_guard(); + let lock = crate::openhuman::inference::local::inference_test_guard(); let prev = std::env::var_os("OPENHUMAN_OLLAMA_BASE_URL"); // SAFETY: env mutation is wrapped because Rust 2024 marks it // unsafe; the call is gated by the local-AI domain mutex so no @@ -709,7 +709,7 @@ mod tests { /// fresh "first", flaking the suppression assertion. #[test] fn ollama_health_gate_reports_at_most_once_per_process() { - let _lock = crate::openhuman::local_ai::local_ai_test_guard(); + let _lock = crate::openhuman::inference::local::inference_test_guard(); reset_health_gate_for_test(); assert!( diff --git a/src/openhuman/memory/tree/chat/cloud.rs b/src/openhuman/memory/tree/chat/cloud.rs index 210c7c55a9..a95a979e00 100644 --- a/src/openhuman/memory/tree/chat/cloud.rs +++ b/src/openhuman/memory/tree/chat/cloud.rs @@ -1,6 +1,6 @@ //! Cloud chat provider — routes through the OpenHuman backend's //! `/openai/v1/chat/completions` surface using the existing -//! [`crate::openhuman::providers::openhuman_backend::OpenHumanBackendProvider`]. +//! [`crate::openhuman::inference::provider::openhuman_backend::OpenHumanBackendProvider`]. //! //! Used when `memory_tree.llm_backend = "cloud"` (the default). The //! 
request shape is the standard OpenAI-compatible chat-completions @@ -16,9 +16,9 @@ use std::path::PathBuf; use anyhow::{Context, Result}; use async_trait::async_trait; -use crate::openhuman::providers::openhuman_backend::OpenHumanBackendProvider; -use crate::openhuman::providers::traits::{ChatMessage, Provider}; -use crate::openhuman::providers::ProviderRuntimeOptions; +use crate::openhuman::inference::provider::openhuman_backend::OpenHumanBackendProvider; +use crate::openhuman::inference::provider::traits::{ChatMessage, Provider}; +use crate::openhuman::inference::provider::ProviderRuntimeOptions; use super::{ChatPrompt, ChatProvider}; diff --git a/src/openhuman/memory/tree/chat/mod.rs b/src/openhuman/memory/tree/chat/mod.rs index 087648a6bb..0efded0388 100644 --- a/src/openhuman/memory/tree/chat/mod.rs +++ b/src/openhuman/memory/tree/chat/mod.rs @@ -15,7 +15,7 @@ //! //! ## Why a memory-tree-local trait //! -//! The existing top-level [`crate::openhuman::providers::Provider`] trait +//! The existing top-level [`crate::openhuman::inference::provider::Provider`] trait //! is rich (streaming, native tool calling, vision, …) and depends on the //! agent's full conversation surface. The extractor and summariser only //! need: diff --git a/src/openhuman/memory/tree/score/embed/factory.rs b/src/openhuman/memory/tree/score/embed/factory.rs index d570041858..e3fb73a969 100644 --- a/src/openhuman/memory/tree/score/embed/factory.rs +++ b/src/openhuman/memory/tree/score/embed/factory.rs @@ -33,7 +33,7 @@ use anyhow::Result; use super::{CloudEmbedder, Embedder, InertEmbedder, OllamaEmbedder}; use crate::openhuman::config::Config; -use crate::openhuman::local_ai::ollama_base_url; +use crate::openhuman::inference::local::ollama_base_url; /// Cheap heuristic for "is a backend session reachable?" — the cloud /// embedder needs one and bails on first embed call without it. 
We use diff --git a/src/openhuman/migrations/mod_tests.rs b/src/openhuman/migrations/mod_tests.rs index 3256c2089b..defe5651d2 100644 --- a/src/openhuman/migrations/mod_tests.rs +++ b/src/openhuman/migrations/mod_tests.rs @@ -2,7 +2,7 @@ use super::*; use crate::openhuman::agent::harness::session::transcript::{ read_transcript, write_transcript, TranscriptMeta, }; -use crate::openhuman::providers::ChatMessage; +use crate::openhuman::inference::provider::ChatMessage; use std::fs; use std::path::Path; use tempfile::TempDir; diff --git a/src/openhuman/migrations/phase_out_profile_md_tests.rs b/src/openhuman/migrations/phase_out_profile_md_tests.rs index 48f191e10b..a084cee523 100644 --- a/src/openhuman/migrations/phase_out_profile_md_tests.rs +++ b/src/openhuman/migrations/phase_out_profile_md_tests.rs @@ -2,7 +2,7 @@ use super::*; use crate::openhuman::agent::harness::session::transcript::{ read_transcript, write_transcript, TranscriptMeta, }; -use crate::openhuman::providers::ChatMessage; +use crate::openhuman::inference::provider::ChatMessage; use std::fs; use tempfile::TempDir; diff --git a/src/openhuman/migrations/unify_ai_provider_settings.rs b/src/openhuman/migrations/unify_ai_provider_settings.rs index bb51eba657..a45ef05fe1 100644 --- a/src/openhuman/migrations/unify_ai_provider_settings.rs +++ b/src/openhuman/migrations/unify_ai_provider_settings.rs @@ -14,7 +14,7 @@ //! - `memory_tree.llm_backend` (+ `cloud_llm_model`) — memory summariser //! //! After this migration there is one grammar — provider strings parsed by -//! [`crate::openhuman::providers::factory`] — addressing all eight workloads +//! [`crate::openhuman::inference::provider::factory`] — addressing all eight workloads //! uniformly: //! //! 
```text diff --git a/src/openhuman/mod.rs b/src/openhuman/mod.rs index 0cb871a739..6d3d85a8a6 100644 --- a/src/openhuman/mod.rs +++ b/src/openhuman/mod.rs @@ -37,10 +37,10 @@ pub mod encryption; pub mod health; pub mod heartbeat; pub mod http_host; +pub mod inference; pub mod integrations; pub mod javascript; pub mod learning; -pub mod local_ai; pub mod mcp_client; pub mod mcp_server; pub mod meet; @@ -53,7 +53,6 @@ pub mod overlay; pub mod people; pub mod prompt_injection; pub mod provider_surfaces; -pub mod providers; pub mod redirect_links; pub mod referral; pub mod routing; diff --git a/src/openhuman/providers/schemas.rs b/src/openhuman/providers/schemas.rs deleted file mode 100644 index 45feb283fd..0000000000 --- a/src/openhuman/providers/schemas.rs +++ /dev/null @@ -1,207 +0,0 @@ -//! RPC controller schemas for the providers domain. -//! -//! Exposes `openhuman.providers_list_models` — fetches the `/models` endpoint -//! of a configured cloud provider and returns the list. - -use crate::core::all::{ControllerFuture, RegisteredController}; -use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; -use crate::rpc::RpcOutcome; -use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value}; - -// ── Helpers ────────────────────────────────────────────────────────────────── - -fn to_json(outcome: RpcOutcome) -> Result { - outcome.into_cli_compatible_json() -} - -fn deserialize_params Deserialize<'de>>( - params: Map, -) -> Result { - serde_json::from_value(Value::Object(params)).map_err(|e| e.to_string()) -} - -// ── Schema catalog ──────────────────────────────────────────────────────────── - -pub fn all_controller_schemas() -> Vec { - vec![list_models_schema()] -} - -pub fn all_registered_controllers() -> Vec { - vec![RegisteredController { - schema: list_models_schema(), - handler: handle_list_models, - }] -} - -fn list_models_schema() -> ControllerSchema { - ControllerSchema { - namespace: "providers", - function: "list_models", - description: "Fetch 
the available model list from a configured cloud provider's /models API.", - inputs: vec![ - FieldSchema { - name: "provider_id", - ty: TypeSchema::String, - comment: "Opaque id of the cloud_providers entry to query.", - required: true, - }, - ], - outputs: vec![ - FieldSchema { - name: "models", - ty: TypeSchema::Json, - comment: "Array of { id, owned_by?, context_window? } model descriptors returned by the provider.", - required: true, - }, - ], - } -} - -// ── Request / response types ────────────────────────────────────────────────── - -#[derive(Debug, Deserialize)] -struct ListModelsRequest { - provider_id: String, -} - -#[derive(Debug, Serialize)] -struct ModelInfo { - id: String, - #[serde(skip_serializing_if = "Option::is_none")] - owned_by: Option, - #[serde(skip_serializing_if = "Option::is_none")] - context_window: Option, -} - -// ── Handler ─────────────────────────────────────────────────────────────────── - -fn handle_list_models(params: Map) -> ControllerFuture { - Box::pin(async move { - let req: ListModelsRequest = deserialize_params(params)?; - let provider_id = req.provider_id.trim().to_string(); - - if provider_id.is_empty() { - return Err("provider_id must not be empty".to_string()); - } - - log::debug!("[providers][list_models] provider_id={}", provider_id); - - let config = crate::openhuman::config::Config::load_or_init() - .await - .map_err(|e| e.to_string())?; - - let entry = config - .cloud_providers - .iter() - .find(|e| e.id == provider_id) - .cloned() - .ok_or_else(|| format!("no cloud provider with id '{}' found", provider_id))?; - - // Build the /models URL from the provider's endpoint. - let base = entry.endpoint.trim_end_matches('/'); - let models_url = format!("{}/models", base); - - log::debug!( - "[providers][list_models] fetching url={} slug={}", - models_url, - entry.slug - ); - - // Fetch the API key for this provider. 
- let api_key = - crate::openhuman::providers::factory::lookup_key_for_slug(&entry.slug, &config) - .unwrap_or_default(); - - // Build the HTTP client (reuse the runtime proxy config). Explicit - // timeouts mirror the other external integrations (composio, - // multimodal) so a slow/unresponsive provider can't hang the panel. - let client = crate::openhuman::config::build_runtime_proxy_client_with_timeouts( - "providers.list_models", - 30, - 10, - ); - - let mut request = client.get(&models_url); - - // Attach auth header per auth_style. - use crate::openhuman::config::schema::cloud_providers::AuthStyle; - request = match entry.auth_style { - AuthStyle::Bearer => { - if !api_key.is_empty() { - request.header("Authorization", format!("Bearer {}", api_key)) - } else { - request - } - } - AuthStyle::Anthropic => { - let mut r = request.header("anthropic-version", "2023-06-01"); - if !api_key.is_empty() { - r = r.header("x-api-key", &api_key); - } - r - } - AuthStyle::OpenhumanJwt | AuthStyle::None => request, - }; - - let response = request - .send() - .await - .map_err(|e| format!("[providers][list_models] HTTP request failed: {}", e))?; - - let status = response.status(); - if !status.is_success() { - let body = response.text().await.unwrap_or_default(); - let truncated = crate::openhuman::util::truncate_with_ellipsis(&body, 300); - return Err(format!( - "provider returned {}: {}", - status.as_u16(), - truncated - )); - } - - let body: Value = response - .json() - .await - .map_err(|e| format!("[providers][list_models] failed to parse JSON: {}", e))?; - - // Parse OpenAI-compatible `{ data: [{ id, owned_by? }] }` or - // Anthropic `{ data: [{ id, display_name }] }`. 
- let data = body - .get("data") - .and_then(|d| d.as_array()) - .cloned() - .unwrap_or_default(); - - let models: Vec = data - .iter() - .filter_map(|item| { - let id = item.get("id")?.as_str()?.to_string(); - let owned_by = item - .get("owned_by") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - let context_window = item - .get("context_length") - .or_else(|| item.get("context_window")) - .and_then(|v| v.as_u64()); - Some(ModelInfo { - id, - owned_by, - context_window, - }) - }) - .collect(); - - log::info!( - "[providers][list_models] slug={} fetched {} models", - entry.slug, - models.len() - ); - - to_json(RpcOutcome::new( - serde_json::json!({ "models": models }), - vec![format!("fetched {} models", models.len())], - )) - }) -} diff --git a/src/openhuman/routing/factory.rs b/src/openhuman/routing/factory.rs index c03b6c3ecc..07941d65e0 100644 --- a/src/openhuman/routing/factory.rs +++ b/src/openhuman/routing/factory.rs @@ -2,11 +2,11 @@ use std::sync::Arc; use std::time::Duration; use crate::openhuman::config::LocalAiConfig; -use crate::openhuman::local_ai::lm_studio_api::lm_studio_base_url_from_local_ai; -use crate::openhuman::local_ai::ollama_base_url; -use crate::openhuman::local_ai::provider::normalize_provider; -use crate::openhuman::providers::compatible::{AuthStyle, OpenAiCompatibleProvider}; -use crate::openhuman::providers::Provider; +use crate::openhuman::inference::local::lm_studio::lm_studio_base_url_from_local_ai; +use crate::openhuman::inference::local::ollama_base_url; +use crate::openhuman::inference::local::provider::normalize_provider; +use crate::openhuman::inference::provider::compatible::{AuthStyle, OpenAiCompatibleProvider}; +use crate::openhuman::inference::provider::Provider; use super::health::LocalHealthChecker; use super::provider::IntelligentRoutingProvider; @@ -135,7 +135,7 @@ pub fn new_provider( mod tests { use super::*; use crate::openhuman::config::LocalAiConfig; - use 
crate::openhuman::providers::traits::{ProviderCapabilities, ToolsPayload}; + use crate::openhuman::inference::provider::traits::{ProviderCapabilities, ToolsPayload}; use crate::openhuman::tools::ToolSpec; use async_trait::async_trait; @@ -242,7 +242,7 @@ mod tests { // OPENHUMAN_LOCAL_INFERENCE_URL env var must override config.base_url. // This is tested by ensuring construction succeeds when the env var // is set — a real URL check would require a running server. - let _guard = crate::openhuman::local_ai::local_ai_test_guard(); + let _guard = crate::openhuman::inference::local::inference_test_guard(); unsafe { std::env::set_var("OPENHUMAN_LOCAL_INFERENCE_URL", "http://127.0.0.1:9999/v1"); } diff --git a/src/openhuman/routing/mod.rs b/src/openhuman/routing/mod.rs index ffff8c5771..2fa6fba166 100644 --- a/src/openhuman/routing/mod.rs +++ b/src/openhuman/routing/mod.rs @@ -24,8 +24,8 @@ //! ```rust,ignore //! use std::sync::Arc; //! use crate::openhuman::routing; -//! use crate::openhuman::providers::create_backend_inference_provider; -//! use crate::openhuman::providers::compatible::{AuthStyle, OpenAiCompatibleProvider}; +//! use crate::openhuman::inference::provider::create_backend_inference_provider; +//! use crate::openhuman::inference::provider::compatible::{AuthStyle, OpenAiCompatibleProvider}; //! //! let remote = create_backend_inference_provider(api_url, &opts)?; //! 
let provider = routing::new_provider(remote, &config.local_ai, &config.default_model); diff --git a/src/openhuman/routing/provider.rs b/src/openhuman/routing/provider.rs index cfbf8043fb..f595d55184 100644 --- a/src/openhuman/routing/provider.rs +++ b/src/openhuman/routing/provider.rs @@ -20,7 +20,7 @@ use async_trait::async_trait; use crate::openhuman::config::{ MODEL_AGENTIC_V1, MODEL_CODING_V1, MODEL_REASONING_QUICK_V1, MODEL_REASONING_V1, }; -use crate::openhuman::providers::traits::{ +use crate::openhuman::inference::provider::traits::{ ChatMessage, ChatRequest, ChatResponse, Provider, ProviderCapabilities, StreamChunk, StreamError, StreamOptions, StreamResult, ToolsPayload, }; diff --git a/src/openhuman/routing/provider_tests.rs b/src/openhuman/routing/provider_tests.rs index 0ceee1039a..90094f30cc 100644 --- a/src/openhuman/routing/provider_tests.rs +++ b/src/openhuman/routing/provider_tests.rs @@ -1,5 +1,5 @@ use super::*; -use crate::openhuman::providers::traits::ProviderCapabilities; +use crate::openhuman::inference::provider::traits::ProviderCapabilities; use crate::openhuman::routing::health::LocalHealthChecker; use crate::openhuman::routing::policy::RoutingHints; use std::sync::{ @@ -463,7 +463,7 @@ async fn capabilities_delegate_to_remote() { #[tokio::test] async fn history_lightweight_uses_local_when_healthy() { - use crate::openhuman::providers::traits::ChatMessage; + use crate::openhuman::inference::provider::traits::ChatMessage; let local = MockProvider::new("local", "local history answer"); let remote = MockProvider::new("remote", "remote answer"); let health = LocalHealthChecker::seeded(true); @@ -487,7 +487,7 @@ async fn history_lightweight_uses_local_when_healthy() { #[tokio::test] async fn history_local_error_falls_back_to_remote() { - use crate::openhuman::providers::traits::ChatMessage; + use crate::openhuman::inference::provider::traits::ChatMessage; let local = MockProvider::new("local", "never"); local.set_fail(true); let remote = 
MockProvider::new("remote", "remote recovery"); @@ -512,7 +512,7 @@ async fn history_local_error_falls_back_to_remote() { #[tokio::test] async fn history_low_quality_local_falls_back_to_remote() { - use crate::openhuman::providers::traits::ChatMessage; + use crate::openhuman::inference::provider::traits::ChatMessage; // "I cannot help with that." is a known low-quality refusal phrase. let local = MockProvider::new("local", "I cannot help with that."); let remote = MockProvider::new("remote", "proper answer from remote"); @@ -537,7 +537,7 @@ async fn history_low_quality_local_falls_back_to_remote() { #[tokio::test] async fn history_privacy_required_suppresses_fallback_even_on_error() { - use crate::openhuman::providers::traits::ChatMessage; + use crate::openhuman::inference::provider::traits::ChatMessage; let local = MockProvider::new("local", "blocked"); local.set_fail(true); let remote = MockProvider::new("remote", "should not be called"); @@ -567,7 +567,7 @@ async fn history_privacy_required_suppresses_fallback_even_on_error() { #[tokio::test] async fn tools_present_forces_remote_even_when_local_healthy_and_lightweight() { - use crate::openhuman::providers::traits::{ChatMessage, ChatRequest}; + use crate::openhuman::inference::provider::traits::{ChatMessage, ChatRequest}; use crate::openhuman::tools::ToolSpec; let local = MockProvider::new("local", "local answer"); diff --git a/src/openhuman/screen_intelligence/processing_worker.rs b/src/openhuman/screen_intelligence/processing_worker.rs index ef2784b6ae..a088922f5b 100644 --- a/src/openhuman/screen_intelligence/processing_worker.rs +++ b/src/openhuman/screen_intelligence/processing_worker.rs @@ -12,7 +12,7 @@ use std::path::PathBuf; use std::sync::Arc; use crate::openhuman::config::Config; -use crate::openhuman::local_ai; +use crate::openhuman::inference::local as local_ai; use super::helpers::{persist_vision_summary, push_ephemeral_vision_summary, truncate_tail}; use super::state::AccessibilityEngine; diff 
--git a/src/openhuman/subconscious/executor.rs b/src/openhuman/subconscious/executor.rs index a9e8305207..43a8054773 100644 --- a/src/openhuman/subconscious/executor.rs +++ b/src/openhuman/subconscious/executor.rs @@ -199,17 +199,17 @@ async fn execute_with_local_model( let prompt_text = prompt::build_text_execution_prompt(task, situation_report, identity_context); let messages = vec![ - crate::openhuman::local_ai::ops::LocalAiChatMessage { + crate::openhuman::inference::local::ops::LocalAiChatMessage { role: "system".to_string(), content: prompt_text, }, - crate::openhuman::local_ai::ops::LocalAiChatMessage { + crate::openhuman::inference::local::ops::LocalAiChatMessage { role: "user".to_string(), content: "Execute the task now.".to_string(), }, ]; - let outcome = crate::openhuman::local_ai::ops::local_ai_chat(&config, messages, None) + let outcome = crate::openhuman::inference::ops::inference_chat(&config, messages, None) .await .map_err(|e| format!("local model: {e}"))?; @@ -259,7 +259,8 @@ async fn agent_chat_with_retry( loop { let result = - crate::openhuman::local_ai::ops::agent_chat(config, prompt, None, Some(0.3)).await; + crate::openhuman::inference::local::ops::agent_chat(config, prompt, None, Some(0.3)) + .await; match result { Ok(outcome) => return Ok(outcome.value), diff --git a/src/openhuman/threads/ops.rs b/src/openhuman/threads/ops.rs index 4cc569d902..7bcec8de0c 100644 --- a/src/openhuman/threads/ops.rs +++ b/src/openhuman/threads/ops.rs @@ -2,6 +2,7 @@ use crate::openhuman::channels::providers::web as web_channel; use crate::openhuman::config::Config; +use crate::openhuman::inference::provider::{self, ProviderRuntimeOptions}; use crate::openhuman::memory::conversations::{ self, ConversationMessage, ConversationMessagePatch, ConversationThread, CreateConversationThread, @@ -15,7 +16,6 @@ use crate::openhuman::memory::{ UpdateConversationMessageRequest, UpdateConversationThreadLabelsRequest, UpsertConversationThreadRequest, }; -use 
crate::openhuman::providers::{self, ProviderRuntimeOptions}; use crate::openhuman::threads::title::{ build_title_prompt, is_auto_generated_thread_title, sanitize_generated_title, title_from_user_message, title_log_fingerprint, THREAD_TITLE_LOG_PREFIX, @@ -322,7 +322,7 @@ pub async fn thread_generate_title( reasoning_enabled: config.runtime.reasoning_enabled, }; - let provider = match providers::create_intelligent_routing_provider( + let provider = match provider::create_intelligent_routing_provider( config.inference_url.as_deref(), config.api_url.as_deref(), config.api_key.as_deref(), diff --git a/src/openhuman/tools/impl/agent/delegate.rs b/src/openhuman/tools/impl/agent/delegate.rs index a122966556..dd43b6479f 100644 --- a/src/openhuman/tools/impl/agent/delegate.rs +++ b/src/openhuman/tools/impl/agent/delegate.rs @@ -1,5 +1,7 @@ use crate::openhuman::config::DelegateAgentConfig; -use crate::openhuman::providers::{self, Provider}; +use crate::openhuman::inference::provider::{ + create_backend_inference_provider, Provider, ProviderRuntimeOptions, INFERENCE_BACKEND_ID, +}; use crate::openhuman::security::policy::ToolOperation; use crate::openhuman::security::SecurityPolicy; use crate::openhuman::tool_timeout::tool_execution_timeout_secs; @@ -18,7 +20,7 @@ pub struct DelegateTool { agents: Arc>, security: Arc, /// Provider runtime options inherited from root config. - provider_runtime_options: providers::ProviderRuntimeOptions, + provider_runtime_options: ProviderRuntimeOptions, /// Depth at which this tool instance lives in the delegation chain. 
depth: u32, } @@ -28,17 +30,13 @@ impl DelegateTool { agents: HashMap, security: Arc, ) -> Self { - Self::new_with_options( - agents, - security, - providers::ProviderRuntimeOptions::default(), - ) + Self::new_with_options(agents, security, ProviderRuntimeOptions::default()) } pub fn new_with_options( agents: HashMap, security: Arc, - provider_runtime_options: providers::ProviderRuntimeOptions, + provider_runtime_options: ProviderRuntimeOptions, ) -> Self { Self { agents: Arc::new(agents), @@ -56,19 +54,14 @@ impl DelegateTool { security: Arc, depth: u32, ) -> Self { - Self::with_depth_and_options( - agents, - security, - depth, - providers::ProviderRuntimeOptions::default(), - ) + Self::with_depth_and_options(agents, security, depth, ProviderRuntimeOptions::default()) } pub fn with_depth_and_options( agents: HashMap, security: Arc, depth: u32, - provider_runtime_options: providers::ProviderRuntimeOptions, + provider_runtime_options: ProviderRuntimeOptions, ) -> Self { Self { agents: Arc::new(agents), @@ -184,7 +177,7 @@ impl Tool for DelegateTool { return Ok(ToolResult::error(error)); } - let provider: Box = match providers::create_backend_inference_provider( + let provider: Box = match create_backend_inference_provider( None, None, None, @@ -238,8 +231,7 @@ impl Tool for DelegateTool { Ok(ToolResult::success(format!( "[Agent '{agent_name}' ({}/{})]\n{rendered}", - providers::INFERENCE_BACKEND_ID, - agent_config.model + INFERENCE_BACKEND_ID, agent_config.model ))) } Err(e) => Ok(ToolResult::error(format!( diff --git a/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs b/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs index aa364457d5..eaa228fc1a 100644 --- a/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs +++ b/src/openhuman/tools/impl/agent/spawn_parallel_agents_test.rs @@ -5,11 +5,11 @@ use crate::openhuman::agent::harness::fork_context::{with_parent_context, Parent use crate::openhuman::agent::Agent; use 
crate::openhuman::config::AgentConfig; use crate::openhuman::context::prompt::ToolCallFormat; -use crate::openhuman::memory::{Memory, MemoryCategory, MemoryEntry, NamespaceSummary, RecallOpts}; -use crate::openhuman::providers::traits::ProviderCapabilities; -use crate::openhuman::providers::{ +use crate::openhuman::inference::provider::traits::ProviderCapabilities; +use crate::openhuman::inference::provider::{ ChatRequest, ChatResponse, ConversationMessage, Provider, ToolCall, }; +use crate::openhuman::memory::{Memory, MemoryCategory, MemoryEntry, NamespaceSummary, RecallOpts}; use crate::openhuman::tools::{PermissionLevel, Tool, ToolResult}; use async_trait::async_trait; use parking_lot::Mutex; diff --git a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs index ba7241222d..7e04023331 100644 --- a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs +++ b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs @@ -135,8 +135,9 @@ impl Tool for SpawnWorkerThreadTool { // ── Depth Guard ──────────────────────────────────────────────── // Check if the current thread is already a worker thread. 
- let current_thread_id = crate::openhuman::providers::thread_context::current_thread_id() - .unwrap_or_else(|| "unknown".to_string()); + let current_thread_id = + crate::openhuman::inference::provider::thread_context::current_thread_id() + .unwrap_or_else(|| "unknown".to_string()); tracing::info!( agent_id = %agent_id, @@ -289,7 +290,7 @@ mod tests { struct MockProvider; #[async_trait] - impl crate::openhuman::providers::Provider for MockProvider { + impl crate::openhuman::inference::provider::Provider for MockProvider { async fn chat_with_system( &self, _: Option<&str>, @@ -301,11 +302,11 @@ mod tests { } async fn chat( &self, - _: crate::openhuman::providers::ChatRequest<'_>, + _: crate::openhuman::inference::provider::ChatRequest<'_>, _: &str, _: f64, - ) -> anyhow::Result { - Ok(crate::openhuman::providers::ChatResponse { + ) -> anyhow::Result { + Ok(crate::openhuman::inference::provider::ChatResponse { text: Some("done".into()), tool_calls: vec![], usage: None, @@ -409,26 +410,29 @@ mod tests { ) .unwrap(); - crate::openhuman::providers::thread_context::with_thread_id(thread_id.to_string(), async { - let parent = test_parent_ctx(temp.path().to_path_buf()); - with_parent_context(parent, async { - let tool = SpawnWorkerThreadTool::new(); - let result = tool - .execute(json!({ - "agent_id": "researcher", - "prompt": "do it", - "task_title": "Task" - })) - .await - .unwrap(); - - assert!(result.is_error); - assert!(result - .output() - .contains("cannot spawn other worker threads")); - }) - .await; - }) + crate::openhuman::inference::provider::thread_context::with_thread_id( + thread_id.to_string(), + async { + let parent = test_parent_ctx(temp.path().to_path_buf()); + with_parent_context(parent, async { + let tool = SpawnWorkerThreadTool::new(); + let result = tool + .execute(json!({ + "agent_id": "researcher", + "prompt": "do it", + "task_title": "Task" + })) + .await + .unwrap(); + + assert!(result.is_error); + assert!(result + .output() + .contains("cannot 
spawn other worker threads")); + }) + .await; + }, + ) .await; } @@ -448,26 +452,29 @@ mod tests { ) .unwrap(); - crate::openhuman::providers::thread_context::with_thread_id(thread_id.to_string(), async { - let parent = test_parent_ctx(temp.path().to_path_buf()); - with_parent_context(parent, async { - let tool = SpawnWorkerThreadTool::new(); - let result = tool - .execute(json!({ - "agent_id": "researcher", - "prompt": "do it", - "task_title": "Task" - })) - .await - .unwrap(); - - assert!(result.is_error); - assert!(result - .output() - .contains("cannot spawn other worker threads")); - }) - .await; - }) + crate::openhuman::inference::provider::thread_context::with_thread_id( + thread_id.to_string(), + async { + let parent = test_parent_ctx(temp.path().to_path_buf()); + with_parent_context(parent, async { + let tool = SpawnWorkerThreadTool::new(); + let result = tool + .execute(json!({ + "agent_id": "researcher", + "prompt": "do it", + "task_title": "Task" + })) + .await + .unwrap(); + + assert!(result.is_error); + assert!(result + .output() + .contains("cannot spawn other worker threads")); + }) + .await; + }, + ) .await; } } diff --git a/src/openhuman/tools/impl/agent/todo.rs b/src/openhuman/tools/impl/agent/todo.rs index 71f7d174d0..6c29b92d15 100644 --- a/src/openhuman/tools/impl/agent/todo.rs +++ b/src/openhuman/tools/impl/agent/todo.rs @@ -8,7 +8,7 @@ //! rendering so transcripts read cleanly. 
use crate::openhuman::agent::task_board::{TaskBoardCard, TaskCardStatus}; -use crate::openhuman::providers::thread_context; +use crate::openhuman::inference::provider::thread_context; use crate::openhuman::todos::ops::{self, BoardLocation, CardPatch}; use crate::openhuman::tools::traits::{PermissionLevel, Tool, ToolResult}; use async_trait::async_trait; diff --git a/src/openhuman/tools/ops.rs b/src/openhuman/tools/ops.rs index 22418b76de..5912cdbdea 100644 --- a/src/openhuman/tools/ops.rs +++ b/src/openhuman/tools/ops.rs @@ -338,7 +338,7 @@ pub fn all_tools_with_runtime( tools.push(Box::new(DelegateTool::new_with_options( delegate_agents, security.clone(), - crate::openhuman::providers::ProviderRuntimeOptions { + crate::openhuman::inference::provider::ProviderRuntimeOptions { auth_profile_override: None, openhuman_dir: root_config .config_path diff --git a/src/openhuman/tree_summarizer/engine.rs b/src/openhuman/tree_summarizer/engine.rs index 8398604e55..83ca2425a9 100644 --- a/src/openhuman/tree_summarizer/engine.rs +++ b/src/openhuman/tree_summarizer/engine.rs @@ -7,7 +7,7 @@ use std::collections::BTreeMap; use crate::core::event_bus::{publish_global, DomainEvent}; use crate::openhuman::config::Config; -use crate::openhuman::providers::traits::Provider; +use crate::openhuman::inference::provider::traits::Provider; use crate::openhuman::tree_summarizer::store; use crate::openhuman::tree_summarizer::types::{ derive_node_ids, derive_parent_id, estimate_tokens, level_from_node_id, NodeLevel, TreeNode, diff --git a/src/openhuman/tree_summarizer/ops.rs b/src/openhuman/tree_summarizer/ops.rs index 667c9c1fd2..87ad921296 100644 --- a/src/openhuman/tree_summarizer/ops.rs +++ b/src/openhuman/tree_summarizer/ops.rs @@ -146,7 +146,7 @@ pub async fn tree_summarizer_rebuild( fn create_provider( config: &Config, -) -> Result, String> { +) -> Result, String> { // Tree summarization runs exclusively on local AI to keep memory // processing private and offline — no backend calls. 
if !config.local_ai.runtime_enabled { @@ -159,10 +159,10 @@ fn create_provider( /// wrapped in `ReliableProvider` for retry/backoff on transient failures. fn create_local_ai_provider( config: &Config, -) -> Result, String> { - use crate::openhuman::local_ai::OLLAMA_BASE_URL; - use crate::openhuman::providers::compatible::{AuthStyle, OpenAiCompatibleProvider}; - use crate::openhuman::providers::reliable::ReliableProvider; +) -> Result, String> { + use crate::openhuman::inference::local::OLLAMA_BASE_URL; + use crate::openhuman::inference::provider::compatible::{AuthStyle, OpenAiCompatibleProvider}; + use crate::openhuman::inference::provider::reliable::ReliableProvider; let base_url = format!("{}/v1", OLLAMA_BASE_URL); let inner = OpenAiCompatibleProvider::new_no_responses_fallback( @@ -174,7 +174,7 @@ fn create_local_ai_provider( let providers: Vec<( String, - Box, + Box, )> = vec![("ollama-local".to_string(), Box::new(inner))]; let reliable = ReliableProvider::new( providers, diff --git a/src/openhuman/voice/mod.rs b/src/openhuman/voice/mod.rs index ffcc043ce1..6d713dcfbe 100644 --- a/src/openhuman/voice/mod.rs +++ b/src/openhuman/voice/mod.rs @@ -3,25 +3,33 @@ //! Provides RPC endpoints under the `openhuman.voice_*` namespace for //! transcription, synthesis, proactive availability checking, and a //! standalone voice dictation server (hotkey → record → transcribe → insert). +//! +//! Inference implementations (local_speech, local_transcribe, cloud_transcribe, +//! hallucination, streaming, postprocess) now live under +//! `crate::openhuman::inference::voice` so all inference concerns share a +//! single domain root. 
pub mod audio_capture; pub(crate) mod cli; -pub mod cloud_transcribe; pub mod dictation_listener; pub mod factory; -pub mod hallucination; pub mod hotkey; -pub mod local_speech; -pub mod local_transcribe; mod ops; -mod postprocess; pub mod reply_speech; mod schemas; pub mod server; -pub mod streaming; pub mod text_input; mod types; +// Re-export the inference-side voice modules so `voice::local_speech`, +// `voice::local_transcribe`, etc. continue to resolve for existing callers. +pub use crate::openhuman::inference::voice::cloud_transcribe; +pub use crate::openhuman::inference::voice::hallucination; +pub use crate::openhuman::inference::voice::local_speech; +pub use crate::openhuman::inference::voice::local_transcribe; +pub use crate::openhuman::inference::voice::postprocess; +pub use crate::openhuman::inference::voice::streaming; + pub use factory::{ create_stt_provider, create_tts_provider, default_stt_provider, default_tts_provider, SttProvider, SttResult, TtsProvider, DEFAULT_PIPER_VOICE, DEFAULT_WHISPER_MODEL, diff --git a/src/openhuman/voice/ops.rs b/src/openhuman/voice/ops.rs index 5f25e0570f..85e59ea559 100644 --- a/src/openhuman/voice/ops.rs +++ b/src/openhuman/voice/ops.rs @@ -8,12 +8,12 @@ use log::{debug, warn}; use std::time::Instant; use crate::openhuman::config::Config; -use crate::openhuman::local_ai; -use crate::openhuman::local_ai::model_ids; -use crate::openhuman::local_ai::paths::{ +use crate::openhuman::inference::local as local_ai; +use crate::openhuman::inference::local::model_ids; +use crate::openhuman::inference::local::paths::{ resolve_piper_binary, resolve_stt_model_path, resolve_tts_voice_path, resolve_whisper_binary, }; -use crate::openhuman::local_ai::whisper_engine; +use crate::openhuman::inference::local::whisper_engine; use crate::rpc::RpcOutcome; use super::hallucination::{is_hallucinated_output, HallucinationMode}; diff --git a/src/openhuman/voice/types.rs b/src/openhuman/voice/types.rs index 1103090633..b4136647f5 100644 --- 
a/src/openhuman/voice/types.rs +++ b/src/openhuman/voice/types.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; -use crate::openhuman::local_ai::{LocalAiSpeechResult, LocalAiTtsResult}; +use crate::openhuman::inference::{LocalAiSpeechResult, LocalAiTtsResult}; /// Result of a speech-to-text transcription. #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/tests/agent_builder_public.rs b/tests/agent_builder_public.rs index bb16e0206b..46433459b5 100644 --- a/tests/agent_builder_public.rs +++ b/tests/agent_builder_public.rs @@ -3,8 +3,8 @@ use async_trait::async_trait; use openhuman_core::openhuman::agent::dispatcher::XmlToolDispatcher; use openhuman_core::openhuman::agent::Agent; use openhuman_core::openhuman::context::prompt::SystemPromptBuilder; +use openhuman_core::openhuman::inference::provider::{ChatRequest, ChatResponse, Provider}; use openhuman_core::openhuman::memory::{Memory, MemoryCategory, MemoryEntry}; -use openhuman_core::openhuman::providers::{ChatRequest, ChatResponse, Provider}; use openhuman_core::openhuman::tools::{Tool, ToolResult}; use std::collections::HashSet; use std::sync::Arc; diff --git a/tests/agent_harness_public.rs b/tests/agent_harness_public.rs index 6eb9e21ccc..5e1fd96c3a 100644 --- a/tests/agent_harness_public.rs +++ b/tests/agent_harness_public.rs @@ -7,8 +7,10 @@ use openhuman_core::openhuman::agent::hooks::{ fire_hooks, sanitize_tool_output, PostTurnHook, ToolCallRecord, TurnContext, }; use openhuman_core::openhuman::config::AgentConfig; +use openhuman_core::openhuman::inference::provider::{ + ChatMessage, ChatRequest, ChatResponse, Provider, +}; use openhuman_core::openhuman::memory::{Memory, MemoryCategory, MemoryEntry}; -use openhuman_core::openhuman::providers::{ChatMessage, ChatRequest, ChatResponse, Provider}; use parking_lot::Mutex; use std::sync::atomic::Ordering; use std::sync::Arc; diff --git a/tests/agent_multimodal_public.rs b/tests/agent_multimodal_public.rs index 7d67da8703..5b17ca276f 100644 --- 
a/tests/agent_multimodal_public.rs +++ b/tests/agent_multimodal_public.rs @@ -4,7 +4,7 @@ use openhuman_core::openhuman::agent::multimodal::{ prepare_messages_for_provider, }; use openhuman_core::openhuman::config::MultimodalConfig; -use openhuman_core::openhuman::providers::ChatMessage; +use openhuman_core::openhuman::inference::provider::ChatMessage; #[test] fn marker_helpers_cover_mixed_content_and_payload_extraction() { diff --git a/tests/calendar_grounding_e2e.rs b/tests/calendar_grounding_e2e.rs index cdd3664b58..20df3900ab 100644 --- a/tests/calendar_grounding_e2e.rs +++ b/tests/calendar_grounding_e2e.rs @@ -2,7 +2,7 @@ use anyhow::Result; use async_trait::async_trait; use openhuman_core::openhuman::agent::dispatcher::NativeToolDispatcher; use openhuman_core::openhuman::agent::Agent; -use openhuman_core::openhuman::providers::{ +use openhuman_core::openhuman::inference::provider::{ ChatMessage, ChatRequest, ChatResponse, Provider, ToolCall, }; use openhuman_core::openhuman::tools::{PermissionLevel, Tool, ToolResult}; diff --git a/tests/inference_provider_e2e.rs b/tests/inference_provider_e2e.rs new file mode 100644 index 0000000000..7aadc3a0ae --- /dev/null +++ b/tests/inference_provider_e2e.rs @@ -0,0 +1,569 @@ +//! Inference provider end-to-end tests using wiremock. +//! +//! These tests spin up a wiremock HTTP server on a random port and verify +//! that `OpenAiCompatibleProvider` sends correct request bodies and correctly +//! interprets responses for the major provider shapes (OpenAI-compat, +//! Anthropic auth, streaming, temperature suppression, Ollama endpoint). +//! +//! The `/v1/chat/completions` and `/v1/models` HTTP endpoint tests verify the +//! full axum router layer (auth middleware + provider routing) end-to-end. +//! +//! No live LLM API calls are made. 
+ +use std::sync::{Mutex, OnceLock}; + +use axum::body::Body; +use axum::http::{header, Method, Request, StatusCode}; +use serde_json::{json, Value}; +use tempfile::tempdir; +use tower::ServiceExt; +use wiremock::matchers::{header as wm_header, method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +use openhuman_core::core::auth::{init_rpc_token, CORE_TOKEN_ENV_VAR}; +use openhuman_core::core::jsonrpc::build_core_http_router; +use openhuman_core::openhuman::inference::provider::compatible::{ + AuthStyle, OpenAiCompatibleProvider, +}; +use openhuman_core::openhuman::inference::provider::traits::{ChatMessage, Provider}; + +// ── Environment serialisation lock ─────────────────────────────────────────── +// +// Tests that mutate OPENHUMAN_WORKSPACE or OPENHUMAN_CORE_TOKEN must acquire +// this lock first to prevent races when cargo runs tests in parallel threads +// within the same process. + +static ENV_LOCK: OnceLock> = OnceLock::new(); +static RPC_AUTH_INIT: OnceLock<()> = OnceLock::new(); + +fn env_lock() -> std::sync::MutexGuard<'static, ()> { + let m = ENV_LOCK.get_or_init(|| Mutex::new(())); + match m.lock() { + Ok(g) => g, + Err(p) => p.into_inner(), + } +} + +const TEST_RPC_TOKEN: &str = "inference-provider-e2e-token"; + +fn ensure_rpc_auth() { + RPC_AUTH_INIT.get_or_init(|| { + // SAFETY: test-only, serialised by OnceLock. + unsafe { std::env::set_var(CORE_TOKEN_ENV_VAR, TEST_RPC_TOKEN) }; + let tmp = tempdir().expect("tempdir"); + init_rpc_token(tmp.path()).expect("init rpc auth token"); + // Keep tmp alive for the process duration by leaking it — the token + // file must remain readable for all subsequent auth checks. 
+ std::mem::forget(tmp); + }); +} + +// ── Canned OpenAI-compatible response body ──────────────────────────────────── + +fn openai_chat_response(content: &str) -> Value { + json!({ + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1_700_000_000_u64, + "model": "gpt-4o-mini", + "choices": [{ + "index": 0, + "message": { "role": "assistant", "content": content }, + "finish_reason": "stop" + }], + "usage": { "prompt_tokens": 5, "completion_tokens": 10, "total_tokens": 15 } + }) +} + +// ── Helper: build an env-isolated Config pointing at tempdir ───────────────── + +/// Sets OPENHUMAN_WORKSPACE to `dir` and returns an `EnvVarGuard` that +/// restores the previous value on drop. Must be called under `env_lock()`. +struct EnvGuard { + key: &'static str, + prev: Option, +} + +impl EnvGuard { + fn set(key: &'static str, val: &str) -> Self { + let prev = std::env::var(key).ok(); + // SAFETY: caller holds env_lock(). + unsafe { std::env::set_var(key, val) }; + Self { key, prev } + } +} + +impl Drop for EnvGuard { + fn drop(&mut self) { + match &self.prev { + // SAFETY: caller's env_lock guard is still alive during drop. 
+ Some(v) => unsafe { std::env::set_var(self.key, v) }, + None => unsafe { std::env::remove_var(self.key) }, + } + } +} + +// ── Test 1: OpenAI-compat chat returns canned text ─────────────────────────── + +#[tokio::test] +async fn openai_compat_chat_returns_canned_text() { + let server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("Hello!"))) + .mount(&server) + .await; + + let provider = OpenAiCompatibleProvider::new( + "test", + &format!("{}/v1", server.uri()), + Some("test-key"), + AuthStyle::Bearer, + ); + + let messages = vec![ChatMessage::user("hi")]; + let result = provider + .chat_with_history(&messages, "gpt-4o-mini", 0.7) + .await + .expect("chat_with_history should succeed"); + + assert_eq!(result, "Hello!"); +} + +// ── Test 2: Temperature present for normal model ────────────────────────────── + +#[tokio::test] +async fn openai_compat_temperature_present_for_normal_model() { + let server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("ok"))) + .mount(&server) + .await; + + let provider = OpenAiCompatibleProvider::new( + "test", + &format!("{}/v1", server.uri()), + Some("key"), + AuthStyle::Bearer, + ); + + provider + .chat_with_history(&[ChatMessage::user("hi")], "gpt-4o-mini", 0.7) + .await + .expect("should succeed"); + + let requests = server.received_requests().await.unwrap(); + assert_eq!(requests.len(), 1); + let body: Value = serde_json::from_slice(&requests[0].body).unwrap(); + assert!( + body.get("temperature").is_some(), + "temperature should be present for gpt-4o-mini; body={body}" + ); + assert_eq!(body["temperature"].as_f64().unwrap(), 0.7); +} + +// ── Test 3: Temperature omitted for o1 models ──────────────────────────────── + +#[tokio::test] +async fn 
openai_compat_omits_temperature_for_o1_models() { + let server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("done"))) + .mount(&server) + .await; + + let provider = OpenAiCompatibleProvider::new( + "test", + &format!("{}/v1", server.uri()), + Some("key"), + AuthStyle::Bearer, + ) + .with_temperature_unsupported_models(vec!["o1*".to_string()]); + + provider + .chat_with_history(&[ChatMessage::user("reason")], "o1-preview", 0.7) + .await + .expect("should succeed"); + + let requests = server.received_requests().await.unwrap(); + assert_eq!(requests.len(), 1); + let body: Value = serde_json::from_slice(&requests[0].body).unwrap(); + assert!( + body.get("temperature").is_none(), + "temperature must be absent for o1-preview; body={body}" + ); + // Response should still be returned correctly. +} + +// ── Test 4: Temperature omitted for gpt-5 models ───────────────────────────── + +#[tokio::test] +async fn openai_compat_omits_temperature_for_gpt5_models() { + let server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("done"))) + .mount(&server) + .await; + + let provider = OpenAiCompatibleProvider::new( + "test", + &format!("{}/v1", server.uri()), + Some("key"), + AuthStyle::Bearer, + ) + .with_temperature_unsupported_models(vec![ + "o1*".to_string(), + "o3*".to_string(), + "o4*".to_string(), + "gpt-5*".to_string(), + ]); + + for model in &["gpt-5", "gpt-5-turbo", "o3-mini", "o4-preview"] { + server.reset().await; + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("done"))) + .mount(&server) + .await; + + provider + .chat_with_history(&[ChatMessage::user("test")], model, 0.7) + .await + .expect("should succeed"); + + let 
requests = server.received_requests().await.unwrap(); + assert_eq!(requests.len(), 1, "model={model}"); + let body: Value = serde_json::from_slice(&requests[0].body).unwrap(); + assert!( + body.get("temperature").is_none(), + "temperature must be absent for model={model}; body={body}" + ); + } +} + +// ── Test 5: Anthropic auth style ───────────────────────────────────────────── + +#[tokio::test] +async fn openai_compat_anthropic_auth_uses_x_api_key_header() { + let server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .and(wm_header("x-api-key", "sk-ant-test")) + .and(wm_header("anthropic-version", "2023-06-01")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("hi"))) + .mount(&server) + .await; + + let provider = OpenAiCompatibleProvider::new( + "anthropic", + &format!("{}/v1", server.uri()), + Some("sk-ant-test"), + AuthStyle::Anthropic, + ); + + let result = provider + .chat_with_history(&[ChatMessage::user("hello")], "claude-3-haiku", 0.5) + .await + .expect("Anthropic auth chat should succeed"); + + assert_eq!(result, "hi"); + + // Verify Bearer header was NOT sent. 
+ let requests = server.received_requests().await.unwrap(); + assert_eq!(requests.len(), 1); + let auth = requests[0].headers.get("authorization"); + assert!( + auth.is_none(), + "Authorization header must NOT be set for Anthropic auth; found {:?}", + auth + ); +} + +// ── Test 6: Streaming response returns ordered deltas ──────────────────────── + +#[tokio::test] +async fn openai_compat_streaming_returns_ordered_deltas() { + let server = MockServer::start().await; + + let sse_body = concat!( + "data: {\"id\":\"x\",\"choices\":[{\"delta\":{\"role\":\"assistant\",\"content\":\"Hel\"},\"finish_reason\":null}]}\n\n", + "data: {\"id\":\"x\",\"choices\":[{\"delta\":{\"content\":\"lo\"},\"finish_reason\":null}]}\n\n", + "data: {\"id\":\"x\",\"choices\":[{\"delta\":{\"content\":\"!\"},\"finish_reason\":\"stop\"}]}\n\n", + "data: [DONE]\n\n", + ); + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_string(sse_body), + ) + .mount(&server) + .await; + + let provider = OpenAiCompatibleProvider::new( + "test", + &format!("{}/v1", server.uri()), + Some("key"), + AuthStyle::Bearer, + ); + + // stream_chat_with_system is the implemented streaming method on this provider. 
+ let options = openhuman_core::openhuman::inference::provider::traits::StreamOptions::new(true); + use futures_util::StreamExt; + let mut stream = provider.stream_chat_with_system( + Some("You are helpful."), + "Say Hello!", + "gpt-4o-mini", + 0.7, + options, + ); + + let mut deltas = Vec::new(); + while let Some(result) = stream.next().await { + let chunk = result.expect("stream chunk should be Ok"); + if !chunk.delta.is_empty() { + deltas.push(chunk.delta); + } + } + + let combined = deltas.join(""); + assert_eq!( + combined, "Hello!", + "combined stream deltas should equal 'Hello!'; got '{combined}'" + ); +} + +// ── Test 7: Ollama endpoint shape ──────────────────────────────────────────── + +#[tokio::test] +async fn ollama_compat_chat_via_openai_v1_endpoint() { + let server = MockServer::start().await; + + // Ollama via OpenAI-compat /v1 endpoint — wiremock pretends to be Ollama. + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("Bonjour!"))) + .mount(&server) + .await; + + // Factory builds Ollama provider via OpenAiCompatibleProvider at /v1. 
+ let base = server.uri(); + let endpoint = format!("{}/v1", base.trim_end_matches('/')); + let provider = OpenAiCompatibleProvider::new("ollama", &endpoint, None, AuthStyle::None); + + let result = provider + .chat_with_history(&[ChatMessage::user("Bonjour?")], "llama3", 0.7) + .await + .expect("Ollama compat chat should succeed"); + + assert_eq!(result, "Bonjour!"); +} + +// ── Test 8: /v1/chat/completions HTTP endpoint — unauthorized ───────────────── + +#[tokio::test] +async fn http_endpoint_chat_completions_no_bearer_returns_401() { + let _lock = env_lock(); + ensure_rpc_auth(); + + let body = json!({ + "model": "ollama:llama3", + "messages": [{ "role": "user", "content": "hello" }] + }); + let req = Request::builder() + .method(Method::POST) + .uri("/v1/chat/completions") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from(serde_json::to_string(&body).unwrap())) + .unwrap(); + + let resp = build_core_http_router(false).oneshot(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); +} + +// ── Test 9: /v1/models — unauthorized ──────────────────────────────────────── + +#[tokio::test] +async fn http_endpoint_models_no_bearer_returns_401() { + let _lock = env_lock(); + ensure_rpc_auth(); + + let req = Request::builder() + .method(Method::GET) + .uri("/v1/models") + .body(Body::empty()) + .unwrap(); + + let resp = build_core_http_router(false).oneshot(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); +} + +// ── Test 10: /v1/models with bearer returns non-empty list ─────────────────── + +#[tokio::test] +async fn http_endpoint_models_with_bearer_returns_model_list() { + let _lock = env_lock(); + ensure_rpc_auth(); + + let tmp = tempdir().expect("tempdir"); + let _workspace_guard = EnvGuard::set("OPENHUMAN_WORKSPACE", tmp.path().to_str().unwrap()); + + let req = Request::builder() + .method(Method::GET) + .uri("/v1/models") + .header(header::AUTHORIZATION, format!("Bearer {TEST_RPC_TOKEN}")) + 
.body(Body::empty()) + .unwrap(); + + let resp = build_core_http_router(false).oneshot(req).await.unwrap(); + assert_ne!( + resp.status(), + StatusCode::UNAUTHORIZED, + "401 must not fire when bearer is present" + ); + assert_ne!( + resp.status(), + StatusCode::FORBIDDEN, + "403 must not fire when bearer is present" + ); + + if resp.status().is_success() { + let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let json: Value = serde_json::from_slice(&body).unwrap(); + let models = json.get("data").and_then(Value::as_array); + if let Some(list) = models { + assert!( + !list.is_empty(), + "/v1/models should return at least one model" + ); + } + } +} + +// ── Test 11: /v1/chat/completions with bearer passes auth ──────────────────── + +#[tokio::test] +async fn http_endpoint_chat_completions_with_bearer_passes_auth() { + let _lock = env_lock(); + ensure_rpc_auth(); + + let body = json!({ + "model": "ollama:llama3", + "messages": [{ "role": "user", "content": "ping" }], + "stream": false + }); + let req = Request::builder() + .method(Method::POST) + .uri("/v1/chat/completions") + .header(header::CONTENT_TYPE, "application/json") + .header(header::AUTHORIZATION, format!("Bearer {TEST_RPC_TOKEN}")) + .body(Body::from(serde_json::to_string(&body).unwrap())) + .unwrap(); + + let resp = build_core_http_router(false).oneshot(req).await.unwrap(); + assert_ne!( + resp.status(), + StatusCode::UNAUTHORIZED, + "401 must not fire when bearer is present" + ); + assert_ne!( + resp.status(), + StatusCode::FORBIDDEN, + "403 must not fire when bearer is present" + ); +} + +// ── Test 12: Request model field is preserved ───────────────────────────────── + +#[tokio::test] +async fn openai_compat_request_body_contains_correct_model() { + let server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("ok"))) + .mount(&server) + 
.await; + + let provider = OpenAiCompatibleProvider::new( + "test", + &format!("{}/v1", server.uri()), + Some("key"), + AuthStyle::Bearer, + ); + + provider + .chat_with_history(&[ChatMessage::user("hi")], "claude-3-sonnet", 0.5) + .await + .expect("should succeed"); + + let requests = server.received_requests().await.unwrap(); + let body: Value = serde_json::from_slice(&requests[0].body).unwrap(); + assert_eq!(body["model"].as_str().unwrap(), "claude-3-sonnet"); +} + +// ── Test 13: Bearer token is sent in Authorization header ──────────────────── + +#[tokio::test] +async fn openai_compat_bearer_auth_sends_authorization_header() { + let server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .and(wm_header("authorization", "Bearer secret-key")) + .respond_with(ResponseTemplate::new(200).set_body_json(openai_chat_response("ok"))) + .mount(&server) + .await; + + let provider = OpenAiCompatibleProvider::new( + "test", + &format!("{}/v1", server.uri()), + Some("secret-key"), + AuthStyle::Bearer, + ); + + let result = provider + .chat_with_history(&[ChatMessage::user("hi")], "gpt-4o", 0.7) + .await + .expect("should succeed"); + + assert_eq!(result, "ok"); +} + +// ── Test 14: temperature_for_model helper ──────────────────────────────────── + +#[test] +fn temperature_helper_suppresses_o1_by_default_config() { + use openhuman_core::openhuman::config::Config; + use openhuman_core::openhuman::inference::provider::temperature::temperature_for_model; + + let config = Config::default(); + + // Normal model → temperature returned + assert_eq!( + temperature_for_model("gpt-4o-mini", 0.7, &config), + Some(0.7) + ); + assert_eq!( + temperature_for_model("claude-3-sonnet", 0.5, &config), + Some(0.5) + ); + + // o1/o3/o4/gpt-5 → temperature suppressed + assert_eq!(temperature_for_model("o1-preview", 0.7, &config), None); + assert_eq!(temperature_for_model("o3-mini", 0.7, &config), None); + 
assert_eq!(temperature_for_model("o4-turbo", 0.7, &config), None); + assert_eq!(temperature_for_model("gpt-5-turbo", 0.7, &config), None); +} diff --git a/tests/json_rpc_e2e.rs b/tests/json_rpc_e2e.rs index e1c3ccde7b..d1a82d1058 100644 --- a/tests/json_rpc_e2e.rs +++ b/tests/json_rpc_e2e.rs @@ -2247,9 +2247,11 @@ async fn json_rpc_web_chat_custom_reasoning_provider_with_auth_none_omits_auth_h let loaded_config = openhuman_core::openhuman::config::load_config_with_timeout() .await .expect("load_config after auth-none update"); - let (provider, model) = - openhuman_core::openhuman::providers::create_chat_provider("reasoning", &loaded_config) - .expect("custom auth-none provider should build"); + let (provider, model) = openhuman_core::openhuman::inference::provider::create_chat_provider( + "reasoning", + &loaded_config, + ) + .expect("custom auth-none provider should build"); let direct = provider .simple_chat("direct custom-provider smoke test", &model, 0.0) .await @@ -3353,13 +3355,14 @@ async fn json_rpc_local_ai_device_profile_and_presets() { let profile = post_json_rpc( &rpc_base, 30, - "openhuman.local_ai_device_profile", + "openhuman.inference_device_profile", json!({}), ) .await; let profile_result = assert_no_jsonrpc_error(&profile, "device_profile"); + let profile_payload = profile_result.get("result").unwrap_or(profile_result); assert!( - profile_result + profile_payload .get("total_ram_bytes") .and_then(Value::as_u64) .unwrap_or(0) @@ -3367,7 +3370,7 @@ async fn json_rpc_local_ai_device_profile_and_presets() { "expected positive RAM: {profile_result}" ); assert!( - profile_result + profile_payload .get("cpu_count") .and_then(Value::as_u64) .unwrap_or(0) @@ -3376,9 +3379,10 @@ async fn json_rpc_local_ai_device_profile_and_presets() { ); // --- presets --- - let presets = post_json_rpc(&rpc_base, 31, "openhuman.local_ai_presets", json!({})).await; + let presets = post_json_rpc(&rpc_base, 31, "openhuman.inference_presets", json!({})).await; let 
presets_result = assert_no_jsonrpc_error(&presets, "presets"); - let presets_arr = presets_result + let presets_payload = presets_result.get("result").unwrap_or(presets_result); + let presets_arr = presets_payload .get("presets") .and_then(Value::as_array) .expect("presets should be an array"); @@ -3393,7 +3397,7 @@ async fn json_rpc_local_ai_device_profile_and_presets() { "only the ram_2_4gb (1B) preset should be exposed: {presets_result}" ); - let recommended = presets_result + let recommended = presets_payload .get("recommended_tier") .and_then(Value::as_str) .expect("should have recommended_tier"); @@ -3402,7 +3406,7 @@ async fn json_rpc_local_ai_device_profile_and_presets() { "MVP recommends the only allowed tier: {recommended}" ); - let current = presets_result + let current = presets_payload .get("current_tier") .and_then(Value::as_str) .expect("should have current_tier"); @@ -3416,29 +3420,34 @@ async fn json_rpc_local_ai_device_profile_and_presets() { let apply = post_json_rpc( &rpc_base, 32, - "openhuman.local_ai_apply_preset", + "openhuman.inference_apply_preset", json!({"tier": "ram_2_4gb"}), ) .await; let apply_result = assert_no_jsonrpc_error(&apply, "apply_preset"); + let apply_payload = apply_result.get("result").unwrap_or(apply_result); assert_eq!( - apply_result.get("applied_tier").and_then(Value::as_str), + apply_payload.get("applied_tier").and_then(Value::as_str), Some("ram_2_4gb") ); assert_eq!( - apply_result.get("chat_model_id").and_then(Value::as_str), + apply_payload.get("chat_model_id").and_then(Value::as_str), Some("gemma3:1b-it-qat") ); assert_eq!( - apply_result.get("vision_mode").and_then(Value::as_str), + apply_payload.get("vision_mode").and_then(Value::as_str), Some("disabled") ); // --- verify presets reflects the change --- - let presets_after = post_json_rpc(&rpc_base, 33, "openhuman.local_ai_presets", json!({})).await; + let presets_after = + post_json_rpc(&rpc_base, 33, "openhuman.inference_presets", json!({})).await; let 
presets_after_result = assert_no_jsonrpc_error(&presets_after, "presets_after"); + let presets_after_payload = presets_after_result + .get("result") + .unwrap_or(presets_after_result); assert_eq!( - presets_after_result + presets_after_payload .get("current_tier") .and_then(Value::as_str), Some("ram_2_4gb"), @@ -3449,7 +3458,7 @@ async fn json_rpc_local_ai_device_profile_and_presets() { let bad_apply = post_json_rpc( &rpc_base, 34, - "openhuman.local_ai_apply_preset", + "openhuman.inference_apply_preset", json!({"tier": "ultra"}), ) .await; @@ -3540,7 +3549,7 @@ async fn json_rpc_local_ai_lm_studio_config_diagnostics_and_prompt() { let update = post_json_rpc( &rpc_base, 36, - "openhuman.config_update_local_ai_settings", + "openhuman.inference_update_local_settings", json!({ "runtime_enabled": true, "opt_in_confirmed": true, @@ -3572,7 +3581,7 @@ async fn json_rpc_local_ai_lm_studio_config_diagnostics_and_prompt() { ); let diagnostics = - post_json_rpc(&rpc_base, 37, "openhuman.local_ai_diagnostics", json!({})).await; + post_json_rpc(&rpc_base, 37, "openhuman.inference_diagnostics", json!({})).await; let diagnostics_result = assert_no_jsonrpc_error(&diagnostics, "lm_studio_diagnostics"); assert_eq!( diagnostics_result.get("provider").and_then(Value::as_str), @@ -3595,7 +3604,7 @@ async fn json_rpc_local_ai_lm_studio_config_diagnostics_and_prompt() { let prompt = post_json_rpc( &rpc_base, 38, - "openhuman.local_ai_prompt", + "openhuman.inference_prompt", json!({ "prompt": "hello", "max_tokens": 16, @@ -3614,6 +3623,238 @@ async fn json_rpc_local_ai_lm_studio_config_diagnostics_and_prompt() { rpc_join.abort(); } +#[tokio::test] +async fn json_rpc_inference_namespace_lm_studio_prompt_and_status() { + let _env_lock = json_rpc_e2e_env_lock(); + let tmp = tempdir().expect("tempdir"); + let home = tmp.path(); + let openhuman_home = home.join(".openhuman"); + + let _home_guard = EnvVarGuard::set_to_path("HOME", home); + let _workspace_guard = 
EnvVarGuard::unset("OPENHUMAN_WORKSPACE"); + let _backend_url_guard = EnvVarGuard::unset("BACKEND_URL"); + let _vite_backend_guard = EnvVarGuard::unset("VITE_BACKEND_URL"); + let _tier_guard = EnvVarGuard::unset("OPENHUMAN_LOCAL_AI_TIER"); + let _lm_env_guard = EnvVarGuard::unset("OPENHUMAN_LM_STUDIO_BASE_URL"); + let _lm_alias_env_guard = EnvVarGuard::unset("LM_STUDIO_BASE_URL"); + + let (mock_addr, mock_join) = serve_on_ephemeral(mock_upstream_router()).await; + let mock_origin = format!("http://{}", mock_addr); + write_min_config(&openhuman_home, &mock_origin); + + let lm_app = Router::new() + .route( + "/v1/models", + get(|| async { + Json(json!({ + "object": "list", + "data": [ + { "id": "local-model", "object": "model", "owned_by": "lm-studio" } + ] + })) + }), + ) + .route( + "/v1/chat/completions", + post(|Json(_body): Json| async move { + Json(json!({ + "id": "chatcmpl-inference-e2e", + "object": "chat.completion", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": "hello from inference namespace" + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 7, + "completion_tokens": 5, + "total_tokens": 12 + } + })) + }), + ); + let (lm_addr, lm_join) = serve_on_ephemeral(lm_app).await; + let lm_base = format!("http://{lm_addr}/v1"); + + let (rpc_addr, rpc_join) = serve_on_ephemeral(build_core_http_router(false)).await; + let rpc_base = format!("http://{}", rpc_addr); + tokio::time::sleep(Duration::from_millis(100)).await; + + let update = post_json_rpc( + &rpc_base, + 360, + "openhuman.inference_update_local_settings", + json!({ + "runtime_enabled": true, + "opt_in_confirmed": true, + "provider": "lm_studio", + "base_url": lm_base, + "model_id": "local-model", + "chat_model_id": "local-model" + }), + ) + .await; + assert_no_jsonrpc_error(&update, "update_local_ai_settings for inference namespace"); + + let status = post_json_rpc(&rpc_base, 361, "openhuman.inference_status", json!({})).await; + let status_result = 
assert_no_jsonrpc_error(&status, "inference_status"); + let status_payload = status_result.get("result").unwrap_or(status_result); + assert_eq!( + status_payload.get("provider").and_then(Value::as_str), + Some("lm_studio") + ); + + let prompt = post_json_rpc( + &rpc_base, + 362, + "openhuman.inference_prompt", + json!({ + "prompt": "hello", + "max_tokens": 16, + "no_think": true + }), + ) + .await; + let prompt_result = assert_no_jsonrpc_error(&prompt, "inference_prompt"); + assert_eq!( + extract_string_outcome(prompt_result), + "hello from inference namespace" + ); + + let summarize = post_json_rpc( + &rpc_base, + 363, + "openhuman.inference_summarize", + json!({ + "text": "summarize me", + "max_tokens": 16 + }), + ) + .await; + let summarize_result = assert_no_jsonrpc_error(&summarize, "inference_summarize"); + assert_eq!( + extract_string_outcome(summarize_result), + "hello from inference namespace" + ); + + // openhuman.inference_update_model_settings — mutate `default_model` + // through the RPC transport so a controller-registration or param-shape + // regression surfaces here instead of in the settings-save UI flow. + // (We assert on `default_model` because that field is exposed by + // `inference_get_client_config`; `default_temperature` is not.) 
+ let model_update = post_json_rpc( + &rpc_base, + 366, + "openhuman.inference_update_model_settings", + json!({ "default_model": "e2e-updated-model" }), + ) + .await; + assert_no_jsonrpc_error(&model_update, "inference_update_model_settings"); + let client_cfg = post_json_rpc( + &rpc_base, + 367, + "openhuman.inference_get_client_config", + json!({}), + ) + .await; + let client_cfg_result = assert_no_jsonrpc_error(&client_cfg, "inference_get_client_config"); + let updated_model = client_cfg_result + .pointer("/result/default_model") + .or_else(|| client_cfg_result.get("default_model")) + .and_then(Value::as_str); + assert_eq!( + updated_model, + Some("e2e-updated-model"), + "inference_get_client_config did not reflect updated default_model: {client_cfg_result}" + ); + + // openhuman.inference_list_models — no cloud provider configured for this + // local-only test, so we expect a structured error rather than a panic. + // Asserting an error here proves the controller is registered and reaches + // its handler over the RPC transport (the empty-picker symptom CodeRabbit + // flagged would surface as a controller-not-found error instead). 
+ let list_models = post_json_rpc( + &rpc_base, + 368, + "openhuman.inference_list_models", + json!({ "provider_id": "does-not-exist" }), + ) + .await; + let _ = assert_jsonrpc_error( + &list_models, + "inference_list_models with unknown provider id", + ); + + lm_join.abort(); + mock_join.abort(); + rpc_join.abort(); +} + +#[tokio::test] +async fn json_rpc_inference_prompt_requires_external_ollama_runtime_when_unreachable() { + let _env_lock = json_rpc_e2e_env_lock(); + let tmp = tempdir().expect("tempdir"); + let home = tmp.path(); + let openhuman_home = home.join(".openhuman"); + + let _home_guard = EnvVarGuard::set_to_path("HOME", home); + let _workspace_guard = EnvVarGuard::unset("OPENHUMAN_WORKSPACE"); + let _backend_url_guard = EnvVarGuard::unset("BACKEND_URL"); + let _vite_backend_guard = EnvVarGuard::unset("VITE_BACKEND_URL"); + let _tier_guard = EnvVarGuard::unset("OPENHUMAN_LOCAL_AI_TIER"); + let _ollama_url_guard = EnvVarGuard::set("OPENHUMAN_OLLAMA_BASE_URL", "http://127.0.0.1:1"); + + let (mock_addr, mock_join) = serve_on_ephemeral(mock_upstream_router()).await; + let mock_origin = format!("http://{}", mock_addr); + write_min_config(&openhuman_home, &mock_origin); + + let (rpc_addr, rpc_join) = serve_on_ephemeral(build_core_http_router(false)).await; + let rpc_base = format!("http://{}", rpc_addr); + tokio::time::sleep(Duration::from_millis(100)).await; + + let update = post_json_rpc( + &rpc_base, + 364, + "openhuman.inference_update_local_settings", + json!({ + "runtime_enabled": true, + "opt_in_confirmed": true, + "provider": "ollama", + "model_id": "gemma3:1b-it-qat", + "chat_model_id": "gemma3:1b-it-qat" + }), + ) + .await; + assert_no_jsonrpc_error(&update, "update_local_ai_settings for unreachable ollama"); + + let prompt = post_json_rpc( + &rpc_base, + 365, + "openhuman.inference_prompt", + json!({ + "prompt": "hello", + "max_tokens": 16, + "no_think": true + }), + ) + .await; + let prompt_err = assert_jsonrpc_error(&prompt, "inference_prompt 
unreachable ollama"); + let prompt_err_message = prompt_err + .get("message") + .and_then(Value::as_str) + .unwrap_or_default(); + assert!( + prompt_err_message.contains("routes inference through an external Ollama endpoint"), + "unexpected error: {prompt_err}" + ); + + mock_join.abort(); + rpc_join.abort(); +} + // ── Billing & Team E2E tests ────────────────────────────────────────────────── /// End-to-end test for billing RPC methods.