From 292dcc6a5ddecd0c02767d485c052ba5c2639fed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=86=A0=E8=BE=B0?= Date: Fri, 29 May 2026 20:05:41 +0800 Subject: [PATCH 1/2] fix(local-ai): unblock summary diagnostics --- app/src/utils/tauriCommands/localAi.ts | 1 + src/openhuman/inference/local/ops.rs | 2 +- .../inference/local/service/ollama_admin.rs | 30 ++++++-- .../local/service/ollama_admin_tests.rs | 76 +++++++++++++++++++ .../inference/local/service/public_infer.rs | 28 ++++++- .../local/service/public_infer_tests.rs | 41 ++++++++++ 6 files changed, 167 insertions(+), 11 deletions(-) diff --git a/app/src/utils/tauriCommands/localAi.ts b/app/src/utils/tauriCommands/localAi.ts index 7c2ecdca05..b4711adf44 100644 --- a/app/src/utils/tauriCommands/localAi.ts +++ b/app/src/utils/tauriCommands/localAi.ts @@ -194,6 +194,7 @@ export interface InstalledModelInfo { export interface LocalAiDiagnostics { ollama_running: boolean; + ollama_runner_ok?: boolean; ollama_base_url: string; ollama_binary_path: string | null; vision_mode?: string; diff --git a/src/openhuman/inference/local/ops.rs b/src/openhuman/inference/local/ops.rs index efecec0601..7320bc4c51 100644 --- a/src/openhuman/inference/local/ops.rs +++ b/src/openhuman/inference/local/ops.rs @@ -200,7 +200,7 @@ pub async fn local_ai_summarize( service.bootstrap(config).await; } let summary = service - .summarize(config, text, max_tokens) + .summarize_interactive(config, text, max_tokens) .await .map_err(|e| e.to_string())?; Ok(RpcOutcome::single_log( diff --git a/src/openhuman/inference/local/service/ollama_admin.rs b/src/openhuman/inference/local/service/ollama_admin.rs index 5fd83e84cd..7d0089c953 100644 --- a/src/openhuman/inference/local/service/ollama_admin.rs +++ b/src/openhuman/inference/local/service/ollama_admin.rs @@ -848,6 +848,11 @@ impl LocalAiService { let base_url = ollama_base_url_from_config(config); let healthy = self.ollama_healthy_at(&base_url).await; + let runner_ok = if healthy { + self.ollama_runner_ok_at(&base_url).await + } else { + false + }; log::debug!( "[local_ai] diagnostics: entry base_url={} healthy={}", @@ -884,11 +889,15 @@ impl LocalAiService { // `/api/show` is one bounded round-trip per installed model, // fetched concurrently and only on this diagnostics path. let model_eligibilities: Vec = if healthy { - futures_util::future::join_all(models.iter().map(|m| self.fetch_model_context(&m.name))) - .await - .into_iter() - .map(evaluate_context) - .collect() + futures_util::future::join_all( + models + .iter() + .map(|m| self.fetch_model_context_at(&base_url, &m.name)), + ) + .await + .into_iter() + .map(evaluate_context) + .collect() } else { Vec::new() }; @@ -941,6 +950,12 @@ impl LocalAiService { base_url )); } + if healthy && !runner_ok { + issues.push( + "Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry." + .to_string(), + ); + } if healthy && !chat_found { issues.push(format!("Chat model `{}` is not installed", expected_chat)); } @@ -1002,6 +1017,7 @@ impl LocalAiService { Ok(serde_json::json!({ "ollama_running": healthy, + "ollama_runner_ok": runner_ok, "ollama_base_url": base_url, "ollama_binary_path": binary_path, "installed_models": installed_models, @@ -1112,8 +1128,8 @@ impl LocalAiService { /// the metadata key is absent) — the caller maps that to an `Unknown` /// eligibility verdict rather than a hard rejection. One bounded HTTP /// round-trip per model; only ever invoked from the diagnostics path. - async fn fetch_model_context(&self, model: &str) -> Option { - let url = format!("{}/api/show", ollama_base_url()); + async fn fetch_model_context_at(&self, base_url: &str, model: &str) -> Option { + let url = format!("{}/api/show", base_url.trim_end_matches('/')); let resp = self .http .post(&url) diff --git a/src/openhuman/inference/local/service/ollama_admin_tests.rs b/src/openhuman/inference/local/service/ollama_admin_tests.rs index b1eb3ab744..1e11b500c2 100644 --- a/src/openhuman/inference/local/service/ollama_admin_tests.rs +++ b/src/openhuman/inference/local/service/ollama_admin_tests.rs @@ -437,6 +437,82 @@ async fn diagnostics_ok_when_expected_models_are_present() { } } +#[tokio::test] +async fn diagnostics_reports_broken_runner_even_when_models_are_present() { + let _guard = crate::openhuman::inference::inference_test_guard(); + + let config = Config::default(); + let chat = crate::openhuman::inference::model_ids::effective_chat_model_id(&config); + let embedding = crate::openhuman::inference::model_ids::effective_embedding_model_id(&config); + let chat_tag = format!("{}:latest", chat); + let embed_tag = format!("{}:latest", embedding); + let app = Router::new() + .route( + "/api/tags", + get(move || { + let chat_tag = chat_tag.clone(); + let embed_tag = embed_tag.clone(); + async move { + Json(json!({ + "models": [ + { "name": chat_tag, "modified_at": "", "size": 1u64, "digest": "d" }, + { "name": embed_tag, "modified_at": "", "size": 2u64, "digest": "e" }, + ] + })) + } + }), + ) + .route( + "/api/show", + axum::routing::post(|Json(body): Json| async move { + let model = body["name"] + .as_str() + .or_else(|| body["model"].as_str()) + .unwrap_or_default(); + if model == "___nonexistent_probe___" { + return ( + axum::http::StatusCode::INTERNAL_SERVER_ERROR, + "fork/exec /broken/ollama: no such file or directory".to_string(), + ); + } + ( + axum::http::StatusCode::OK, + json!({ + "model_info": { + "general.architecture": "bert", + "bert.context_length": 8192, + }, + "capabilities": ["embedding"], + }) + .to_string(), + ) + }), + ); + let base = spawn_mock(app).await; + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", &base); + } + + let service = LocalAiService::new(&config); + let diag = service.diagnostics(&config).await.expect("diagnostics"); + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + assert_eq!(diag["ollama_running"], true); + assert_eq!(diag["ok"], false); + let issues = diag["issues"].as_array().cloned().unwrap_or_default(); + assert!( + issues.iter().any(|issue| issue + .as_str() + .unwrap_or_default() + .contains("cannot execute models")), + "diagnostics should report the broken Ollama runner, got: {:?}", + issues + ); +} + #[tokio::test] async fn resolve_binary_path_finds_binary_via_ollama_bin_env() { let _guard = crate::openhuman::inference::inference_test_guard(); diff --git a/src/openhuman/inference/local/service/public_infer.rs b/src/openhuman/inference/local/service/public_infer.rs index c2cf6243f4..5b3305a352 100644 --- a/src/openhuman/inference/local/service/public_infer.rs +++ b/src/openhuman/inference/local/service/public_infer.rs @@ -44,6 +44,25 @@ impl LocalAiService { .await } + pub async fn summarize_interactive( + &self, + config: &Config, + text: &str, + max_tokens: Option, + ) -> Result { + log::trace!("[local_ai] summarize_interactive bypasses scheduler_gate permit"); + if !config.local_ai.runtime_enabled { + return Err("local ai is disabled".to_string()); + } + let system = "You summarize internal assistant context. Keep concise bullet points."; + let prompt = format!( + "Summarize this text in concise bullet points. Preserve decisions and commitments.\\n\\n{}", + text + ); + self.inference_interactive(config, system, &prompt, max_tokens.or(Some(128)), true) + .await + } + pub async fn prompt( &self, config: &Config, @@ -114,7 +133,8 @@ impl LocalAiService { /// turn against it than show stale or empty completions for the /// duration of the backfill. /// - /// Along with [`Self::prompt_interactive`] and + /// Along with [`Self::prompt_interactive`], + /// [`Self::summarize_interactive`], and /// [`Self::chat_with_history_interactive`], this is one of the paths /// inside [`LocalAiService`] that opts out of the gate. Every other /// entry point (`inference`, `prompt`, `summarize`, @@ -397,13 +417,15 @@ impl LocalAiService { /// the scheduler gate's LLM permit**. /// /// Used by user-arrival paths where the user is staring at the - /// output (push-to-talk dictation cleanup, in particular). If we + /// output (push-to-talk dictation cleanup and debug summary tests, in + /// particular). If we /// queue these behind a long-running memory backfill, the user /// experiences a frozen UI; better to race the call against /// background work and accept the contention than to silently /// degrade interactivity. /// - /// Sibling to [`Self::inline_complete_interactive`] for autocomplete. + /// Sibling to [`Self::inline_complete_interactive`] for autocomplete and + /// [`Self::summarize_interactive`] for explicit debug summary requests. /// Every other entry point (`inference`, `prompt`, `summarize`, /// `inline_complete`, `vision_prompt`, `embed`, `chat_with_history`) /// remains gated. diff --git a/src/openhuman/inference/local/service/public_infer_tests.rs b/src/openhuman/inference/local/service/public_infer_tests.rs index f9e9fa6b63..0fa8539387 100644 --- a/src/openhuman/inference/local/service/public_infer_tests.rs +++ b/src/openhuman/inference/local/service/public_infer_tests.rs @@ -351,6 +351,47 @@ async fn prompt_interactive_does_not_block_on_held_permit() { assert_eq!(reply, "hello from mock"); } +#[tokio::test] +async fn summarize_interactive_does_not_block_on_held_permit() { + let _guard = crate::openhuman::inference::inference_test_guard(); + + let _held = crate::openhuman::scheduler_gate::gate::try_acquire_llm_permit() + .expect("test must start with a free permit"); + + let app = Router::new().route( + "/api/generate", + post(|Json(_body): Json| async move { + Json(json!({ + "model": "test", + "response": "summary from mock", + "done": true + })) + }), + ); + let base = spawn_mock(app).await; + unsafe { + std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", &base); + } + + let config = enabled_config(); + let service = ready_service(&config); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(2), + service.summarize_interactive(&config, "text to summarize", Some(16)), + ) + .await; + + unsafe { + std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL"); + } + + let reply = result + .expect("interactive summary must not block on a held permit") + .expect("interactive summary response"); + assert_eq!(reply, "summary from mock"); +} + #[tokio::test] async fn chat_with_history_interactive_does_not_block_on_held_permit() { let _guard = crate::openhuman::inference::inference_test_guard(); From 1c3d50216785f902bbf3fff228ad4c41daece69d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=86=A0=E8=BE=B0?= Date: Fri, 29 May 2026 20:35:53 +0800 Subject: [PATCH 2/2] fix(local-ai): use real newlines in summary prompt --- src/openhuman/inference/local/service/public_infer.rs | 4 ++-- .../inference/local/service/public_infer_tests.rs | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/openhuman/inference/local/service/public_infer.rs b/src/openhuman/inference/local/service/public_infer.rs index 5b3305a352..4b036d086f 100644 --- a/src/openhuman/inference/local/service/public_infer.rs +++ b/src/openhuman/inference/local/service/public_infer.rs @@ -37,7 +37,7 @@ impl LocalAiService { } let system = "You summarize internal assistant context. Keep concise bullet points."; let prompt = format!( - "Summarize this text in concise bullet points. Preserve decisions and commitments.\\n\\n{}", + "Summarize this text in concise bullet points. Preserve decisions and commitments.\n\n{}", text ); self.inference(config, system, &prompt, max_tokens.or(Some(128)), true) @@ -56,7 +56,7 @@ impl LocalAiService { } let system = "You summarize internal assistant context. Keep concise bullet points."; let prompt = format!( - "Summarize this text in concise bullet points. Preserve decisions and commitments.\\n\\n{}", + "Summarize this text in concise bullet points. Preserve decisions and commitments.\n\n{}", text ); self.inference_interactive(config, system, &prompt, max_tokens.or(Some(128)), true) diff --git a/src/openhuman/inference/local/service/public_infer_tests.rs b/src/openhuman/inference/local/service/public_infer_tests.rs index 0fa8539387..54c3c97d1e 100644 --- a/src/openhuman/inference/local/service/public_infer_tests.rs +++ b/src/openhuman/inference/local/service/public_infer_tests.rs @@ -360,7 +360,16 @@ async fn summarize_interactive_does_not_block_on_held_permit() { let app = Router::new().route( "/api/generate", - post(|Json(_body): Json| async move { + post(|Json(body): Json| async move { + let prompt = body["prompt"].as_str().unwrap_or_default(); + assert!( + prompt.contains("commitments.\n\ntext to summarize"), + "summary prompt should use real newlines, got: {prompt:?}" + ); + assert!( + !prompt.contains(r"commitments.\n\ntext to summarize"), + "summary prompt must not contain literal backslash-n separators" + ); Json(json!({ "model": "test", "response": "summary from mock",