Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/src/utils/tauriCommands/localAi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ export interface InstalledModelInfo {

export interface LocalAiDiagnostics {
ollama_running: boolean;
ollama_runner_ok?: boolean;
ollama_base_url: string;
ollama_binary_path: string | null;
vision_mode?: string;
Expand Down
2 changes: 1 addition & 1 deletion src/openhuman/inference/local/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ pub async fn local_ai_summarize(
service.bootstrap(config).await;
}
let summary = service
.summarize(config, text, max_tokens)
.summarize_interactive(config, text, max_tokens)
.await
.map_err(|e| e.to_string())?;
Ok(RpcOutcome::single_log(
Expand Down
30 changes: 23 additions & 7 deletions src/openhuman/inference/local/service/ollama_admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,11 @@ impl LocalAiService {

let base_url = ollama_base_url_from_config(config);
let healthy = self.ollama_healthy_at(&base_url).await;
let runner_ok = if healthy {
self.ollama_runner_ok_at(&base_url).await
} else {
false
};

log::debug!(
"[local_ai] diagnostics: entry base_url={} healthy={}",
Expand Down Expand Up @@ -884,11 +889,15 @@ impl LocalAiService {
// `/api/show` is one bounded round-trip per installed model,
// fetched concurrently and only on this diagnostics path.
let model_eligibilities: Vec<ContextEligibility> = if healthy {
futures_util::future::join_all(models.iter().map(|m| self.fetch_model_context(&m.name)))
.await
.into_iter()
.map(evaluate_context)
.collect()
futures_util::future::join_all(
models
.iter()
.map(|m| self.fetch_model_context_at(&base_url, &m.name)),
)
.await
.into_iter()
.map(evaluate_context)
.collect()
} else {
Vec::new()
};
Expand Down Expand Up @@ -941,6 +950,12 @@ impl LocalAiService {
base_url
));
}
if healthy && !runner_ok {
issues.push(
"Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry."
.to_string(),
);
}
if healthy && !chat_found {
issues.push(format!("Chat model `{}` is not installed", expected_chat));
}
Expand Down Expand Up @@ -1002,6 +1017,7 @@ impl LocalAiService {

Ok(serde_json::json!({
"ollama_running": healthy,
"ollama_runner_ok": runner_ok,
"ollama_base_url": base_url,
"ollama_binary_path": binary_path,
"installed_models": installed_models,
Expand Down Expand Up @@ -1112,8 +1128,8 @@ impl LocalAiService {
/// the metadata key is absent) — the caller maps that to an `Unknown`
/// eligibility verdict rather than a hard rejection. One bounded HTTP
/// round-trip per model; only ever invoked from the diagnostics path.
async fn fetch_model_context(&self, model: &str) -> Option<u64> {
let url = format!("{}/api/show", ollama_base_url());
async fn fetch_model_context_at(&self, base_url: &str, model: &str) -> Option<u64> {
let url = format!("{}/api/show", base_url.trim_end_matches('/'));
let resp = self
.http
.post(&url)
Expand Down
76 changes: 76 additions & 0 deletions src/openhuman/inference/local/service/ollama_admin_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,82 @@ async fn diagnostics_ok_when_expected_models_are_present() {
}
}

/// Diagnostics must flag an Ollama endpoint that answers HTTP but cannot
/// execute models, even when the expected chat and embedding models are
/// both listed as installed.
///
/// The mock server:
/// - serves `/api/tags` with the effective chat and embedding model tags;
/// - answers `/api/show` with a 500 `fork/exec` error for the model name
///   `___nonexistent_probe___` (presumably the name the runner probe sends;
///   verify against the probe implementation) and with ordinary model
///   metadata for every other name.
///
/// Expected outcome: `ollama_running` is `true`, `ok` is `false`, and the
/// issue list contains a "cannot execute models" entry.
#[tokio::test]
async fn diagnostics_reports_broken_runner_even_when_models_are_present() {
    let _guard = crate::openhuman::inference::inference_test_guard();

    let config = Config::default();
    let chat = crate::openhuman::inference::model_ids::effective_chat_model_id(&config);
    let embedding = crate::openhuman::inference::model_ids::effective_embedding_model_id(&config);
    let chat_tag = format!("{}:latest", chat);
    let embed_tag = format!("{}:latest", embedding);
    let app = Router::new()
        .route(
            "/api/tags",
            get(move || {
                let chat_tag = chat_tag.clone();
                let embed_tag = embed_tag.clone();
                async move {
                    Json(json!({
                        "models": [
                            { "name": chat_tag, "modified_at": "", "size": 1u64, "digest": "d" },
                            { "name": embed_tag, "modified_at": "", "size": 2u64, "digest": "e" },
                        ]
                    }))
                }
            }),
        )
        .route(
            "/api/show",
            axum::routing::post(|Json(body): Json<serde_json::Value>| async move {
                // Accept either request key for the model name.
                let model = body["name"]
                    .as_str()
                    .or_else(|| body["model"].as_str())
                    .unwrap_or_default();
                if model == "___nonexistent_probe___" {
                    // Simulate a server whose runner binary is missing.
                    return (
                        axum::http::StatusCode::INTERNAL_SERVER_ERROR,
                        "fork/exec /broken/ollama: no such file or directory".to_string(),
                    );
                }
                (
                    axum::http::StatusCode::OK,
                    json!({
                        "model_info": {
                            "general.architecture": "bert",
                            "bert.context_length": 8192,
                        },
                        "capabilities": ["embedding"],
                    })
                    .to_string(),
                )
            }),
        );
    let base = spawn_mock(app).await;
    // SAFETY: mutating the process environment is only sound while no other
    // thread reads or writes it. `_guard` is held for the whole test and
    // presumably serializes the tests that touch this variable (confirm
    // against `inference_test_guard`).
    unsafe {
        std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", &base);
    }

    let service = LocalAiService::new(&config);
    // Keep the `Result` unexamined until the environment override has been
    // removed: unwrapping here would leave the variable set for later tests
    // whenever diagnostics fails.
    let diag_result = service.diagnostics(&config).await;

    // SAFETY: same reasoning as for `set_var` above.
    unsafe {
        std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL");
    }

    let diag = diag_result.expect("diagnostics");

    assert_eq!(diag["ollama_running"], true);
    assert_eq!(diag["ok"], false);
    let issues = diag["issues"].as_array().cloned().unwrap_or_default();
    assert!(
        issues.iter().any(|issue| issue
            .as_str()
            .unwrap_or_default()
            .contains("cannot execute models")),
        "diagnostics should report the broken Ollama runner, got: {:?}",
        issues
    );
}

#[tokio::test]
async fn resolve_binary_path_finds_binary_via_ollama_bin_env() {
let _guard = crate::openhuman::inference::inference_test_guard();
Expand Down
30 changes: 26 additions & 4 deletions src/openhuman/inference/local/service/public_infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,32 @@ impl LocalAiService {
}
let system = "You summarize internal assistant context. Keep concise bullet points.";
let prompt = format!(
"Summarize this text in concise bullet points. Preserve decisions and commitments.\\n\\n{}",
"Summarize this text in concise bullet points. Preserve decisions and commitments.\n\n{}",
text
);
self.inference(config, system, &prompt, max_tokens.or(Some(128)), true)
.await
}

/// Summarize `text` into concise bullet points without waiting on the
/// scheduler gate's LLM permit.
///
/// Builds a fixed system instruction plus a prompt that embeds `text` after
/// a blank line, then forwards both to [`Self::inference_interactive`].
/// When `max_tokens` is `None` the request is capped at 128 tokens.
///
/// # Errors
///
/// Returns `Err("local ai is disabled")` when
/// `config.local_ai.runtime_enabled` is `false`; otherwise propagates
/// whatever error `inference_interactive` reports.
pub async fn summarize_interactive(
    &self,
    config: &Config,
    text: &str,
    max_tokens: Option<u32>,
) -> Result<String, String> {
    log::trace!("[local_ai] summarize_interactive bypasses scheduler_gate permit");
    if !config.local_ai.runtime_enabled {
        return Err("local ai is disabled".to_string());
    }
    let system = "You summarize internal assistant context. Keep concise bullet points.";
    // Real newline escapes separate the instruction from the payload.
    let prompt = format!(
        "Summarize this text in concise bullet points. Preserve decisions and commitments.\n\n{}",
        text
    );
    // NOTE(review): the trailing `true` is passed through unchanged; its
    // meaning is defined by `inference_interactive`, which is not visible here.
    self.inference_interactive(config, system, &prompt, max_tokens.or(Some(128)), true)
        .await
}

pub async fn prompt(
&self,
config: &Config,
Expand Down Expand Up @@ -114,7 +133,8 @@ impl LocalAiService {
/// turn against it than show stale or empty completions for the
/// duration of the backfill.
///
/// Along with [`Self::prompt_interactive`] and
/// Along with [`Self::prompt_interactive`],
/// [`Self::summarize_interactive`], and
/// [`Self::chat_with_history_interactive`], this is one of the paths
/// inside [`LocalAiService`] that opts out of the gate. Every other
/// entry point (`inference`, `prompt`, `summarize`,
Expand Down Expand Up @@ -397,13 +417,15 @@ impl LocalAiService {
/// the scheduler gate's LLM permit**.
///
/// Used by user-arrival paths where the user is staring at the
/// output (push-to-talk dictation cleanup, in particular). If we
/// output (push-to-talk dictation cleanup and debug summary tests, in
/// particular). If we
/// queue these behind a long-running memory backfill, the user
/// experiences a frozen UI; better to race the call against
/// background work and accept the contention than to silently
/// degrade interactivity.
///
/// Sibling to [`Self::inline_complete_interactive`] for autocomplete.
/// Sibling to [`Self::inline_complete_interactive`] for autocomplete and
/// [`Self::summarize_interactive`] for explicit debug summary requests.
/// Every other entry point (`inference`, `prompt`, `summarize`,
/// `inline_complete`, `vision_prompt`, `embed`, `chat_with_history`)
/// remains gated.
Expand Down
50 changes: 50 additions & 0 deletions src/openhuman/inference/local/service/public_infer_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,56 @@ async fn prompt_interactive_does_not_block_on_held_permit() {
assert_eq!(reply, "hello from mock");
}

/// `summarize_interactive` must complete while the scheduler gate's LLM
/// permit is already held by someone else, and the prompt it sends must
/// separate instruction and payload with real newlines.
#[tokio::test]
async fn summarize_interactive_does_not_block_on_held_permit() {
    let _guard = crate::openhuman::inference::inference_test_guard();

    // Occupy the permit for the whole test so a gated call would stall.
    let _held = crate::openhuman::scheduler_gate::gate::try_acquire_llm_permit()
        .expect("test must start with a free permit");

    // Mock `/api/generate`: validate the incoming prompt, then reply with a
    // canned completion.
    let generate = |Json(payload): Json<serde_json::Value>| async move {
        let sent_prompt = payload
            .get("prompt")
            .and_then(|value| value.as_str())
            .unwrap_or("");
        assert!(
            sent_prompt.contains("commitments.\n\ntext to summarize"),
            "summary prompt should use real newlines, got: {sent_prompt:?}"
        );
        assert!(
            !sent_prompt.contains(r"commitments.\n\ntext to summarize"),
            "summary prompt must not contain literal backslash-n separators"
        );
        Json(json!({
            "model": "test",
            "response": "summary from mock",
            "done": true
        }))
    };
    let router = Router::new().route("/api/generate", post(generate));
    let mock_url = spawn_mock(router).await;
    // SAFETY: mutating the process environment is only sound while no other
    // thread reads or writes it. `_guard` is held for the whole test and
    // presumably serializes the tests that touch this variable (confirm
    // against `inference_test_guard`).
    unsafe {
        std::env::set_var("OPENHUMAN_OLLAMA_BASE_URL", &mock_url);
    }

    let config = enabled_config();
    let service = ready_service(&config);

    // A blocked call would hit this deadline instead of hanging the suite.
    let deadline = std::time::Duration::from_secs(2);
    let pending = service.summarize_interactive(&config, "text to summarize", Some(16));
    let timed = tokio::time::timeout(deadline, pending).await;

    // Drop the override before unwrapping so a failure cannot leak it.
    // SAFETY: same reasoning as for `set_var` above.
    unsafe {
        std::env::remove_var("OPENHUMAN_OLLAMA_BASE_URL");
    }

    let outcome = timed.expect("interactive summary must not block on a held permit");
    let reply = outcome.expect("interactive summary response");
    assert_eq!(reply, "summary from mock");
}

#[tokio::test]
async fn chat_with_history_interactive_does_not_block_on_held_permit() {
let _guard = crate::openhuman::inference::inference_test_guard();
Expand Down
Loading