From d84467781aefafbf436363933d43a68940f0df75 Mon Sep 17 00:00:00 2001
From: Joel Teply
Date: Sat, 25 Apr 2026 13:26:01 -0500
Subject: [PATCH] fix(vision): bounded wait for in-flight VDS description on first message (#970 stage 1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PersonaResponseGenerator only attached the VDS description to the IPC signal when descriptionStatus(base64) === 'cached'. On the first message with a fresh image, pre-warm started at chat-send time is still 'inflight' when the persona reaches this code path — so description was undefined, the Rust signal carried { ..., description: undefined }, and text-only personas had no marker to read.

Empirical hit on PR #950: CodeReview AI confidently said "absence of images or attachments" when an image WAS attached.

Fix: also wait when status is 'inflight'. VDS already deduplicates in-flight requests, so the await piggybacks on the existing pre-warm — no extra inference cost. 8s ceiling protects against a stuck pre-warm (LLaVA on CPU is the slow case at 60-70s; the timeout is the safety valve, not the expected path).

Stage 2 (Rust-side marker injection for the description=None case) still tracked in #970 — when description IS undefined despite the bounded wait, the Rust signal-to-ContentPart conversion should emit "[Attached image: vision description unavailable]" instead of silently dropping the image, so the persona knows an image exists rather than fabricating absence. That's a Rust change, separate scope.

Validated locally: npm run build:ts → clean.

Refs #970.

Co-Authored-By: Claude Opus 4.7 (1M context) --- .../modules/PersonaResponseGenerator.ts | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/system/user/server/modules/PersonaResponseGenerator.ts b/src/system/user/server/modules/PersonaResponseGenerator.ts index 03f3a8880..ad8b1f082 100644 --- a/src/system/user/server/modules/PersonaResponseGenerator.ts +++ b/src/system/user/server/modules/PersonaResponseGenerator.ts @@ -373,16 +373,33 @@ export class PersonaResponseGenerator { if (!base64) { return null; // Nothing to send to the model } - // Pull cached description (populated by prewarmVisionDescriptions - // at chat-send time). Cache hit takes ~0ms; miss returns - // undefined — text-only personas downstream get a "no - // description available" marker instead of fabricating. + // Pull description from VDS — populated by prewarmVisionDescriptions + // at chat-send time. Two states are valid waits: + // 'cached' → ~0ms instant lookup (pre-warm finished). + // 'inflight' → bounded wait. Pre-warm started but hasn't + // resolved yet; we'd rather wait up to 8s than + // hand the persona an empty description and + // let it hallucinate "I don't see any image." + // VDS already deduplicates inflight requests, so + // this await piggybacks on the existing call — + // no extra inference cost. + // Status `none` / `error` → don't trigger a blocking describe + // here; the chat-send path is responsible for prewarming. Stage + // 2 (Rust-side) is responsible for emitting an [Attached image: + // unavailable] marker when description ends up undefined, so a + // text-only persona at least KNOWS an image was attached + // instead of fabricating absence. Tracked in #970. let description: string | undefined; if (m.type === 'image') { try { const visionSvc = VisionDescriptionService.getInstance(); - if (visionSvc.descriptionStatus(base64) === 'cached') { - const desc = await visionSvc.describeBase64(base64, m.mimeType ?? 
'image/png', { maxLength: 200 }); + const status = visionSvc.descriptionStatus(base64); + if (status === 'cached' || status === 'inflight') { + const VDS_WAIT_MS = 8000; + const desc = await Promise.race([ + visionSvc.describeBase64(base64, m.mimeType ?? 'image/png', { maxLength: 200 }), + new Promise((resolve) => setTimeout(() => resolve(null), VDS_WAIT_MS)), + ]); description = desc?.description; } } catch {