From 6119d11905e128f943f7934099180d067f2eb0cb Mon Sep 17 00:00:00 2001
From: ScientificProgrammer <4348294+ScientificProgrammer@users.noreply.github.com>
Date: Sun, 19 Apr 2026 11:15:25 -0500
Subject: [PATCH 1/2] fix(auto-recall): thread AbortSignal to cancel embedding
 on timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem: the Promise.race-based timeout in the auto-recall
before_prompt_build hook resolves the outer promise with undefined after
AUTO_RECALL_TIMEOUT_MS, but it does not cancel the underlying embedding
HTTP call. The underlying work keeps running, holding the per-agent
memory-runtime mutex (and lancedb connection), so the next hook
invocation serializes behind it. Combined with the gateway's
session-write-lock max-hold of (timeoutMs + 120s grace, min 300s), a
single slow embedding call can hold the session lock for several
minutes before the watchdog force-releases — seen in production as
"stuck session: state=processing age=219s queueDepth=0".

Fix:
- Create an AbortController in the hook and pass its signal into
  recallWork(signal).
- recallWork threads the signal through retrieveWithRetry() into
  Retriever.retrieve() via a new RetrievalContext.signal field.
- Retriever.retrieve() forwards the signal to the embedQuery() calls in
  both vectorOnlyRetrieval() and hybridRetrieval(). The embedder
  already accepts AbortSignal throughout (src/embedder.ts), so no
  downstream changes were needed.
- When the setTimeout fires, the hook calls controller.abort() before
  resolving undefined. In-flight embedding fetch() requests terminate
  promptly; retriever.retrieve() rejects with AbortError, which the
  retrieveWithRetry early-return handles (aborts skip the 75ms retry).
- The catch branch downgrades AbortError to a debug log so we don't
  double-log the timeout warning the setTimeout path already emitted.

Out of scope (follow-up):
- Jina rerank HTTP calls (a separate code path in retriever.ts) should
  also accept the signal. Left for a focused follow-up since it touches
  the rerank client interface.
- The BM25 path does not call embedQuery and has no long-running HTTP
  call, so it does not need signal plumbing.

Note: tsc --noEmit reports a pre-existing TS2353 error on
retrieveWithRetry where callers pass `source: "auto-recall"` but the
function's inline type did not include `source` (introduced by #535,
not this PR). Non-blocking — tsx runtime transpile does not check
types. Can be fixed with a one-line addition to the param type.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
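Reviewer note (placed below the fold, so git am drops it): the shape of
the fix in miniature. This is an illustrative sketch rather than code
from this repo: doWork() stands in for recallWork()/embedQuery(), the
URL is a placeholder, and the real change threads the signal through
RetrievalContext instead of calling fetch directly.

    // A timeout that actually cancels the losing branch: Promise.race
    // resolves undefined after `ms`, and the AbortController tears down
    // the in-flight fetch so it stops holding resources.
    async function doWork(signal: AbortSignal): Promise<string> {
      const res = await fetch("https://example.invalid/embed", { signal });
      return res.text();
    }

    async function withDeadline(ms: number): Promise<string | undefined> {
      const controller = new AbortController();
      let timeoutId: ReturnType<typeof setTimeout> | undefined;
      try {
        return await Promise.race([
          doWork(controller.signal).then((r) => { clearTimeout(timeoutId); return r; }),
          new Promise<undefined>((resolve) => {
            timeoutId = setTimeout(() => {
              controller.abort(new Error("timeout")); // cancels the fetch
              resolve(undefined);
            }, ms);
          }),
        ]);
      } catch (err) {
        clearTimeout(timeoutId);
        if (controller.signal.aborted) return undefined; // our own abort
        throw err; // real failures stay visible to the caller
      }
    }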
 index.ts         | 25 ++++++++++++++++++++++---
 src/retriever.ts | 14 +++++++++++---
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/index.ts b/index.ts
index 25b2012f..e55178f2 100644
--- a/index.ts
+++ b/index.ts
@@ -1942,10 +1942,12 @@ const memoryLanceDBProPlugin = {
     limit: number;
     scopeFilter?: string[];
     category?: string;
+    signal?: AbortSignal;
   }) {
     let results = await retriever.retrieve(params);
     if (results.length === 0) {
       await sleep(75);
+      if (params.signal?.aborted) return results;
       results = await retriever.retrieve(params);
     }
     return results;
   }
@@ -2407,7 +2409,13 @@ const memoryLanceDBProPlugin = {
       // (embedding → rerank → lifecycle), which can silently drop messages on
       // channels like Telegram when subsequent requests hit lock timeouts.
       // See: https://github.com/CortexReach/memory-lancedb-pro/issues/253
-      const recallWork = async (): Promise<{ prependContext: string } | undefined> => {
+      //
+      // The timeout also aborts the in-flight embedding HTTP call via the
+      // abortController below, so the underlying work doesn't continue to hold
+      // resources (lancedb connection, per-agent memory-runtime mutex) past
+      // the timeout boundary.
+      const abortController = new AbortController();
+      const recallWork = async (signal: AbortSignal): Promise<{ prependContext: string } | undefined> => {
         // Determine agent ID and accessible scopes
         const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
         const accessibleScopes = resolveScopeFilter(scopeManager, agentId);
@@ -2450,6 +2458,7 @@ const memoryLanceDBProPlugin = {
           limit: retrieveLimit,
           scopeFilter: accessibleScopes,
           source: "auto-recall",
+          signal,
         }), config.workspaceBoundary);
 
         if (results.length === 0) {
@@ -2690,9 +2699,12 @@ const memoryLanceDBProPlugin = {
       let timeoutId: ReturnType<typeof setTimeout> | undefined;
       try {
         const result = await Promise.race([
-          recallWork().then((r) => { clearTimeout(timeoutId); return r; }),
+          recallWork(abortController.signal).then((r) => { clearTimeout(timeoutId); return r; }),
           new Promise((resolve) => {
             timeoutId = setTimeout(() => {
+              // Cancel in-flight embedding/retrieval HTTP calls so they don't
+              // keep holding resources after we've given up on the result.
+              abortController.abort(new Error("auto-recall timeout"));
               api.logger.warn(
                 `memory-lancedb-pro: auto-recall timed out after ${AUTO_RECALL_TIMEOUT_MS}ms; skipping memory injection to avoid stalling agent startup`,
               );
@@ -2703,7 +2715,14 @@ const memoryLanceDBProPlugin = {
         return result;
       } catch (err) {
         clearTimeout(timeoutId);
-        api.logger.warn(`memory-lancedb-pro: recall failed: ${String(err)}`);
+        // Downgrade AbortError to debug — the timeout path already emitted
+        // a warn log, and the race has already resolved undefined.
+        const isAbort = err instanceof Error && (err.name === "AbortError" || /abort/i.test(err.message));
+        if (isAbort) {
+          api.logger.debug?.(`memory-lancedb-pro: recall aborted: ${String(err)}`);
+        } else {
+          api.logger.warn(`memory-lancedb-pro: recall failed: ${String(err)}`);
+        }
       }
     }, { priority: 10 });

diff --git a/src/retriever.ts b/src/retriever.ts
index 97837888..35b70bac 100644
--- a/src/retriever.ts
+++ b/src/retriever.ts
@@ -105,6 +105,10 @@ export interface RetrievalContext {
   category?: string;
   /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated, "cli" for CLI commands. */
   source?: "manual" | "auto-recall" | "cli";
+  /** Optional AbortSignal. When aborted, in-flight embedding calls cancel and
+   * the method rejects with AbortError instead of holding the caller's session
+   * lock while the underlying HTTP request runs to completion. */
+  signal?: AbortSignal;
 }
 
 export interface RetrievalResult extends MemorySearchResult {
@@ -559,7 +563,7 @@ export class MemoryRetriever {
   }
 
   async retrieve(context: RetrievalContext): Promise<RetrievalResult[]> {
-    const { query, limit, scopeFilter, category, source } = context;
+    const { query, limit, scopeFilter, category, source, signal } = context;
     const safeLimit = clampInt(limit, 1, 20);
     this.lastDiagnostics = null;
     const diagnostics: RetrievalDiagnostics = {
@@ -615,6 +619,7 @@ export class MemoryRetriever {
         category,
         trace,
         diagnostics,
+        signal,
       );
     } else {
       results = await this.hybridRetrieval(
@@ -625,6 +630,7 @@ export class MemoryRetriever {
         trace,
         source,
         diagnostics,
+        signal,
       );
     }
 
@@ -717,11 +723,12 @@ export class MemoryRetriever {
     category?: string,
     trace?: TraceCollector,
     diagnostics?: RetrievalDiagnostics,
+    signal?: AbortSignal,
   ): Promise<RetrievalResult[]> {
     let failureStage: RetrievalDiagnostics["failureStage"] = "vector.embedQuery";
     try {
       const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
-      const queryVector = await this.embedder.embedQuery(query);
+      const queryVector = await this.embedder.embedQuery(query, signal);
       failureStage = "vector.vectorSearch";
       const results = await this.store.vectorSearch(
         queryVector,
@@ -907,11 +914,12 @@ export class MemoryRetriever {
     trace?: TraceCollector,
     source?: RetrievalContext["source"],
     diagnostics?: RetrievalDiagnostics,
+    signal?: AbortSignal,
   ): Promise<RetrievalResult[]> {
     let failureStage: RetrievalDiagnostics["failureStage"] = "hybrid.embedQuery";
     try {
       const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
-      const queryVector = await this.embedder.embedQuery(query);
+      const queryVector = await this.embedder.embedQuery(query, signal);
       const bm25Query = this.buildBM25Query(query, source);
       if (diagnostics) {
         diagnostics.bm25Query = bm25Query;
From 373e7f26c1aed5992c487f32d9a9fcbb5149937b Mon Sep 17 00:00:00 2001
From: ScientificProgrammer <4348294+ScientificProgrammer@users.noreply.github.com>
Date: Mon, 20 Apr 2026 01:44:22 -0500
Subject: [PATCH 2/2] fix(auto-recall): narrow abort classification to our
 controller

Address Copilot P2 feedback on PR #668.

The catch block used `err.name === "AbortError" || /abort/i.test(err.message)`
to decide whether to downgrade recall errors to debug. That was too
broad: if the embedder's own internal timeout AbortController fired
(src/embedder.ts withTimeout), its AbortError would propagate up and
match the same pattern, silencing a real infrastructure failure.

Key the debug-downgrade to `abortController.signal.aborted` instead.
That flag is owned by this hook's own controller and is true only when
our timeout callback fired. Aborts from any other controller (embedder,
SDK client, upstream cancellation) now stay at warn.

Also tighten the `RetrievalContext.signal` docstring: the embedder's
withTimeout rejects with `externalSignal.reason ?? new Error("aborted")`
when the signal is already aborted, which can be a plain Error rather
than an AbortError. "rejects due to abort (often with AbortError or the
signal's abort reason)" matches observed behavior.
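In sketch form (abbreviated; the full context is in the diff below):

    // before: any abort-shaped error was downgraded to debug
    const isAbort = err instanceof Error &&
      (err.name === "AbortError" || /abort/i.test(err.message));
    if (isAbort) { /* debug */ } else { /* warn */ }

    // after: only an abort from this hook's own controller is downgraded
    if (abortController.signal.aborted) { /* debug */ } else { /* warn */ }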
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 index.ts         | 11 ++++++-----
 src/retriever.ts |  5 +++--
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/index.ts b/index.ts
index e55178f2..b9b73298 100644
--- a/index.ts
+++ b/index.ts
@@ -2715,11 +2715,12 @@ const memoryLanceDBProPlugin = {
         return result;
       } catch (err) {
         clearTimeout(timeoutId);
-        // Downgrade AbortError to debug — the timeout path already emitted
-        // a warn log, and the race has already resolved undefined.
-        const isAbort = err instanceof Error && (err.name === "AbortError" || /abort/i.test(err.message));
-        if (isAbort) {
-          api.logger.debug?.(`memory-lancedb-pro: recall aborted: ${String(err)}`);
+        // Downgrade to debug only when OUR controller aborted (i.e. the
+        // timeout callback fired). Aborts originating elsewhere — e.g. the
+        // embedder's own internal timeout — keep warn visibility so real
+        // failures aren't silenced.
+        if (abortController.signal.aborted) {
+          api.logger.debug?.(`memory-lancedb-pro: recall aborted by timeout: ${String(err)}`);
         } else {
           api.logger.warn(`memory-lancedb-pro: recall failed: ${String(err)}`);
         }

diff --git a/src/retriever.ts b/src/retriever.ts
index 35b70bac..3c465070 100644
--- a/src/retriever.ts
+++ b/src/retriever.ts
@@ -106,8 +106,9 @@ export interface RetrievalContext {
   /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated, "cli" for CLI commands. */
   source?: "manual" | "auto-recall" | "cli";
   /** Optional AbortSignal. When aborted, in-flight embedding calls cancel and
-   * the method rejects with AbortError instead of holding the caller's session
-   * lock while the underlying HTTP request runs to completion. */
+   * the method rejects due to abort (often with AbortError or the signal's
+   * abort reason) instead of holding the caller's session lock while the
+   * underlying HTTP request runs to completion. */
   signal?: AbortSignal;
 }
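Series postscript (reviewer note, not part of either patch): the
caller-facing surface after both patches, with made-up values. Here
`retriever` is assumed to be a MemoryRetriever instance, and `signal`
is the only new RetrievalContext field; everything else already existed.

    const controller = new AbortController();
    const results = await retriever.retrieve({
      query: "deployment checklist",   // hypothetical query
      limit: 5,
      scopeFilter: ["agent:main"],     // hypothetical scope
      source: "manual",
      signal: controller.signal,
    });
    // Later, when the caller's own deadline passes:
    controller.abort(new Error("caller deadline"));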