From 2c00611d74f8d51ca7fe232022f0909e785b05eb Mon Sep 17 00:00:00 2001 From: Jeff Dafoe Date: Thu, 11 Jun 2026 16:30:16 -0400 Subject: [PATCH] ZBBS-WORK-404: answer rate-limited wait-calls with 429 RATE_LIMITED instead of 200 reply=null When a VA is in rate-limit cooldown, handleDirectChat resolved the wait=true promise with null - the salem engine got a 200 with reply=null, which it can only classify as a malformed response. Rate limiting masqueraded as model-output failure in tick telemetry (the misattribution behind reactor-liveness finding #13 / ZBBS-HOME-332). The rate-limited branch now throws a typed error (statusCode 429, code RATE_LIMITED, resumesInSeconds from the limiter cooldown); the /chat/send wait-mode catch allowlists exactly that shape through as HTTP 429 with error.resumes_in_seconds. Everything else keeps the 502 REPLY_FAILED contract. The breadcrumb chat row still lands before the throw; non-wait dispatch swallows the rejection as before. Engine counterpart (new ErrorRateLimited class) ships separately in the salem repo; old engine binaries map 429 to malformed, identical to today, so deploy order is safe either way (api first preferred). Co-Authored-By: Claude Fable 5 --- node/api/src/routes/chat.js | 17 ++++++++++++++++ node/api/src/services/virtual-agent.js | 28 +++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/node/api/src/routes/chat.js b/node/api/src/routes/chat.js index 4b391bdb..7b913a23 100644 --- a/node/api/src/routes/chat.js +++ b/node/api/src/routes/chat.js @@ -207,6 +207,23 @@ router.post('/chat/send', apiRoute('chat', 'send', async (req, res) => { // The VA reply path failed before sending its [Error] feedback // chat message. Surface the error directly so the wait-mode // caller can react instead of polling for a sentinel. + // One allowlisted typed error passes through: the 429 + // RATE_LIMITED throw from handleDirectChat (ZBBS-WORK-404), so + // the engine can classify a cooldown honestly instead of + // booking it as malformed. Allowlisted rather than passing any + // thrown statusCode through — an arbitrary internal error + // carrying statusCode would otherwise widen this route's + // public contract. Everything else stays 502 REPLY_FAILED. + if (replyErr.code === 'RATE_LIMITED' && replyErr.statusCode === 429) { + const error = { + code: 'RATE_LIMITED', + message: replyErr.message || 'Rate limited', + }; + if (replyErr.resumesInSeconds != null) { + error.resumes_in_seconds = replyErr.resumesInSeconds; + } + return res.status(429).json({ error }); + } return res.status(502).json({ error: { code: 'REPLY_FAILED', message: replyErr.message || 'virtual agent reply failed' }, }); diff --git a/node/api/src/services/virtual-agent.js b/node/api/src/services/virtual-agent.js index 67d39834..0e96056d 100644 --- a/node/api/src/services/virtual-agent.js +++ b/node/api/src/services/virtual-agent.js @@ -299,6 +299,18 @@ async function isRateLimited(agentName, sceneId = null) { return false; } +// Seconds until an agent's active cooldown lifts; 0 when none is active. +// Only meaningful right after isRateLimited returned true — both limited +// branches (already-cooling and just-tripped) leave _cooldownUntil set, so +// wait-mode callers can surface a Retry-After-style hint (ZBBS-WORK-404). +function rateLimitResumeSeconds(agentName) { + const history = callHistory[agentName]; + if (!history || !history._cooldownUntil) { + return 0; + } + return Math.max(0, Math.ceil((history._cooldownUntil - Date.now()) / 1000)); +} + // Record a provider call for rate limiting. // // sceneId (optional): the salem per-tick scene id. Only the FIRST call of a new @@ -2569,7 +2581,21 @@ async function handleDirectChat(virtualAgentName, fromAgent, messageText, messag logVA('direct-chat-rate-limited', { agent: virtualAgentName, from: fromAgent }); await chatSend(virtualAgentName, [fromAgent], null, '[Error] Rate limited — too many API calls. Please wait before trying again.', { sceneId, conversationId, isError: true }); - return null; + // Throw rather than resolve null (ZBBS-WORK-404). Resolving null + // gave wait=true callers (the salem engine) a 200 with reply=null, + // which the engine could only classify as a malformed response — + // rate-limit cooldowns masquerading as model-output failures in + // tick telemetry (the misattribution behind reactor-liveness + // finding #13). The route maps statusCode/code onto the HTTP + // reply so the engine sees an honest 429. The breadcrumb chat row + // above still lands for history/admin; non-wait dispatch swallows + // the rejection (.catch in chat.js), same as the missing-config + // throw below. + throw Object.assign(new Error('Rate limited — too many API calls'), { + statusCode: 429, + code: 'RATE_LIMITED', + resumesInSeconds: rateLimitResumeSeconds(agent.agent), + }); } // Budget check