From 2c00611d74f8d51ca7fe232022f0909e785b05eb Mon Sep 17 00:00:00 2001
From: Jeff Dafoe <jdafoe@4mcentral.com>
Date: Thu, 11 Jun 2026 16:30:16 -0400
Subject: [PATCH] ZBBS-WORK-404: answer rate-limited wait-calls with 429
 RATE_LIMITED instead of 200 reply=null

When a VA is in rate-limit cooldown, handleDirectChat resolved the
wait=true promise with null - the salem engine got a 200 with
reply=null, which it can only classify as a malformed response. Rate
limiting masqueraded as model-output failure in tick telemetry (the
misattribution behind reactor-liveness finding #13 / ZBBS-HOME-332).

The rate-limited branch now throws a typed error (statusCode 429,
code RATE_LIMITED, resumesInSeconds from the limiter cooldown); the
/chat/send wait-mode catch allowlists exactly that shape and passes it
through as HTTP 429 with error.resumes_in_seconds. Everything else
keeps the 502 REPLY_FAILED contract. The breadcrumb chat row still
lands before the throw; non-wait dispatch swallows the rejection as
before.

Engine counterpart (new ErrorRateLimited class) ships separately in
the salem repo; old engine binaries map 429 to malformed, identical
to today, so deploy order is safe either way (api first preferred).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 node/api/src/routes/chat.js            | 17 ++++++++++++++++
 node/api/src/services/virtual-agent.js | 28 +++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/node/api/src/routes/chat.js b/node/api/src/routes/chat.js
index 4b391bdb..7b913a23 100644
--- a/node/api/src/routes/chat.js
+++ b/node/api/src/routes/chat.js
@@ -207,6 +207,23 @@ router.post('/chat/send', apiRoute('chat', 'send', async (req, res) => {
             // The VA reply path failed before sending its [Error] feedback
             // chat message. Surface the error directly so the wait-mode
             // caller can react instead of polling for a sentinel.
+            // One allowlisted typed error passes through: the 429
+            // RATE_LIMITED throw from handleDirectChat (ZBBS-WORK-404), so
+            // the engine can classify a cooldown honestly instead of
+            // booking it as malformed. Allowlisted rather than passing any
+            // thrown statusCode through — an arbitrary internal error
+            // carrying statusCode would otherwise widen this route's
+            // public contract. Everything else stays 502 REPLY_FAILED.
+            if (replyErr.code === 'RATE_LIMITED' && replyErr.statusCode === 429) {
+                const error = {
+                    code: 'RATE_LIMITED',
+                    message: replyErr.message || 'Rate limited',
+                };
+                if (replyErr.resumesInSeconds != null) {
+                    error.resumes_in_seconds = replyErr.resumesInSeconds;
+                }
+                return res.status(429).json({ error });
+            }
             return res.status(502).json({
                 error: { code: 'REPLY_FAILED', message: replyErr.message || 'virtual agent reply failed' },
             });
diff --git a/node/api/src/services/virtual-agent.js b/node/api/src/services/virtual-agent.js
index 67d39834..0e96056d 100644
--- a/node/api/src/services/virtual-agent.js
+++ b/node/api/src/services/virtual-agent.js
@@ -299,6 +299,18 @@ async function isRateLimited(agentName, sceneId = null) {
     return false;
 }
 
+// Seconds until an agent's active cooldown lifts; 0 when none is active.
+// Only meaningful right after isRateLimited returned true — both limited
+// branches (already-cooling and just-tripped) leave _cooldownUntil set, so
+// wait-mode callers can surface a Retry-After-style hint (ZBBS-WORK-404).
+function rateLimitResumeSeconds(agentName) {
+    const entry = callHistory[agentName];
+    const cooldownUntil = entry ? entry._cooldownUntil : 0;
+    // A missing entry or an unset stamp means no cooldown is pending.
+    const remainingMs = cooldownUntil ? cooldownUntil - Date.now() : 0;
+    return Math.max(0, Math.ceil(remainingMs / 1000)); // whole seconds, never negative
+}
+
 // Record a provider call for rate limiting.
 //
 // sceneId (optional): the salem per-tick scene id. Only the FIRST call of a new
@@ -2569,7 +2581,21 @@ async function handleDirectChat(virtualAgentName, fromAgent, messageText, messag
             logVA('direct-chat-rate-limited', { agent: virtualAgentName, from: fromAgent });
             await chatSend(virtualAgentName, [fromAgent], null,
                 '[Error] Rate limited — too many API calls. Please wait before trying again.', { sceneId, conversationId, isError: true });
-            return null;
+            // Throw rather than resolve null (ZBBS-WORK-404). Resolving null
+            // gave wait=true callers (the salem engine) a 200 with reply=null,
+            // which the engine could only classify as a malformed response —
+            // rate-limit cooldowns masquerading as model-output failures in
+            // tick telemetry (the misattribution behind reactor-liveness
+            // finding #13). The route maps statusCode/code onto the HTTP
+            // reply so the engine sees an honest 429. The breadcrumb chat row
+            // above still lands for history/admin; non-wait dispatch swallows
+            // the rejection (.catch in chat.js), same as the missing-config
+            // throw below.
+            throw Object.assign(new Error('Rate limited — too many API calls'), {
+                statusCode: 429,
+                code: 'RATE_LIMITED',
+                resumesInSeconds: rateLimitResumeSeconds(agent.agent),
+            });
         }
 
         // Budget check