Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions node/api/src/routes/chat.js
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,23 @@ router.post('/chat/send', apiRoute('chat', 'send', async (req, res) => {
// The VA reply path failed before sending its [Error] feedback
// chat message. Surface the error directly so the wait-mode
// caller can react instead of polling for a sentinel.
// One allowlisted typed error passes through: the 429
// RATE_LIMITED throw from handleDirectChat (ZBBS-WORK-404), so
// the engine can classify a cooldown honestly instead of
// booking it as malformed. Allowlisted rather than passing any
// thrown statusCode through — an arbitrary internal error
// carrying statusCode would otherwise widen this route's
// public contract. Everything else stays 502 REPLY_FAILED.
if (replyErr.code === 'RATE_LIMITED' && replyErr.statusCode === 429) {
const error = {
code: 'RATE_LIMITED',
message: replyErr.message || 'Rate limited',
};
if (replyErr.resumesInSeconds != null) {
error.resumes_in_seconds = replyErr.resumesInSeconds;
}
return res.status(429).json({ error });
}
return res.status(502).json({
error: { code: 'REPLY_FAILED', message: replyErr.message || 'virtual agent reply failed' },
});
Expand Down
28 changes: 27 additions & 1 deletion node/api/src/services/virtual-agent.js
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,18 @@ async function isRateLimited(agentName, sceneId = null) {
return false;
}

// How many whole seconds remain before the named agent's cooldown ends.
//
// Looks up the agent's entry in callHistory and reads its _cooldownUntil
// timestamp (epoch milliseconds, compared against Date.now()). Returns 0 when
// the agent has no entry, when no cooldown timestamp is recorded, or when the
// recorded timestamp is already in the past. A partial second is rounded up
// so the caller never retries early.
//
// Intended to be called immediately after isRateLimited reported true, when
// _cooldownUntil is expected to be populated, so wait-mode callers can hand
// back a Retry-After-style hint (ZBBS-WORK-404).
function rateLimitResumeSeconds(agentName) {
  const entry = callHistory[agentName];
  const cooldownDeadline = entry ? entry._cooldownUntil : 0;
  if (!cooldownDeadline) {
    // No history for this agent, or no cooldown has been recorded.
    return 0;
  }

  const remainingMs = cooldownDeadline - Date.now();
  const remainingSeconds = Math.ceil(remainingMs / 1000);
  // Clamp so an already-expired cooldown reports 0 rather than a negative.
  return Math.max(0, remainingSeconds);
}

// Record a provider call for rate limiting.
//
// sceneId (optional): the salem per-tick scene id. Only the FIRST call of a new
Expand Down Expand Up @@ -2569,7 +2581,21 @@ async function handleDirectChat(virtualAgentName, fromAgent, messageText, messag
logVA('direct-chat-rate-limited', { agent: virtualAgentName, from: fromAgent });
await chatSend(virtualAgentName, [fromAgent], null,
'[Error] Rate limited — too many API calls. Please wait before trying again.', { sceneId, conversationId, isError: true });
return null;
// Throw rather than resolve null (ZBBS-WORK-404). Resolving null
// gave wait=true callers (the salem engine) a 200 with reply=null,
// which the engine could only classify as a malformed response —
// rate-limit cooldowns masquerading as model-output failures in
// tick telemetry (the misattribution behind reactor-liveness
// finding #13). The route maps statusCode/code onto the HTTP
// reply so the engine sees an honest 429. The breadcrumb chat row
// above still lands for history/admin; non-wait dispatch swallows
// the rejection (.catch in chat.js), same as the missing-config
// throw below.
throw Object.assign(new Error('Rate limited — too many API calls'), {
statusCode: 429,
code: 'RATE_LIMITED',
resumesInSeconds: rateLimitResumeSeconds(agent.agent),
});
}

// Budget check
Expand Down
Loading