Skip to content

Commit 71cb2c0

Browse files
fix(rate-limiter): bound hosted-key queue wait to execution budget; fix heartbeat + telemetry
Tie the per-workspace hosted-key queue wait to the surrounding execution budget instead of a flat 5-minute cap. acquireKey now accepts the execution AbortSignal (threaded from ExecutionContext): when present, the wait is bounded by the run's actual plan timeout / cancellation, with the enterprise async ceiling as a backstop; when absent it falls back to MAX_QUEUE_WAIT_MS. This lets long-running async (Trigger.dev) runs use their full budget while no longer letting a single queued call burn a short sync run's entire budget. Also addresses Greptile review: - P1: share one lastHeartbeatAt across all wait phases and cap every sleep to HEARTBEAT_REFRESH_INTERVAL_MS so a long low-RPM retryAfterMs can no longer let the head's heartbeat lapse mid-wait and break FIFO ordering. - P2: derive hostedKeyQueueWaited telemetry reason from the actual bottleneck (queue_position / dimension / actor_requests) instead of hardcoding it. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 0b80ed3 commit 71cb2c0

3 files changed

Lines changed: 259 additions & 61 deletions

File tree

apps/sim/lib/core/rate-limiter/hosted-key/hosted-key-rate-limiter.test.ts

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import type {
55
TokenStatus,
66
} from '@/lib/core/rate-limiter/storage'
77
import { HostedKeyRateLimiter } from './hosted-key-rate-limiter'
8-
import type { HostedKeyQueue } from './queue'
8+
import { HEARTBEAT_REFRESH_INTERVAL_MS, type HostedKeyQueue } from './queue'
99
import type { CustomRateLimit, PerRequestRateLimit } from './types'
1010

1111
/** Force the queue wait to give up on the first iteration by reporting a retry time
@@ -370,6 +370,102 @@ describe('HostedKeyRateLimiter', () => {
370370
})
371371
})
372372

373+
describe('execution-budget-bounded waits', () => {
374+
it('bails immediately when the execution signal is already aborted', async () => {
375+
const blocked: ConsumeResult = {
376+
allowed: false,
377+
tokensRemaining: 0,
378+
resetAt: new Date(Date.now() + 100),
379+
}
380+
mockAdapter.consumeTokens.mockResolvedValue(blocked)
381+
382+
const result = await rateLimiter.acquireKey(
383+
testProvider,
384+
envKeyPrefix,
385+
perRequestRateLimit,
386+
'workspace-1',
387+
AbortSignal.abort()
388+
)
389+
390+
expect(result.success).toBe(false)
391+
expect(result.billingActorRateLimited).toBe(true)
392+
// Aborted budget => give up on the first bucket check rather than looping.
393+
expect(mockAdapter.consumeTokens).toHaveBeenCalledTimes(1)
394+
})
395+
396+
it('keeps waiting past the no-signal fallback cap while the signal is live', async () => {
397+
// A live (non-aborted) signal means the run still has budget, so the wait must not
398+
// 429 at the 5-minute MAX_QUEUE_WAIT_MS fallback. The bucket frees up after ~7 min.
399+
const blocked: ConsumeResult = {
400+
allowed: false,
401+
tokensRemaining: 0,
402+
resetAt: new Date(Date.now() + 10_000),
403+
}
404+
const allowedResult: ConsumeResult = {
405+
allowed: true,
406+
tokensRemaining: 9,
407+
resetAt: new Date(Date.now() + 60_000),
408+
}
409+
mockAdapter.consumeTokens.mockResolvedValue(blocked)
410+
411+
vi.useFakeTimers()
412+
try {
413+
const promise = rateLimiter.acquireKey(
414+
testProvider,
415+
envKeyPrefix,
416+
perRequestRateLimit,
417+
'workspace-1',
418+
new AbortController().signal
419+
)
420+
// Burn well past the 5-minute fallback cap — without a signal this would have 429'd.
421+
await vi.advanceTimersByTimeAsync(7 * 60 * 1000)
422+
mockAdapter.consumeTokens.mockResolvedValue(allowedResult)
423+
await vi.advanceTimersByTimeAsync(HEARTBEAT_REFRESH_INTERVAL_MS)
424+
const result = await promise
425+
426+
expect(result.success).toBe(true)
427+
expect(result.key).toBe('test-key-1')
428+
} finally {
429+
vi.useRealTimers()
430+
}
431+
})
432+
433+
it('refreshes the heartbeat during a long low-RPM bucket wait', async () => {
434+
// Provider with a long refill (retryAfterMs >> heartbeat TTL). The sleep must be
435+
// capped so the heartbeat is renewed and the head is not reaped mid-wait.
436+
const blocked: ConsumeResult = {
437+
allowed: false,
438+
tokensRemaining: 0,
439+
resetAt: new Date(Date.now() + 60_000),
440+
}
441+
const allowedResult: ConsumeResult = {
442+
allowed: true,
443+
tokensRemaining: 0,
444+
resetAt: new Date(Date.now() + 60_000),
445+
}
446+
mockAdapter.consumeTokens.mockResolvedValue(blocked)
447+
448+
vi.useFakeTimers()
449+
try {
450+
const promise = rateLimiter.acquireKey(
451+
testProvider,
452+
envKeyPrefix,
453+
perRequestRateLimit,
454+
'workspace-1',
455+
new AbortController().signal
456+
)
457+
await vi.advanceTimersByTimeAsync(3 * HEARTBEAT_REFRESH_INTERVAL_MS)
458+
mockAdapter.consumeTokens.mockResolvedValue(allowedResult)
459+
await vi.advanceTimersByTimeAsync(HEARTBEAT_REFRESH_INTERVAL_MS)
460+
await promise
461+
462+
expect(mockQueue.refreshHeartbeat).toHaveBeenCalled()
463+
} finally {
464+
vi.useRealTimers()
465+
}
466+
})
467+
})
468+
373469
describe('acquireKey with custom rate limit', () => {
374470
const customRateLimit: CustomRateLimit = {
375471
mode: 'custom',

0 commit comments

Comments
 (0)