Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions packages/sdk/src/workflows/__tests__/budget-enforcement.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import { mkdtempSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import path from 'node:path';

import { afterEach, describe, expect, it, vi } from 'vitest';
import type { CliSessionReport } from '../cli-session-collector.js';
import type { RelayYamlConfig } from '../types.js';

// Queue of canned session reports consumed by the mocked collector below.
// Created through vi.hoisted so the value can be referenced from inside a
// vi.mock factory (vitest hoists vi.mock calls above ordinary declarations).
const mockedReports = vi.hoisted(() => ({
queue: [] as Array<CliSessionReport | null>,
}));

// Stub the relaycast SDK: RelayCast becomes a bare mock function and
// RelayError an empty Error subclass.
// NOTE(review): presumably keeps the runner from touching the real SDK — confirm.
vi.mock('@relaycast/sdk', () => ({
RelayCast: vi.fn(),
RelayError: class RelayError extends Error {},
}));

// Stub the relay module so AgentRelay is a bare mock function.
vi.mock('../../relay.js', () => ({
AgentRelay: vi.fn(),
}));

// Each collectCliSession call resolves to the next queued report, or null
// once the queue is empty (or when the queued entry itself is null).
vi.mock('../cli-session-collector.js', () => ({
collectCliSession: vi.fn(async () => mockedReports.queue.shift() ?? null),
}));

// Loaded with dynamic import() rather than static imports.
// NOTE(review): presumably so the modules under test resolve after the mocks
// above are registered — confirm against vitest's hoisting rules.
const { WorkflowRunner } = await import('../runner.js');
const { InMemoryWorkflowDb } = await import('../memory-db.js');

// Temp directories handed out by createWorkspace(); emptied and removed in afterEach.
const tempDirs: string[] = [];

/**
 * Creates a fresh temporary directory (prefixed `workflow-budget-`) under the
 * OS temp dir and registers it in `tempDirs` so the afterEach hook removes it.
 *
 * @returns Absolute path of the newly created directory.
 */
function createWorkspace(): string {
  const prefix = path.join(tmpdir(), 'workflow-budget-');
  const workspace = mkdtempSync(prefix);
  tempDirs.push(workspace);
  return workspace;
}

/**
 * Builds a canned codex session report with the given token counts.
 *
 * @param input - Input token count; also embedded in the session id.
 * @param output - Output token count; also embedded in the session id.
 * @param finalStatus - Final status recorded on the report (defaults to `'completed'`).
 * @returns A report whose summary states the combined input + output token count.
 */
function makeReport(
  input: number,
  output: number,
  finalStatus: 'completed' | 'failed' = 'completed'
): CliSessionReport {
  const sessionId = `session-${input}-${output}`;
  const summary = `used ${input + output} tokens`;
  const tokens = { input, output, cacheRead: 0 };

  const report: CliSessionReport = {
    cli: 'codex' as const,
    sessionId,
    model: 'gpt-5',
    provider: 'openai',
    durationMs: 1_000,
    cost: null,
    tokens,
    turns: 1,
    toolCalls: [],
    errors: [],
    finalStatus,
    summary,
  };
  return report;
}

/**
 * Builds a minimal two-step DAG workflow config for budget tests: a single
 * non-interactive codex agent named `worker` running `step-1`, followed by
 * `step-2` which depends on it.
 *
 * @param overrides - Optional knobs:
 *   - `tokenBudget`: copied to `swarm.tokenBudget`.
 *   - `maxTokens`: when provided, set as the worker's `constraints.maxTokens`.
 *   - `retries`: copied to `step-1`'s `retries`.
 * @returns The assembled config with trajectories disabled.
 */
function makeConfig(overrides?: {
  tokenBudget?: number;
  maxTokens?: number;
  retries?: number;
}): RelayYamlConfig {
  const maxTokens = overrides?.maxTokens;

  return {
    version: '1',
    name: 'budget-enforcement',
    swarm: {
      pattern: 'dag',
      tokenBudget: overrides?.tokenBudget,
    },
    agents: [
      {
        name: 'worker',
        cli: 'codex',
        interactive: false,
        // Compare against undefined instead of truthiness so an explicit
        // limit of 0 is forwarded rather than silently dropped.
        constraints: maxTokens !== undefined ? { maxTokens } : undefined,
      },
    ],
    workflows: [
      {
        name: 'default',
        steps: [
          {
            name: 'step-1',
            agent: 'worker',
            task: 'Do the first task',
            retries: overrides?.retries,
          },
          {
            name: 'step-2',
            agent: 'worker',
            task: 'Do the second task',
            dependsOn: ['step-1'],
          },
        ],
      },
    ],
    trajectories: false,
  };
}

// Per-test cleanup: reset the canned-report queue, clear mock call history,
// and delete every temp workspace created during the test (newest first).
afterEach(() => {
  mockedReports.queue = [];
  vi.clearAllMocks();

  for (let dir = tempDirs.pop(); dir !== undefined; dir = tempDirs.pop()) {
    rmSync(dir, { recursive: true, force: true });
  }
});

// End-to-end checks that WorkflowRunner refuses to start further steps once
// the reported token usage exceeds the configured workflow budget.
describe('WorkflowRunner token budget enforcement', () => {
  it('blocks later steps once the workflow budget is exhausted', async () => {
    // One report of 60 + 50 tokens against a workflow budget of 100.
    mockedReports.queue = [makeReport(60, 50)];

    const db = new InMemoryWorkflowDb();
    const executeAgentStep = vi.fn(async (step) => `completed ${step.name}`);
    const executor = { executeAgentStep };
    const runner = new WorkflowRunner({ cwd: createWorkspace(), db, executor });

    const config = makeConfig({ tokenBudget: 100, maxTokens: 80 });
    const run = await runner.execute(config, 'default');
    const steps = await db.getStepsByRunId(run.id);
    const firstStep = steps.find(({ stepName }) => stepName === 'step-1');
    const secondStep = steps.find(({ stepName }) => stepName === 'step-2');

    expect(run.status).toBe('failed');
    expect(executor.executeAgentStep).toHaveBeenCalledTimes(1);
    expect(firstStep?.status).toBe('completed');
    expect(secondStep?.status).toBe('failed');
    expect(secondStep?.completionReason).toBe('failed_budget_exceeded');
    expect(secondStep?.error).toContain('Workflow exceeded workflow budget');
  });

  it('counts failed attempts against the same workflow budget before retrying later steps', async () => {
    // A failed attempt (40 + 20) followed by a successful retry (30 + 20).
    mockedReports.queue = [makeReport(40, 20, 'failed'), makeReport(30, 20)];

    const db = new InMemoryWorkflowDb();
    const executeAgentStep = vi
      .fn()
      .mockRejectedValueOnce(new Error('first attempt failed'))
      .mockResolvedValueOnce('step-1 recovered');
    const executor = { executeAgentStep };
    const runner = new WorkflowRunner({ cwd: createWorkspace(), db, executor });

    const config = makeConfig({ tokenBudget: 100, maxTokens: 80, retries: 1 });
    const run = await runner.execute(config, 'default');
    const steps = await db.getStepsByRunId(run.id);
    const firstStep = steps.find(({ stepName }) => stepName === 'step-1');
    const secondStep = steps.find(({ stepName }) => stepName === 'step-2');

    expect(run.status).toBe('failed');
    expect(executor.executeAgentStep).toHaveBeenCalledTimes(2);
    expect(firstStep?.status).toBe('completed');
    expect(secondStep?.completionReason).toBe('failed_budget_exceeded');
    expect(secondStep?.error).toContain('Workflow exceeded workflow budget');
  });
});
114 changes: 114 additions & 0 deletions packages/sdk/src/workflows/__tests__/budget-tracker.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import { describe, expect, it } from 'vitest';

import { BudgetExceededError, BudgetTracker } from '../budget-tracker.js';

// Unit tests for BudgetTracker's accounting, overrun detection and spawn gating.
describe('BudgetTracker', () => {
  it('tracks usage across multiple steps', () => {
    const budget = new BudgetTracker({ perAgent: 100, perWorkflow: 500 });

    budget.recordUsage('plan', { input: 10, output: 5, cacheRead: 3 });
    budget.recordUsage('plan', { input: 1, output: 2 });
    budget.recordUsage('implement', { input: 20, output: 8, cacheRead: 4 });

    const planUsage = budget.getStepUsage('plan');
    expect(planUsage).toEqual({ input: 11, output: 7, cacheRead: 3, total: 18 });

    const implementUsage = budget.getStepUsage('implement');
    expect(implementUsage).toEqual({ input: 20, output: 8, cacheRead: 4, total: 28 });

    const overall = budget.getTotalUsage();
    expect(overall).toEqual({ input: 31, output: 15, cacheRead: 7, total: 46 });

    const remaining = budget.getRemainingBudget();
    expect(remaining).toEqual({ agent: 72, workflow: 454 });
  });

  it('detects per-agent budget overruns without counting cache reads', () => {
    const budget = new BudgetTracker({ perAgent: 30 });

    // The large cacheRead value must not show up in the reported 35/30 usage.
    budget.recordUsage('worker-a', { input: 20, output: 15, cacheRead: 999 });

    const verdict = budget.isOverBudget('worker-a');
    expect(verdict).toEqual({
      over: true,
      reason: 'Step "worker-a" exceeded per-agent budget (35/30 tokens used)',
    });
  });

  it('detects per-workflow budget overruns', () => {
    const budget = new BudgetTracker({ perWorkflow: 50 });

    budget.recordUsage('step-1', { input: 20, output: 10 });
    budget.recordUsage('step-2', { input: 5, output: 20 });

    const verdict = budget.isOverBudget('step-2');
    expect(verdict).toEqual({
      over: true,
      reason: 'Workflow budget exceeded after step "step-2" (55/50 tokens used)',
    });

    const remaining = budget.getRemainingBudget();
    expect(remaining).toEqual({ agent: null, workflow: -5 });
  });

  it('prevents spawning when workflow budget is nearly exhausted', () => {
    const budget = new BudgetTracker({ perAgent: 100, perWorkflow: 500 });

    budget.recordUsage('planner', { input: 250, output: 241 });

    const decision = budget.checkCanSpawn('implementer');
    expect(decision).toEqual({
      allowed: false,
      reason: 'Cannot spawn "implementer": remaining workflow budget 9 is below 10% of per-agent budget 10',
    });
  });

  it('maintains correct totals when async callers record usage in parallel', async () => {
    const budget = new BudgetTracker({ perAgent: 1_000, perWorkflow: 1_000 });

    // 100 interleaved async writers spread round-robin over four step names.
    const writers = Array.from({ length: 100 }, async (_, index) => {
      await Promise.resolve();
      budget.recordUsage(`step-${index % 4}`, {
        input: 2,
        output: 3,
        cacheRead: index % 2,
      });
    });
    await Promise.all(writers);

    expect(budget.getTotalUsage()).toEqual({
      input: 200,
      output: 300,
      cacheRead: 50,
      total: 500,
    });
    for (const stepName of ['step-0', 'step-1', 'step-2', 'step-3']) {
      expect(budget.getStepUsage(stepName).total).toBe(125);
    }
  });
});

// Verifies the error type carries both a readable message and the raw numbers.
describe('BudgetExceededError', () => {
  it('exposes structured budget overrun details', () => {
    const overrun = new BudgetExceededError('review', 'workflow', 500, 550);
    const { name, message, stepName, budgetType, limit, actual } = overrun;

    expect(overrun).toBeInstanceOf(Error);
    expect(name).toBe('BudgetExceededError');
    expect(message).toBe('Workflow exceeded workflow budget: 550 tokens used of 500');
    expect(stepName).toBe('review');
    expect(budgetType).toBe('workflow');
    expect(limit).toBe(500);
    expect(actual).toBe(550);
  });
});
55 changes: 55 additions & 0 deletions packages/sdk/src/workflows/__tests__/run-summary-table.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,61 @@ describe('formatRunSummaryTable', () => {
expect(output).toContain(' └─ Error [turn 1] Error: database locked');
});

// Passing per-step and workflow-level budget summaries should add a Budget
// column with used/limit figures and an [OVER] marker on overruns.
it('renders budget usage and over-budget markers when summaries are provided', () => {
  const rendered = formatRunSummaryTable(
    [
      { name: 'plan', agent: 'lead', status: 'completed', attempts: 1, durationMs: 1_000 },
      { name: 'implement', agent: 'worker', status: 'completed', attempts: 2, durationMs: 2_000 },
    ],
    // Session reports keyed by step name.
    new Map([
      [
        'plan',
        {
          cli: 'claude',
          sessionId: 's1',
          model: 'claude-sonnet-4',
          provider: 'anthropic',
          durationMs: 1_200,
          cost: 0.75,
          tokens: { input: 100, output: 50, cacheRead: 10 },
          turns: 2,
          toolCalls: [],
          errors: [],
          finalStatus: 'completed',
          summary: 'planned',
        },
      ],
      [
        'implement',
        {
          cli: 'codex',
          sessionId: 's2',
          model: 'gpt-5',
          provider: 'openai',
          durationMs: 3_400,
          cost: 1.25,
          tokens: { input: 300, output: 90, cacheRead: 20 },
          turns: 4,
          toolCalls: [],
          errors: [{ turn: 2, text: 'Error: recovered after retry' }],
          finalStatus: 'completed',
          summary: 'implemented',
        },
      ],
    ]),
    // Per-step budget summaries: "plan" is within its limit, "implement" is over.
    new Map([
      ['plan', { used: 150, limit: 200, over: false }],
      ['implement', { used: 390, limit: 350, over: true }],
    ]),
    // Workflow-level budget summary.
    { used: 540, limit: 500, over: true }
  );

  const expectedFragments = ['Budget', '150/200', '390/350 [OVER]', '540/500 [OVER]'];
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment);
  }
});

it('renders deterministic steps without reports using placeholder columns', () => {
const output = formatRunSummaryTable(
[{ name: 'lint', agent: 'shell', status: 'completed', attempts: 1, durationMs: 900 }],
Expand Down
Loading
Loading