diff --git a/packages/sdk/src/__tests__/workflow-runner.test.ts b/packages/sdk/src/__tests__/workflow-runner.test.ts index 3144e30bb..3ab5fac55 100644 --- a/packages/sdk/src/__tests__/workflow-runner.test.ts +++ b/packages/sdk/src/__tests__/workflow-runner.test.ts @@ -916,23 +916,82 @@ agents: expect(spawnCalls[0][0].task).toContain('STEP_COMPLETE:step-1'); }); - it('should use the full remaining timeout as the review safety backstop', async () => { - const config = makeConfig({ - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1', timeoutMs: 90_000 }], - }, - ], + it('should force interactive Codex reviewers onto the non-interactive review path', async () => { + const spawnAndWait = vi + .spyOn(runner as any, 'spawnAndWait') + .mockImplementation(async (agentDef: any, _step: any, timeoutMs: number | undefined, options: any) => { + expect(agentDef.cli).toBe('codex'); + expect(agentDef.interactive).toBe(false); + expect(timeoutMs).toBe(300_000); + options?.onChunk?.({ + chunk: 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: subprocess review\n', + }); + return { + output: 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: subprocess review\n', + exitCode: 0, + }; + }); + + vi.spyOn(runner as any, 'postToChannel').mockImplementation(() => undefined); + vi.spyOn(runner as any, 'recordStepToolSideEffect').mockImplementation(() => undefined); + (runner as any).trajectory = { + registerAgent: vi.fn().mockResolvedValue(undefined), + reviewCompleted: vi.fn().mockResolvedValue(undefined), + }; + + const reviewOutput = await (runner as any).runStepReviewGate( + { name: 'step-1', type: 'agent', agent: 'specialist', task: 'Do step 1' }, + 'Do step 1', + 'worker finished\n', + 'STEP_COMPLETE:step-1\n', + { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, + { name: 'reviewer-1', cli: 'codex', role: 'reviewer' }, + undefined + ); + + expect(reviewOutput).toContain('REVIEW_DECISION: APPROVE'); + expect(spawnAndWait).toHaveBeenCalledTimes(1); + }); + + it('should cap default review timeout at 5 minutes in executor mode', async () => { + const executeAgentStep = vi.fn( + async ( + _step: { name: string }, + agentDef: { cli: string; interactive?: boolean }, + _resolvedTask: string, + timeoutMs?: number + ) => { + expect(agentDef.cli).toBe('codex'); + expect(agentDef.interactive).toBe(false); + expect(timeoutMs).toBe(300_000); + return 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: executor review\n'; + } + ); + + const localRunner = new WorkflowRunner({ + db, + workspaceId: 'ws-test', + executor: { executeAgentStep }, }); - const run = await runner.execute(config, 'default'); + vi.spyOn(localRunner as any, 'postToChannel').mockImplementation(() => undefined); + vi.spyOn(localRunner as any, 'recordStepToolSideEffect').mockImplementation(() => undefined); + (localRunner as any).trajectory = { + registerAgent: vi.fn().mockResolvedValue(undefined), + reviewCompleted: vi.fn().mockResolvedValue(undefined), + }; + + const reviewOutput = await (localRunner as any).runStepReviewGate( + { name: 'step-1', type: 'agent', agent: 'specialist', task: 'Do step 1' }, + 'Do step 1', + 'worker finished\n', + 'STEP_COMPLETE:step-1\n', + { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, + { name: 'reviewer-1', cli: 'codex', role: 'reviewer' }, + undefined + ); - expect(run.status).toBe('completed'); - const waitCalls = (waitForExitFn as any).mock?.calls ?? []; - expect(waitCalls.length).toBeGreaterThanOrEqual(2); - // first call: owner timeout; second call: review timeout - expect(waitCalls[1][0]).toBeGreaterThan(60_000); - expect(waitCalls[1][0]).toBeLessThanOrEqual(90_000); + expect(reviewOutput).toContain('REVIEW_DECISION: APPROVE'); + expect(executeAgentStep).toHaveBeenCalledTimes(1); }); }); diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 9526a1d07..d1f364fcf 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -4595,7 +4595,10 @@ export class WorkflowRunner { `REVIEW_REASON: \n` + `Then output /exit.`; - const safetyTimeoutMs = timeoutMs ?? 600_000; + // Cap review timeout at 5 minutes. Review tasks are self-contained + // read-and-judge operations and should not inherit long owner step timeouts. + const REVIEW_TIMEOUT_CAP_MS = 300_000; + const safetyTimeoutMs = Math.min(timeoutMs ?? 600_000, REVIEW_TIMEOUT_CAP_MS); const reviewStep: WorkflowStep = { name: `${step.name}-review`, type: 'agent', @@ -4670,7 +4673,7 @@ export class WorkflowRunner { }; try { - await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, { + const spawnResult = await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, { evidenceStepName: step.name, evidenceRole: 'reviewer', logicalName: reviewerDef.name, @@ -4686,6 +4689,15 @@ export class WorkflowRunner { } }, }); + // If onChunk never fired (e.g. non-interactive reviewer path), fall back + // to the accumulated output returned by spawnAndWait. + if (!reviewOutput && spawnResult.output) { + reviewOutput = spawnResult.output; + const parsed = this.parseReviewDecision(reviewOutput); + if (parsed) { + startReviewCompletion(parsed); + } + } await reviewCompletionPromise; } catch (error) { const message = error instanceof Error ? error.message : String(error);