From b967865e58f0d8fc6cd3ff958f559f086e81f29c Mon Sep 17 00:00:00 2001 From: Khaliq Date: Thu, 26 Mar 2026 11:56:02 +0100 Subject: [PATCH 1/6] fix: force non-interactive self-review agents --- .../sdk/src/__tests__/workflow-runner.test.ts | 89 +++++++++++++++---- packages/sdk/src/workflows/runner.ts | 46 ++++++---- 2 files changed, 102 insertions(+), 33 deletions(-) diff --git a/packages/sdk/src/__tests__/workflow-runner.test.ts b/packages/sdk/src/__tests__/workflow-runner.test.ts index 3144e30bb..3ab5fac55 100644 --- a/packages/sdk/src/__tests__/workflow-runner.test.ts +++ b/packages/sdk/src/__tests__/workflow-runner.test.ts @@ -916,23 +916,82 @@ agents: expect(spawnCalls[0][0].task).toContain('STEP_COMPLETE:step-1'); }); - it('should use the full remaining timeout as the review safety backstop', async () => { - const config = makeConfig({ - workflows: [ - { - name: 'default', - steps: [{ name: 'step-1', agent: 'agent-a', task: 'Do step 1', timeoutMs: 90_000 }], - }, - ], + it('should force interactive Codex reviewers onto the non-interactive review path', async () => { + const spawnAndWait = vi + .spyOn(runner as any, 'spawnAndWait') + .mockImplementation(async (agentDef: any, _step: any, timeoutMs: number | undefined, options: any) => { + expect(agentDef.cli).toBe('codex'); + expect(agentDef.interactive).toBe(false); + expect(timeoutMs).toBe(300_000); + options?.onChunk?.({ + chunk: 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: subprocess review\n', + }); + return { + output: 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: subprocess review\n', + exitCode: 0, + }; + }); + + vi.spyOn(runner as any, 'postToChannel').mockImplementation(() => undefined); + vi.spyOn(runner as any, 'recordStepToolSideEffect').mockImplementation(() => undefined); + (runner as any).trajectory = { + registerAgent: vi.fn().mockResolvedValue(undefined), + reviewCompleted: vi.fn().mockResolvedValue(undefined), + }; + + const reviewOutput = await (runner as any).runStepReviewGate( + { name: 'step-1', type: 'agent', agent: 'specialist', task: 'Do step 1' }, + 'Do step 1', + 'worker finished\n', + 'STEP_COMPLETE:step-1\n', + { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, + { name: 'reviewer-1', cli: 'codex', role: 'reviewer' }, + undefined + ); + + expect(reviewOutput).toContain('REVIEW_DECISION: APPROVE'); + expect(spawnAndWait).toHaveBeenCalledTimes(1); + }); + + it('should cap default review timeout at 5 minutes in executor mode', async () => { + const executeAgentStep = vi.fn( + async ( + _step: { name: string }, + agentDef: { cli: string; interactive?: boolean }, + _resolvedTask: string, + timeoutMs?: number + ) => { + expect(agentDef.cli).toBe('codex'); + expect(agentDef.interactive).toBe(false); + expect(timeoutMs).toBe(300_000); + return 'REVIEW_DECISION: APPROVE\nREVIEW_REASON: executor review\n'; + } + ); + + const localRunner = new WorkflowRunner({ + db, + workspaceId: 'ws-test', + executor: { executeAgentStep }, }); - const run = await runner.execute(config, 'default'); + vi.spyOn(localRunner as any, 'postToChannel').mockImplementation(() => undefined); + vi.spyOn(localRunner as any, 'recordStepToolSideEffect').mockImplementation(() => undefined); + (localRunner as any).trajectory = { + registerAgent: vi.fn().mockResolvedValue(undefined), + reviewCompleted: vi.fn().mockResolvedValue(undefined), + }; + + const reviewOutput = await (localRunner as any).runStepReviewGate( + { name: 'step-1', type: 'agent', agent: 'specialist', task: 'Do step 1' }, + 'Do step 1', + 'worker finished\n', + 'STEP_COMPLETE:step-1\n', + { name: 'team-lead', cli: 'claude', role: 'lead coordinator' }, + { name: 'reviewer-1', cli: 'codex', role: 'reviewer' }, + undefined + ); - expect(run.status).toBe('completed'); - const waitCalls = (waitForExitFn as any).mock?.calls ?? []; - expect(waitCalls.length).toBeGreaterThanOrEqual(2); - // first call: owner timeout; second call: review timeout - expect(waitCalls[1][0]).toBeGreaterThan(60_000); - expect(waitCalls[1][0]).toBeLessThanOrEqual(90_000); + expect(reviewOutput).toContain('REVIEW_DECISION: APPROVE'); + expect(executeAgentStep).toHaveBeenCalledTimes(1); }); }); diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 9526a1d07..023537d83 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -4595,33 +4595,43 @@ export class WorkflowRunner { `REVIEW_REASON: \n` + `Then output /exit.`; - const safetyTimeoutMs = timeoutMs ?? 600_000; + // Cap review timeout at 5 minutes. Review tasks are self-contained + // read-and-judge operations and should not inherit long owner step timeouts. + const REVIEW_TIMEOUT_CAP_MS = 300_000; + const safetyTimeoutMs = Math.min(timeoutMs ?? 600_000, REVIEW_TIMEOUT_CAP_MS); + // Force reviewers onto the non-interactive subprocess path. Review steps + // do not need broker connectivity, and interactive Codex reviewers can + // connect successfully but never actually consume the review task. + const effectiveReviewerDef = + reviewerDef.interactive === false + ? reviewerDef + : { ...reviewerDef, interactive: false }; const reviewStep: WorkflowStep = { name: `${step.name}-review`, type: 'agent', - agent: reviewerDef.name, + agent: effectiveReviewerDef.name, task: reviewTask, }; - await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer'); - this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`); + await this.trajectory?.registerAgent(effectiveReviewerDef.name, 'reviewer'); + this.postToChannel(`**[${step.name}]** Review started (reviewer: ${effectiveReviewerDef.name})`); this.recordStepToolSideEffect(step.name, { type: 'review_started', - detail: `Review started with ${reviewerDef.name}`, - raw: { reviewer: reviewerDef.name }, + detail: `Review started with ${effectiveReviewerDef.name}`, + raw: { reviewer: effectiveReviewerDef.name }, }); const emitReviewCompleted = async (decision: 'approved' | 'rejected', reason?: string) => { this.recordStepToolSideEffect(step.name, { type: 'review_completed', - detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`, - raw: { reviewer: reviewerDef.name, decision, reason }, + detail: `Review ${decision} by ${effectiveReviewerDef.name}${reason ? `: ${reason}` : ''}`, + raw: { reviewer: effectiveReviewerDef.name, decision, reason }, }); - await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason); + await this.trajectory?.reviewCompleted(step.name, effectiveReviewerDef.name, decision, reason); this.emit({ type: 'step:review-completed', runId: this.currentRunId ?? '', stepName: step.name, - reviewerName: reviewerDef.name, + reviewerName: effectiveReviewerDef.name, decision, }); }; @@ -4629,21 +4639,21 @@ export class WorkflowRunner { if (this.executor) { const reviewOutput = await this.executor.executeAgentStep( reviewStep, - reviewerDef, + effectiveReviewerDef, reviewTask, safetyTimeoutMs ); const parsed = this.parseReviewDecision(reviewOutput); if (!parsed) { throw new Error( - `Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)` + `Step "${step.name}" review response malformed from "${effectiveReviewerDef.name}" (missing REVIEW_DECISION)` ); } await emitReviewCompleted(parsed.decision, parsed.reason); if (parsed.decision === 'rejected') { - throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`); + throw new Error(`Step "${step.name}" review rejected by "${effectiveReviewerDef.name}"`); } - this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``); + this.postToChannel(`**[${step.name}]** Review approved by \`${effectiveReviewerDef.name}\``); return reviewOutput; } @@ -4670,7 +4680,7 @@ export class WorkflowRunner { }; try { - await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, { + await this.spawnAndWait(effectiveReviewerDef, reviewStep, safetyTimeoutMs, { evidenceStepName: step.name, evidenceRole: 'reviewer', logicalName: reviewerDef.name, @@ -4702,7 +4712,7 @@ export class WorkflowRunner { const parsed = this.parseReviewDecision(reviewOutput); if (!parsed) { throw new Error( - `Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)` + `Step "${step.name}" review response malformed from "${effectiveReviewerDef.name}" (missing REVIEW_DECISION)` ); } completedReview = parsed; @@ -4710,10 +4720,10 @@ export class WorkflowRunner { } if (completedReview.decision === 'rejected') { - throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`); + throw new Error(`Step "${step.name}" review rejected by "${effectiveReviewerDef.name}"`); } - this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``); + this.postToChannel(`**[${step.name}]** Review approved by \`${effectiveReviewerDef.name}\``); return reviewOutput; } From 3a109d039e003a6c810ce8d9999761e3ed0a5e41 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 27 Mar 2026 13:19:16 +0100 Subject: [PATCH 2/6] review feedback --- packages/sdk/src/workflows/runner.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 023537d83..3c509aaad 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -4696,6 +4696,15 @@ export class WorkflowRunner { } }, }); + // When the reviewer is non-interactive, spawnAndWait short-circuits to + // execNonInteractive which ignores onChunk — use SpawnResult.output as fallback. + if (!reviewOutput && spawnResult.output) { + reviewOutput = spawnResult.output; + const parsed = this.parseReviewDecision(reviewOutput); + if (parsed) { + startReviewCompletion(parsed); + } + } await reviewCompletionPromise; } catch (error) { const message = error instanceof Error ? error.message : String(error); From 542bafeb44c0e5a5743575d0d4eec57227c8c23e Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 27 Mar 2026 13:24:27 +0100 Subject: [PATCH 3/6] assign variable --- packages/sdk/src/workflows/runner.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 3c509aaad..141ae32c5 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -4680,7 +4680,7 @@ export class WorkflowRunner { }; try { - await this.spawnAndWait(effectiveReviewerDef, reviewStep, safetyTimeoutMs, { + const spawnResult = await this.spawnAndWait(effectiveReviewerDef, reviewStep, safetyTimeoutMs, { evidenceStepName: step.name, evidenceRole: 'reviewer', logicalName: reviewerDef.name, From 01125421601743e8bb7e14e47b987d06bf503d0d Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 27 Mar 2026 13:30:17 +0100 Subject: [PATCH 4/6] revert force and keep timeout --- packages/sdk/src/workflows/runner.ts | 45 ++++++++++++---------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 141ae32c5..d1f364fcf 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -4599,39 +4599,32 @@ export class WorkflowRunner { // read-and-judge operations and should not inherit long owner step timeouts. const REVIEW_TIMEOUT_CAP_MS = 300_000; const safetyTimeoutMs = Math.min(timeoutMs ?? 600_000, REVIEW_TIMEOUT_CAP_MS); - // Force reviewers onto the non-interactive subprocess path. Review steps - // do not need broker connectivity, and interactive Codex reviewers can - // connect successfully but never actually consume the review task. - const effectiveReviewerDef = - reviewerDef.interactive === false - ? reviewerDef - : { ...reviewerDef, interactive: false }; const reviewStep: WorkflowStep = { name: `${step.name}-review`, type: 'agent', - agent: effectiveReviewerDef.name, + agent: reviewerDef.name, task: reviewTask, }; - await this.trajectory?.registerAgent(effectiveReviewerDef.name, 'reviewer'); - this.postToChannel(`**[${step.name}]** Review started (reviewer: ${effectiveReviewerDef.name})`); + await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer'); + this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`); this.recordStepToolSideEffect(step.name, { type: 'review_started', - detail: `Review started with ${effectiveReviewerDef.name}`, - raw: { reviewer: effectiveReviewerDef.name }, + detail: `Review started with ${reviewerDef.name}`, + raw: { reviewer: reviewerDef.name }, }); const emitReviewCompleted = async (decision: 'approved' | 'rejected', reason?: string) => { this.recordStepToolSideEffect(step.name, { type: 'review_completed', - detail: `Review ${decision} by ${effectiveReviewerDef.name}${reason ? `: ${reason}` : ''}`, - raw: { reviewer: effectiveReviewerDef.name, decision, reason }, + detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`, + raw: { reviewer: reviewerDef.name, decision, reason }, }); - await this.trajectory?.reviewCompleted(step.name, effectiveReviewerDef.name, decision, reason); + await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason); this.emit({ type: 'step:review-completed', runId: this.currentRunId ?? '', stepName: step.name, - reviewerName: effectiveReviewerDef.name, + reviewerName: reviewerDef.name, decision, }); }; @@ -4639,21 +4632,21 @@ export class WorkflowRunner { if (this.executor) { const reviewOutput = await this.executor.executeAgentStep( reviewStep, - effectiveReviewerDef, + reviewerDef, reviewTask, safetyTimeoutMs ); const parsed = this.parseReviewDecision(reviewOutput); if (!parsed) { throw new Error( - `Step "${step.name}" review response malformed from "${effectiveReviewerDef.name}" (missing REVIEW_DECISION)` + `Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)` ); } await emitReviewCompleted(parsed.decision, parsed.reason); if (parsed.decision === 'rejected') { - throw new Error(`Step "${step.name}" review rejected by "${effectiveReviewerDef.name}"`); + throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`); } - this.postToChannel(`**[${step.name}]** Review approved by \`${effectiveReviewerDef.name}\``); + this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``); return reviewOutput; } @@ -4680,7 +4673,7 @@ export class WorkflowRunner { }; try { - const spawnResult = await this.spawnAndWait(effectiveReviewerDef, reviewStep, safetyTimeoutMs, { + const spawnResult = await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, { evidenceStepName: step.name, evidenceRole: 'reviewer', logicalName: reviewerDef.name, @@ -4696,8 +4689,8 @@ export class WorkflowRunner { } }, }); - // When the reviewer is non-interactive, spawnAndWait short-circuits to - // execNonInteractive which ignores onChunk — use SpawnResult.output as fallback. + // If onChunk never fired (e.g. non-interactive reviewer path), fall back + // to the accumulated output returned by spawnAndWait. if (!reviewOutput && spawnResult.output) { reviewOutput = spawnResult.output; const parsed = this.parseReviewDecision(reviewOutput); @@ -4721,7 +4714,7 @@ export class WorkflowRunner { const parsed = this.parseReviewDecision(reviewOutput); if (!parsed) { throw new Error( - `Step "${step.name}" review response malformed from "${effectiveReviewerDef.name}" (missing REVIEW_DECISION)` + `Step "${step.name}" review response malformed from "${reviewerDef.name}" (missing REVIEW_DECISION)` ); } completedReview = parsed; @@ -4729,10 +4722,10 @@ export class WorkflowRunner { } if (completedReview.decision === 'rejected') { - throw new Error(`Step "${step.name}" review rejected by "${effectiveReviewerDef.name}"`); + throw new Error(`Step "${step.name}" review rejected by "${reviewerDef.name}"`); } - this.postToChannel(`**[${step.name}]** Review approved by \`${effectiveReviewerDef.name}\``); + this.postToChannel(`**[${step.name}]** Review approved by \`${reviewerDef.name}\``); return reviewOutput; } From 41e7e29e13e82f88a88bc2e364d0eeba3f425926 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 27 Mar 2026 15:07:11 +0100 Subject: [PATCH 5/6] fix: force interactive:false on reviewer def in runStepReviewGate Clone reviewerDef with interactive:false before passing to spawnAndWait and executeAgentStep so review subprocesses cannot spawn sub-agents or open interactive PTY sessions regardless of the original agent config. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/sdk/src/workflows/runner.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index d1f364fcf..95e4115f2 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -4599,12 +4599,15 @@ export class WorkflowRunner { // read-and-judge operations and should not inherit long owner step timeouts. const REVIEW_TIMEOUT_CAP_MS = 300_000; const safetyTimeoutMs = Math.min(timeoutMs ?? 600_000, REVIEW_TIMEOUT_CAP_MS); + const reviewStep: WorkflowStep = { name: `${step.name}-review`, type: 'agent', agent: reviewerDef.name, task: reviewTask, }; + // Force non-interactive for review subprocesses regardless of original agent config + const reviewReviewerDef = { ...reviewerDef, interactive: false as const }; await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer'); this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`); @@ -4632,7 +4635,7 @@ export class WorkflowRunner { if (this.executor) { const reviewOutput = await this.executor.executeAgentStep( reviewStep, - reviewerDef, + reviewReviewerDef, reviewTask, safetyTimeoutMs ); @@ -4673,7 +4676,7 @@ export class WorkflowRunner { }; try { - const spawnResult = await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, { + const spawnResult = await this.spawnAndWait(reviewReviewerDef, reviewStep, safetyTimeoutMs, { evidenceStepName: step.name, evidenceRole: 'reviewer', logicalName: reviewerDef.name, From eab78bcc9b13566a48bc0221c1cb7d538c46ba28 Mon Sep 17 00:00:00 2001 From: Khaliq Date: Fri, 27 Mar 2026 15:47:34 +0100 Subject: [PATCH 6/6] Revert "fix: force interactive:false on reviewer def in runStepReviewGate" This reverts commit 41e7e29e13e82f88a88bc2e364d0eeba3f425926. --- packages/sdk/src/workflows/runner.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/packages/sdk/src/workflows/runner.ts b/packages/sdk/src/workflows/runner.ts index 95e4115f2..d1f364fcf 100644 --- a/packages/sdk/src/workflows/runner.ts +++ b/packages/sdk/src/workflows/runner.ts @@ -4599,15 +4599,12 @@ export class WorkflowRunner { // read-and-judge operations and should not inherit long owner step timeouts. const REVIEW_TIMEOUT_CAP_MS = 300_000; const safetyTimeoutMs = Math.min(timeoutMs ?? 600_000, REVIEW_TIMEOUT_CAP_MS); - const reviewStep: WorkflowStep = { name: `${step.name}-review`, type: 'agent', agent: reviewerDef.name, task: reviewTask, }; - // Force non-interactive for review subprocesses regardless of original agent config - const reviewReviewerDef = { ...reviewerDef, interactive: false as const }; await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer'); this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`); @@ -4635,7 +4632,7 @@ export class WorkflowRunner { if (this.executor) { const reviewOutput = await this.executor.executeAgentStep( reviewStep, - reviewReviewerDef, + reviewerDef, reviewTask, safetyTimeoutMs ); @@ -4676,7 +4673,7 @@ export class WorkflowRunner { }; try { - const spawnResult = await this.spawnAndWait(reviewReviewerDef, reviewStep, safetyTimeoutMs, { + const spawnResult = await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, { evidenceStepName: step.name, evidenceRole: 'reviewer', logicalName: reviewerDef.name,