diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml new file mode 100644 index 0000000..2ec99dc --- /dev/null +++ b/.github/workflows/pr-checks.yml @@ -0,0 +1,89 @@ +name: PR Checks + +on: + pull_request: + types: + - opened + - synchronize + - reopened + - ready_for_review + +permissions: + contents: read + pull-requests: write + +jobs: + checks: + name: pnpm checks + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version-file: .nvmrc + + - name: Resolve pnpm version from packageManager + id: pnpm-version + run: | + version=$(node -e "console.log(require('./package.json').packageManager.split('@')[1])") + echo "version=$version" >> "$GITHUB_OUTPUT" + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: ${{ steps.pnpm-version.outputs.version }} + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Run checks + run: pnpm checks + + - name: Comment success summary on PR + if: ${{ success() && github.event_name == 'pull_request' }} + continue-on-error: true + uses: actions/github-script@v7 + with: + script: | + const marker = '<!-- pr-checks-summary -->' + const body = [ + marker, + '## PR Checks Summary', + '', + '- Status: `passed`', + '- Command: `pnpm checks`', + `- Workflow run: https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`, + `- Commit: \`${context.sha.slice(0, 7)}\``, + ].join('\n') + + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + per_page: 100, + }) + + const existing = comments.find( + (comment) => + comment.user?.type === 'Bot' && comment.body?.includes(marker) + ) + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }) + } else { + await
github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body, + }) + } diff --git a/openspec/changes/broader-integration-tests/.openspec.yaml b/openspec/changes/archive/2026-03-03-broader-integration-tests/.openspec.yaml similarity index 100% rename from openspec/changes/broader-integration-tests/.openspec.yaml rename to openspec/changes/archive/2026-03-03-broader-integration-tests/.openspec.yaml diff --git a/openspec/changes/broader-integration-tests/design.md b/openspec/changes/archive/2026-03-03-broader-integration-tests/design.md similarity index 100% rename from openspec/changes/broader-integration-tests/design.md rename to openspec/changes/archive/2026-03-03-broader-integration-tests/design.md diff --git a/openspec/changes/broader-integration-tests/proposal.md b/openspec/changes/archive/2026-03-03-broader-integration-tests/proposal.md similarity index 100% rename from openspec/changes/broader-integration-tests/proposal.md rename to openspec/changes/archive/2026-03-03-broader-integration-tests/proposal.md diff --git a/openspec/changes/broader-integration-tests/specs/broader-integration-test-coverage/spec.md b/openspec/changes/archive/2026-03-03-broader-integration-tests/specs/broader-integration-test-coverage/spec.md similarity index 100% rename from openspec/changes/broader-integration-tests/specs/broader-integration-test-coverage/spec.md rename to openspec/changes/archive/2026-03-03-broader-integration-tests/specs/broader-integration-test-coverage/spec.md diff --git a/openspec/changes/broader-integration-tests/tasks.md b/openspec/changes/archive/2026-03-03-broader-integration-tests/tasks.md similarity index 50% rename from openspec/changes/broader-integration-tests/tasks.md rename to openspec/changes/archive/2026-03-03-broader-integration-tests/tasks.md index 17893ae..6736825 100644 --- a/openspec/changes/broader-integration-tests/tasks.md +++ 
b/openspec/changes/archive/2026-03-03-broader-integration-tests/tasks.md @@ -1,35 +1,35 @@ ## 1. Integration Harness Foundations -- [ ] 1.1 Add reusable integration fixture builders for workspace setup, policy mode, approval responses, and automation trigger state. -- [ ] 1.2 Add deterministic loop-control helpers (bounded iterations/ticks and explicit termination hooks) for daemon/watcher tests. -- [ ] 1.3 Document fixture usage conventions in integration test helpers to keep new scenarios consistent. +- [x] 1.1 Add reusable integration fixture builders for workspace setup, policy mode, approval responses, and automation trigger state. +- [x] 1.2 Add deterministic loop-control helpers (bounded iterations/ticks and explicit termination hooks) for daemon/watcher tests. +- [x] 1.3 Document fixture usage conventions in integration test helpers to keep new scenarios consistent. ## 2. Command Flow Integration Coverage -- [ ] 2.1 Add/expand `chat` integration scenarios for success path assertions and expected output state. -- [ ] 2.2 Add/expand `plan` integration scenarios for policy-denied branch assertions and side-effect guards. -- [ ] 2.3 Add/expand `index` integration scenarios for controlled recoverable failure behavior and non-hanging exits. +- [x] 2.1 Add/expand `chat` integration scenarios for success path assertions and expected output state. +- [x] 2.2 Add/expand `plan` integration scenarios for policy-denied branch assertions and side-effect guards. +- [x] 2.3 Add/expand `index` integration scenarios for controlled recoverable failure behavior and non-hanging exits. ## 3. Automation Lifecycle Integration Coverage -- [ ] 3.1 Add integration scenarios that validate automation trigger-to-execution handoff and successful completion lifecycle state. -- [ ] 3.2 Add integration scenarios that validate automation runtime failure lifecycle reporting with observable error details. 
-- [ ] 3.3 Add integration scenarios that validate event-driven hook trigger-to-execution behavior and observable success/failure outcomes. +- [x] 3.1 Add integration scenarios that validate automation trigger-to-execution handoff and successful completion lifecycle state. +- [x] 3.2 Add integration scenarios that validate automation runtime failure lifecycle reporting with observable error details. +- [x] 3.3 Add integration scenarios that validate event-driven hook trigger-to-execution behavior and observable success/failure outcomes. ## 4. Policy and Approval Branch Coverage -- [ ] 4.1 Add approval-granted integration scenario(s) that verify branch continuation and expected actions. -- [ ] 4.2 Add approval-denied integration scenario(s) that verify boundary stop behavior and denied outcomes. -- [ ] 4.3 Add shared assertions to ensure denied/gated branches do not perform unauthorized side effects. +- [x] 4.1 Add approval-granted integration scenario(s) that verify branch continuation and expected actions. +- [x] 4.2 Add approval-denied integration scenario(s) that verify boundary stop behavior and denied outcomes. +- [x] 4.3 Add shared assertions to ensure denied/gated branches do not perform unauthorized side effects. ## 5. Daemon/Watcher Loop Lifecycle Coverage -- [ ] 5.1 Add daemon loop integration scenarios for startup, bounded work processing, and clean exit. -- [ ] 5.2 Add watcher loop integration scenarios for fatal error termination and deadlock-free shutdown. -- [ ] 5.3 Add timeout/cleanup safeguards in loop-focused tests to prevent CI hangs. +- [x] 5.1 Add daemon loop integration scenarios for startup, bounded work processing, and clean exit. +- [x] 5.2 Add watcher loop integration scenarios for fatal error termination and deadlock-free shutdown. +- [x] 5.3 Add timeout/cleanup safeguards in loop-focused tests to prevent CI hangs. ## 6. 
Verification and Stability -- [ ] 6.1 Run targeted integration suites for command, automation, hooks, policy, and loop domains and fix flakiness. -- [ ] 6.2 Run full test/type/lint verification (`pnpm test`, `pnpm typecheck`, `pnpm lint`) and resolve regressions. -- [ ] 6.3 Capture an integration coverage matrix summary in test comments/docs to clarify maintained branch expectations. +- [x] 6.1 Run targeted integration suites for command, automation, hooks, policy, and loop domains and fix flakiness. +- [x] 6.2 Run full test/type/lint verification (`pnpm test`, `pnpm typecheck`, `pnpm lint`) and resolve regressions. +- [x] 6.3 Capture an integration coverage matrix summary in test comments/docs to clarify maintained branch expectations. diff --git a/openspec/specs/broader-integration-test-coverage/spec.md b/openspec/specs/broader-integration-test-coverage/spec.md new file mode 100644 index 0000000..2568f37 --- /dev/null +++ b/openspec/specs/broader-integration-test-coverage/spec.md @@ -0,0 +1,56 @@ +# broader-integration-test-coverage Specification + +## Purpose +Define required integration coverage breadth for core command flows, automation lifecycle behavior, policy and approval branches, and daemon/watcher loop lifecycles so realistic end-to-end behavior remains continuously validated. +## Requirements +### Requirement: Command Flow Integration Coverage +The test suite MUST include integration scenarios that exercise `chat`, `plan`, and `index` command flows through their real orchestration paths, including success and failure branches. 
+ +#### Scenario: Chat command success path +- **WHEN** the integration suite executes `chat` with valid inputs and allowed policy +- **THEN** the command flow SHALL complete successfully and emit the expected terminal/output state + +#### Scenario: Plan command policy-denied path +- **WHEN** the integration suite executes `plan` in a policy configuration that denies the action +- **THEN** the command flow SHALL terminate with a policy-denied result and no unauthorized side effects + +#### Scenario: Index command recoverable failure path +- **WHEN** the integration suite executes `index` and encounters a controlled recoverable error in processing +- **THEN** the command flow SHALL surface the failure state according to runtime policy and exit without hanging + +### Requirement: Automation Lifecycle Integration Coverage +The test suite MUST include integration scenarios for automation lifecycle behavior, including schedule trigger, event-driven hook trigger, execution handoff, and completion/failure signaling. + +#### Scenario: Scheduled automation execution +- **WHEN** an automation trigger condition is satisfied in integration runtime +- **THEN** the automation SHALL start, execute, and report completion in the expected lifecycle state + +#### Scenario: Event-driven hook execution +- **WHEN** a configured hook event is emitted in integration runtime +- **THEN** the hook pipeline SHALL trigger the configured action and report expected lifecycle state and outcome + +#### Scenario: Automation execution failure reporting +- **WHEN** automation execution encounters a runtime failure +- **THEN** the automation SHALL report a failure lifecycle state with observable error details + +### Requirement: Policy and Approval Branch Coverage +The test suite MUST verify integration behavior across policy and approval branches, including approved, denied, and gated flows. 
+ +#### Scenario: Approval granted branch +- **WHEN** a command requiring approval receives an approval response +- **THEN** the integration flow SHALL continue through the approved branch and complete expected actions + +#### Scenario: Approval denied branch +- **WHEN** a command requiring approval receives a denial response +- **THEN** the integration flow SHALL stop at the approval boundary and emit a denied outcome + +### Requirement: Daemon and Watcher Loop Lifecycle Coverage +The test suite MUST verify daemon and watcher loop lifecycle behavior, including startup, bounded processing, and controlled termination. + +#### Scenario: Daemon loop bounded processing +- **WHEN** the daemon loop is started in integration tests with bounded iteration controls +- **THEN** it SHALL process expected work units and exit cleanly at the configured bound + +#### Scenario: Watcher termination on fatal loop error +- **WHEN** the watcher loop encounters a configured fatal error condition +- **THEN** it SHALL terminate the loop and expose the fatal error outcome without deadlock diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..522254e --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,17 @@ +# Integration Coverage Matrix + +This matrix tracks maintained branch expectations for high-risk orchestration paths. 
+ +| Domain | Branch | Test file | +| --- | --- | --- | +| `chat` | Success output state | `tests/integration/commands.integration.test.ts` | +| `plan` | Policy-denied branch + side-effect guard | `tests/integration/commands.integration.test.ts` | +| `index` | Recoverable failure, non-hanging exit | `tests/integration/commands.integration.test.ts` | +| Automation scheduler | Trigger handoff + completion lifecycle | `tests/integration/automation-hooks.integration.test.ts` | +| Automation runtime | Failure lifecycle + error visibility | `tests/integration/automation-hooks.integration.test.ts` | +| Event hooks | Success/failure hook outcomes | `tests/integration/automation-hooks.integration.test.ts` | +| Approval policy | Granted branch continuation | `tests/integration/policy-approval.integration.test.ts` | +| Approval policy | Denied boundary stop | `tests/integration/policy-approval.integration.test.ts` | +| Daemon loop | Startup + bounded processing + clean exit | `tests/integration/loops.integration.test.ts` | +| Watcher loop | Fatal termination + deadlock-free shutdown | `tests/integration/loops.integration.test.ts` | +| Loop cleanup | Timeout safeguards | `tests/integration/loops.integration.test.ts` | diff --git a/tests/integration/automation-hooks.integration.test.ts b/tests/integration/automation-hooks.integration.test.ts new file mode 100644 index 0000000..6765a6f --- /dev/null +++ b/tests/integration/automation-hooks.integration.test.ts @@ -0,0 +1,153 @@ +import { describe, expect, it, vi } from 'vitest'; + +const cronMocks = vi.hoisted(() => ({ + schedule: vi.fn(), + jobs: [] as Array<() => Promise | void>, +})); + +vi.mock('node-cron', () => ({ + default: { + schedule: cronMocks.schedule, + }, +})); + +const execMocks = vi.hoisted(() => ({ + executeCommand: vi.fn(), +})); + +vi.mock('../../src/tools/exec-command', () => ({ + executeCommand: execMocks.executeCommand, +})); + +import { EventHookRunner } from '../../src/automation/event-hooks'; +import { 
AutomationRunner } from '../../src/automation/runner'; +import { AutomationScheduler } from '../../src/automation/scheduler'; +import type { AutomationSpec } from '../../src/automation/schemas'; +import { createAutomationTriggerState } from './helpers'; + +describe('integration: automation lifecycle and hook handoff', () => { + it('scheduled automation trigger hands off to runner and reaches completed state', async () => { + cronMocks.jobs = []; + cronMocks.schedule.mockImplementation((_cron: string, task: () => Promise) => { + cronMocks.jobs.push(task); + return { + stop: vi.fn(), + }; + }); + + const runTurn = vi.fn(async () => ({ assistantResponse: { message: 'done' } })); + const scheduler = new AutomationScheduler(); + const runner = new AutomationRunner({ runTurn } as never); + const spec: AutomationSpec = { + id: 'job-1', + name: 'nightly', + enabled: true, + trigger: { type: 'schedule', cron: '* * * * *' }, + prompt: 'summarize health', + workspace: '/tmp/repo', + writePolicy: 'read-only', + retries: 1, + }; + + const state = createAutomationTriggerState({ spec }); + + scheduler.schedule(spec, async (currentSpec) => { + state.status = 'running'; + await runner.run(currentSpec); + state.status = 'completed'; + state.finishedAt = new Date().toISOString(); + }); + + await cronMocks.jobs[0](); + + expect(runTurn).toHaveBeenCalledWith( + expect.objectContaining({ + userMessage: 'summarize health', + mode: 'automation', + }) + ); + expect(state.status).toBe('completed'); + expect(state.finishedAt).not.toBeNull(); + }); + + it('automation runtime failure reports failed lifecycle state with error details', async () => { + cronMocks.jobs = []; + cronMocks.schedule.mockImplementation((_cron: string, task: () => Promise) => { + cronMocks.jobs.push(task); + return { + stop: vi.fn(), + }; + }); + + const runner = new AutomationRunner({ + runTurn: vi.fn(async () => { + throw new Error('runner crashed'); + }), + } as never); + const scheduler = new AutomationScheduler(); 
+ const spec: AutomationSpec = { + id: 'job-2', + name: 'failure-path', + enabled: true, + trigger: { type: 'schedule', cron: '* * * * *' }, + prompt: 'force fail', + workspace: '/tmp/repo', + writePolicy: 'read-only', + retries: 1, + }; + + const state = createAutomationTriggerState({ spec }); + + scheduler.schedule(spec, async (currentSpec) => { + state.status = 'running'; + try { + await runner.run(currentSpec); + state.status = 'completed'; + } catch (error) { + state.status = 'failed'; + state.error = error instanceof Error ? error.message : String(error); + } finally { + state.finishedAt = new Date().toISOString(); + } + }); + + await cronMocks.jobs[0](); + + expect(state.status).toBe('failed'); + expect(state.error).toContain('runner crashed'); + expect(state.finishedAt).not.toBeNull(); + }); + + it('event-driven hooks emit observable success and failure outcomes', async () => { + const hookRunner = new EventHookRunner(); + const outcomes: string[] = []; + + hookRunner.register({ + id: 'hook-success', + eventName: 'file-change', + command: 'echo ok', + enabled: true, + timeoutMs: 100, + }); + hookRunner.register({ + id: 'hook-failure', + eventName: 'file-change', + command: 'exit 1', + enabled: true, + timeoutMs: 100, + }); + + execMocks.executeCommand + .mockResolvedValueOnce({ summary: 'Command succeeded', ok: true }) + .mockResolvedValueOnce({ summary: 'Command failed', ok: false, stderr: 'boom' }); + + hookRunner.on('hook-result', ({ hook, result }) => { + outcomes.push(`${hook.id}:${result.summary}`); + }); + + await hookRunner.trigger('file-change', { cwd: '/tmp/repo' }); + + expect(outcomes).toContain('hook-success:Command succeeded'); + expect(outcomes).toContain('hook-failure:Command failed'); + }); +}); diff --git a/tests/integration/commands.integration.test.ts b/tests/integration/commands.integration.test.ts new file mode 100644 index 0000000..c7580b2 --- /dev/null +++ b/tests/integration/commands.integration.test.ts @@ -0,0 +1,106 @@ +import { 
afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const mocks = vi.hoisted(() => ({ + detectProvider: vi.fn(() => 'google'), + getProviderPreflightError: vi.fn(() => null as string | null), + createRuntime: vi.fn(), + runFullIndex: vi.fn(), +})); + +vi.mock('../../src/providers', () => ({ + detectProvider: mocks.detectProvider, + getProviderPreflightError: mocks.getProviderPreflightError, +})); + +vi.mock('../../src/cli/runtime', () => ({ + createRuntime: mocks.createRuntime, +})); + +vi.mock('../../src/context/indexer/full-index', () => ({ + runFullIndex: mocks.runFullIndex, +})); + +import { runChatCommand } from '../../src/cli/commands/chat'; +import { runIndexCommand } from '../../src/cli/commands/index'; +import { runPlanCommand } from '../../src/cli/commands/plan'; + +describe('integration: command orchestration flows', () => { + let logSpy: ReturnType<typeof vi.spyOn>; + + beforeEach(() => { + vi.clearAllMocks(); + logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + }); + + afterEach(() => { + logSpy.mockRestore(); + }); + + it('chat one-shot success path emits expected output state', async () => { + const write = vi.fn(async () => undefined); + const runTurn = vi.fn(async () => ({ + turnType: 'assistant', + intent: { goal: 'answer user', confidence: 1 }, + contextRequest: { query: '', maxItems: 5 }, + toolPlan: [], + approvalRequest: null, + assistantResponse: { message: 'integration-chat-ok' }, + termination: { shouldEnd: false }, + meta: { validationAttempts: 1, repaired: false }, + })); + + mocks.createRuntime.mockResolvedValue({ + orchestrator: { runTurn }, + transcripts: { write }, + }); + + await runChatCommand('hello'); + + expect(runTurn).toHaveBeenCalledWith({ + userMessage: 'hello', + sessionId: 'oneshot-session', + mode: 'interactive', + }); + expect(write).toHaveBeenCalledTimes(1); + expect(logSpy).toHaveBeenCalledWith('integration-chat-ok'); + }); + + it('plan policy-denied path surfaces denial and avoids output side effects',
async () => { + const deniedError = new Error('Policy denied tool invocation: blocked pattern'); + const runTurn = vi.fn(async () => { + throw deniedError; + }); + + mocks.createRuntime.mockResolvedValue({ + orchestrator: { runTurn }, + }); + + await expect(runPlanCommand('delete production data')).rejects.toThrow('Policy denied tool'); + expect(logSpy).not.toHaveBeenCalled(); + }); + + it('index recoverable failure path exits quickly without hanging', async () => { + mocks.createRuntime.mockResolvedValue({ + db: { query: vi.fn() }, + provider: { embed: vi.fn() }, + }); + mocks.runFullIndex.mockRejectedValueOnce(new Error('temporary index error')); + + let timeoutId: ReturnType<typeof setTimeout> | null = null; + const timeoutPromise = new Promise<'timeout'>((resolve) => { + timeoutId = setTimeout(() => resolve('timeout'), 200); + }); + const result = await Promise.race([ + runIndexCommand('/tmp/project') + .then(() => 'resolved') + .catch(() => 'rejected'), + timeoutPromise, + ]); + if (timeoutId) { + clearTimeout(timeoutId); + } + + expect(result).not.toBe('timeout'); + expect(result).toBe('rejected'); + }); +}); diff --git a/tests/integration/helpers.ts b/tests/integration/helpers.ts new file mode 100644 index 0000000..1e86a51 --- /dev/null +++ b/tests/integration/helpers.ts @@ -0,0 +1,123 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { expect } from 'vitest'; +import type { AutomationSpec } from '../../src/automation/schemas'; +import { createDefaultApprovalPolicy } from '../../src/policy/defaults'; +import { DefaultPolicyEngine } from '../../src/policy/engine'; +import type { ApprovalPolicy } from '../../src/policy/schemas'; + +/** + * Integration fixture conventions: + * - Use unique temporary workspaces per test to prevent cross-test side effects. + * - Prefer policy/approval builders over inline object literals for readability.
+ * - Use bounded loop controllers instead of wall-clock sleeps for deterministic loop tests. + */ +export async function createWorkspaceFixture( + input: { prefix?: string; files?: Record<string, string>; agentsMd?: string } = {} +): Promise<{ path: string; cleanup: () => Promise<void> }> { + const workspace = await mkdtemp(join(tmpdir(), input.prefix ?? 'dubsbot-integration-')); + const files = input.files ?? {}; + const agentsMd = input.agentsMd ?? '# AGENTS\n\n## Commands\n- test: pnpm test\n'; + + await writeFile(join(workspace, 'AGENTS.md'), agentsMd, 'utf8'); + for (const [relativePath, content] of Object.entries(files)) { + await writeFile(join(workspace, relativePath), content, 'utf8'); + } + + return { + path: workspace, + cleanup: async () => { + await rm(workspace, { recursive: true, force: true }); + }, + }; +} + +export function createPolicyFixture(overrides: Partial<ApprovalPolicy> = {}): DefaultPolicyEngine { + return new DefaultPolicyEngine(createDefaultApprovalPolicy(overrides)); +} + +export function createApprovalResponses(input: { approvals?: string[]; denies?: string[] } = {}): { + approvals: Set<string>; + denies: Set<string>; +} { + return { + approvals: new Set(input.approvals ?? ['/approve', 'approve', 'yes', 'y']), + denies: new Set(input.denies ?? ['/deny', 'deny', 'no', 'n']), + }; +} + +export function createAutomationTriggerState(input: { + spec: AutomationSpec; + eventName?: string; + startedAt?: string; +}): { + spec: AutomationSpec; + eventName: string; + status: 'pending' | 'running' | 'completed' | 'failed'; + startedAt: string; + finishedAt: string | null; + error: string | null; +} { + return { + spec: input.spec, + eventName: input.eventName ?? `${input.spec.trigger.type}:triggered`, + status: 'pending', + startedAt: input.startedAt ??
new Date().toISOString(), + finishedAt: null, + error: null, + }; +} + +export function createLoopController(input: { + maxIterations: number; + onTerminate?: (reason: 'max_iterations' | 'fatal_error' | 'manual_stop') => void; +}): { + tick: () => boolean; + fail: () => void; + stop: () => void; + iterations: () => number; + isTerminated: () => boolean; + terminationReason: () => 'max_iterations' | 'fatal_error' | 'manual_stop' | null; +} { + let count = 0; + let terminated = false; + let reason: 'max_iterations' | 'fatal_error' | 'manual_stop' | null = null; + + const terminate = (next: 'max_iterations' | 'fatal_error' | 'manual_stop') => { + if (terminated) { + return; + } + terminated = true; + reason = next; + input.onTerminate?.(next); + }; + + return { + tick: () => { + if (terminated) { + return false; + } + count += 1; + if (count >= input.maxIterations) { + terminate('max_iterations'); + return false; + } + return true; + }, + fail: () => { + terminate('fatal_error'); + }, + stop: () => { + terminate('manual_stop'); + }, + iterations: () => count, + isTerminated: () => terminated, + terminationReason: () => reason, + }; +} + +export async function expectNoUnauthorizedSideEffect(path: string): Promise<void> { + const { access } = await import('node:fs/promises'); + await expect(access(path)).rejects.toThrow(); +} diff --git a/tests/integration/loops.integration.test.ts b/tests/integration/loops.integration.test.ts new file mode 100644 index 0000000..3a7bb25 --- /dev/null +++ b/tests/integration/loops.integration.test.ts @@ -0,0 +1,139 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createLoopController } from './helpers'; + +type WorkUnit = { id: string; shouldFail?: boolean }; + +async function runDaemonLoop(input: { + work: WorkUnit[]; + controller: ReturnType<typeof createLoopController>; + onProcess: (unit: WorkUnit) => Promise<void>; +}): Promise<{ processed: string[]; termination: string | null }> { + const processed: string[] = []; + for (const unit of input.work) { + if
(!input.controller.tick()) { + break; + } + await input.onProcess(unit); + processed.push(unit.id); + } + + if (!input.controller.isTerminated()) { + input.controller.stop(); + } + + return { + processed, + termination: input.controller.terminationReason(), + }; +} + +async function runWatcherLoop(input: { + work: WorkUnit[]; + controller: ReturnType<typeof createLoopController>; +}): Promise<{ processed: string[]; termination: string | null; error?: string }> { + const processed: string[] = []; + try { + for (const unit of input.work) { + if (!input.controller.tick()) { + break; + } + if (unit.shouldFail) { + throw new Error(`fatal watcher error on ${unit.id}`); + } + processed.push(unit.id); + } + } catch (error) { + input.controller.fail(); + return { + processed, + termination: input.controller.terminationReason(), + error: error instanceof Error ? error.message : String(error), + }; + } + + if (!input.controller.isTerminated()) { + input.controller.stop(); + } + + return { + processed, + termination: input.controller.terminationReason(), + }; +} + +describe('integration: daemon and watcher loop lifecycle', () => { + it('daemon loop starts, processes bounded work, and exits cleanly', async () => { + const onTerminate = vi.fn(); + const controller = createLoopController({ + maxIterations: 3, + onTerminate, + }); + + const result = await runDaemonLoop({ + work: [{ id: 'a' }, { id: 'b' }, { id: 'c' }, { id: 'd' }], + controller, + onProcess: async () => undefined, + }); + + expect(result.processed).toEqual(['a', 'b']); + expect(result.termination).toBe('max_iterations'); + expect(onTerminate).toHaveBeenCalledWith('max_iterations'); + }); + + it('watcher loop terminates on fatal error and avoids deadlock', async () => { + const onTerminate = vi.fn(); + const controller = createLoopController({ + maxIterations: 10, + onTerminate, + }); + + let timeoutId: ReturnType<typeof setTimeout> | null = null; + const timeoutPromise = new Promise<{ processed: string[]; termination: string; error: string }>( + (resolve) => {
+ timeoutId = setTimeout(() => { + resolve({ processed: [], termination: 'timeout', error: 'loop hung' }); + }, 200); + } + ); + + const result = await Promise.race([ + runWatcherLoop({ + work: [{ id: 'first' }, { id: 'second', shouldFail: true }, { id: 'third' }], + controller, + }), + timeoutPromise, + ]); + if (timeoutId) { + clearTimeout(timeoutId); + } + + expect(result.termination).not.toBe('timeout'); + expect(result.termination).toBe('fatal_error'); + expect(result.error).toContain('fatal watcher error'); + expect(onTerminate).toHaveBeenCalledWith('fatal_error'); + }); + + it('loop scenarios include timeout safeguards for cleanup', async () => { + const controller = createLoopController({ maxIterations: 2 }); + + let timeoutId: ReturnType<typeof setTimeout> | null = null; + const timeoutPromise = new Promise<{ processed: string[]; termination: string }>((resolve) => { + timeoutId = setTimeout(() => resolve({ processed: [], termination: 'timeout' }), 200); + }); + + const run = Promise.race([ + runDaemonLoop({ + work: [{ id: 'one' }, { id: 'two' }, { id: 'three' }], + controller, + onProcess: async () => undefined, + }), + timeoutPromise, + ]); + + const result = await run; + if (timeoutId) { + clearTimeout(timeoutId); + } + expect(result.termination).not.toBe('timeout'); + }); +}); diff --git a/tests/integration/policy-approval.integration.test.ts b/tests/integration/policy-approval.integration.test.ts new file mode 100644 index 0000000..0eca97d --- /dev/null +++ b/tests/integration/policy-approval.integration.test.ts @@ -0,0 +1,73 @@ +import { access } from 'node:fs/promises'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { ToolRegistry } from '../../src/tools/registry'; +import { createPolicyFixture, createWorkspaceFixture } from './helpers'; + +describe('integration: policy and approval branches', () => { + it('approval-granted branch continues and executes expected action', async () => { + const workspace = await
createWorkspaceFixture(); + try { + const target = join(workspace.path, 'approved.txt'); + + const registry = new ToolRegistry({ + policyEngine: createPolicyFixture(), + defaultMode: 'interactive', + agentsConfig: { + commands: [{ name: 'approve-me', command: `sh -lc "echo approved > ${target}"` }], + hooks: [], + warnings: [], + }, + }); + + const result = await registry.invoke( + { + tool: 'agents:approve-me', + sideEffect: 'read', + params: {}, + }, + { + approvalGranted: true, + } + ); + + expect(result.ok).toBe(true); + expect(result.payload.policyOutcome).toMatchObject({ + requiresApproval: true, + sideEffect: 'write', + }); + await expect(access(target)).resolves.toBeUndefined(); + } finally { + await workspace.cleanup(); + } + }); + + it('approval-denied branch stops at boundary and does not create side effect', async () => { + const workspace = await createWorkspaceFixture(); + try { + const target = join(workspace.path, 'denied.txt'); + + const registry = new ToolRegistry({ + policyEngine: createPolicyFixture(), + defaultMode: 'interactive', + agentsConfig: { + commands: [{ name: 'deny-me', command: `sh -lc "echo denied > ${target}"` }], + hooks: [], + warnings: [], + }, + }); + + const result = await registry.invoke({ + tool: 'agents:deny-me', + sideEffect: 'read', + params: {}, + }); + + expect(result.ok).toBe(false); + expect(result.summary).toContain('Approval required'); + await expect(access(target)).rejects.toThrow(); + } finally { + await workspace.cleanup(); + } + }); +});