Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/orchestrator/src/project-profile.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ describe('toolchain confinement probe', () => {
});
});

describe('toolchain runner diagnostics', () => {
it('package-backed profiles name the runner packages they load', () => {
expect(brunchProfile.toolchain.diagnostics.runnerPackages).toContain('vitest');
expect(resolveToolchain('node-vitest').diagnostics.runnerPackages).toContain('vitest');
expect(resolveToolchain('node-jest').diagnostics.runnerPackages).toContain('jest');
});

it('built-in runner profiles do not invent node_modules package facts', () => {
expect(resolveToolchain('node-test').diagnostics.runnerPackages).toEqual([]);
expect(resolveToolchain('deno').diagnostics.runnerPackages).toEqual([]);
});
});

describe('toolchain test conventions are framework-specific', () => {
it('bun conventions mention bun:test', () => {
expect(bunProfile.toolchain.testConventions).toContain('bun:test');
Expand Down
31 changes: 31 additions & 0 deletions src/orchestrator/src/project-profile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export interface Toolchain {
* profile (e.g. `bun --version`).
*/
probeCommand(): string[];
/** Profile-owned runner facts used to classify harness/toolchain failures. */
diagnostics: RunnerDiagnostics;
/**
* Agent-facing description of the test framework + scaffold/install
* conventions, injected into the cook test-writer task so prompts carry no
Expand All @@ -26,6 +28,15 @@ export interface Toolchain {
testConventions: string;
}

export interface RunnerDiagnostics {
/** Human-readable runner name for diagnostics and future reports. */
runnerName: string;
/** Package/artifact names whose load-denial means the runner failed, not product code. */
runnerPackages: readonly string[];
/** Runner-specific messages for target filters that match zero test files. */
noTestsPatterns: readonly RegExp[];
}

export interface ProjectProfile {
id: ProfileId;
toolchain: Toolchain;
Expand All @@ -41,6 +52,7 @@ interface ProfileData {
epicTarget: string;
testCommand: readonly string[];
probeCommand: readonly string[];
diagnostics: RunnerDiagnostics;
testConventions: string;
}

Expand All @@ -50,6 +62,7 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
epicTarget: 'tests/{id}.integration.test.ts',
testCommand: ['bun', 'test', '{target}'],
probeCommand: ['bun', '--version'],
diagnostics: { runnerName: 'bun', runnerPackages: [], noTestsPatterns: [/No tests found/i] },
testConventions:
'Use bun\'s test runner: `import { describe, expect, it } from "bun:test"`. The harness runs each target with `bun test <target>`.',
},
Expand All @@ -59,6 +72,11 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
epicTarget: '{id}.integration.test.ts',
testCommand: ['npx', 'vitest', 'run', '{target}'],
probeCommand: ['npx', 'vitest', '--version'],
diagnostics: {
runnerName: 'vitest',
runnerPackages: ['vitest', 'vite', 'typescript'],
noTestsPatterns: [/No test files found/i],
},
testConventions:
'Use vitest: `import { describe, expect, it } from "vitest"`. The harness runs each target with `vitest run <target>`.',
},
Expand All @@ -67,6 +85,11 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
epicTarget: 'tests/{id}.integration.test.ts',
testCommand: ['npx', 'vitest', 'run', '{target}'],
probeCommand: ['npx', 'vitest', '--version'],
diagnostics: {
runnerName: 'vitest',
runnerPackages: ['vitest', 'vite', 'typescript'],
noTestsPatterns: [/No test files found/i],
},
testConventions:
'Use vitest on Node: `import { describe, expect, it } from "vitest"`. Scaffold a package.json with `vitest` and `typescript` as devDependencies and run `npm install` before testing. The harness runs each target with `npx vitest run <target>`.',
},
Expand All @@ -75,6 +98,7 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
epicTarget: 'tests/{id}.integration.test.ts',
testCommand: ['node', '--test', '{target}'],
probeCommand: ['node', '--version'],
diagnostics: { runnerName: 'node:test', runnerPackages: [], noTestsPatterns: [/No tests found/i] },
testConventions:
"Use the built-in node:test runner: `import { test } from 'node:test'` and `import assert from 'node:assert/strict'`. Write TypeScript with erasable syntax only (no enums or namespaces) — Node strips types natively, so no install or build step is needed to run tests. The harness runs each target with `node --test <target>`.",
},
Expand All @@ -83,6 +107,11 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
epicTarget: 'tests/{id}.integration.test.ts',
testCommand: ['npx', 'jest', '--runTestsByPath', '{target}'],
probeCommand: ['npx', 'jest', '--version'],
diagnostics: {
runnerName: 'jest',
runnerPackages: ['jest', '@jest', 'ts-jest', 'typescript'],
noTestsPatterns: [/No tests found/i],
},
testConventions:
'Use jest with ts-jest: `import { describe, expect, it } from "@jest/globals"`. Scaffold a package.json with `jest`, `ts-jest`, and `typescript` as devDependencies plus a jest.config.js using the ts-jest preset, then run `npm install` before testing. The harness runs each target with `npx jest --runTestsByPath <target>`.',
},
Expand All @@ -91,6 +120,7 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
epicTarget: 'tests/{id}.integration.test.ts',
testCommand: ['deno', 'test', '--allow-all', '{target}'],
probeCommand: ['deno', '--version'],
diagnostics: { runnerName: 'deno', runnerPackages: [], noTestsPatterns: [/No test modules found/i] },
testConventions:
"Use Deno's built-in test runner: `Deno.test(...)` with assertions from `jsr:@std/assert`. No package.json or install step — imports resolve via jsr/npm specifiers. The harness runs each target with `deno test --allow-all <target>`.",
},
Expand All @@ -104,6 +134,7 @@ function compileProfile(id: ProfileId, data: ProfileData): ProjectProfile {
epicTarget: (epicId) => data.epicTarget.replaceAll('{id}', epicId),
testCommand: (target) => data.testCommand.map((arg) => (arg === '{target}' ? target : arg)),
probeCommand: () => [...data.probeCommand],
diagnostics: data.diagnostics,
testConventions: data.testConventions,
},
};
Expand Down
73 changes: 65 additions & 8 deletions src/orchestrator/src/test-runner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ describe('ToolchainTestRunner honors the toolchain test command', () => {
epicTarget: (id) => id,
testCommand,
probeCommand: () => ['node', '--version'],
diagnostics: { runnerName: 'fake', runnerPackages: [], noTestsPatterns: [/No test files found/i] },
testConventions: 'fake',
};
}
Expand Down Expand Up @@ -132,33 +133,88 @@ describe('ToolchainTestRunner honors the toolchain test command', () => {
});

describe('classifyTestFailure (infra vs test)', () => {
const vitestDiagnostics = {
runnerName: 'vitest',
runnerPackages: ['vitest'],
noTestsPatterns: [/No test files found/i],
};

it('a spawn failure (missing runner binary) is infra', () => {
expect(classifyTestFailure('', true)).toBe('infra');
expect(classifyTestFailure('', true, vitestDiagnostics)).toBe('infra');
});

it('a shell "command not found" is infra even with a normal exit', () => {
expect(classifyTestFailure('sh: 1: vitest: command not found', false)).toBe('infra');
expect(classifyTestFailure("'jest' is not recognized as an internal or external command", false)).toBe(
'infra',
);
expect(classifyTestFailure('sh: 1: vitest: command not found', false, vitestDiagnostics)).toBe('infra');
expect(
classifyTestFailure(
"'jest' is not recognized as an internal or external command",
false,
vitestDiagnostics,
),
).toBe('infra');
});

it('a profile-owned runner package EPERM is infra', () => {
const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/vitest/vitest.mjs'";

expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('infra');
});

it('does not treat embedded error-code text as runner denial', () => {
const output =
"AssertionError: expected message 'NOT_EPERM_CASE' from /sandbox/node_modules/vitest/vitest.mjs";

expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test');
});

it('does not combine assertion permission text with runner stack frames', () => {
const output = [
"AssertionError: expected 'permission denied' to equal 'allowed'",
' at assertAllowed (/sandbox/src/auth.test.ts:3:1)',
' at runTest (/sandbox/node_modules/vitest/dist/runner.js:10:1)',
].join('\n');

expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test');
});

it('a package EPERM outside the selected runner diagnostics stays a test failure', () => {
const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/playwright/index.js'";

expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test');
});

it('an assertion failure with no toolchain signal is a test failure', () => {
expect(classifyTestFailure('expect(received).toBe(expected)\n\n1 fail', false)).toBe('test');
expect(classifyTestFailure('expect(received).toBe(expected)\n\n1 fail', false, vitestDiagnostics)).toBe(
'test',
);
});

it('a missing *module* stays a test failure (ambiguous with TDD red), not infra', () => {
// A red test importing source that does not exist yet must not be mislabeled
// infra and skipped.
expect(classifyTestFailure("Cannot find module './widget' from 'widget.test.ts'", false)).toBe('test');
expect(
classifyTestFailure("Cannot find module './widget' from 'widget.test.ts'", false, vitestDiagnostics),
).toBe('test');
});

it('"No test files found" is absent (nothing built yet), not a test red', () => {
// The greenfield evaluate gate runs before the target test file exists, so
// the runner matches zero files. That is "not started", not a failure — it
// must not be conflated with a genuine assertion red (which keeps a slice's
// attempt counter clean and avoids a phantom ✗ NEEDS WORK on the grid).
expect(classifyTestFailure('No test files found, exiting with code 1', false)).toBe('absent');
expect(classifyTestFailure('No test files found, exiting with code 1', false, vitestDiagnostics)).toBe(
'absent',
);
});

it('no-test wording is profile-owned', () => {
const customDiagnostics = {
runnerName: 'custom',
runnerPackages: [],
noTestsPatterns: [/Nothing matched/i],
};

expect(classifyTestFailure('Nothing matched target filter', false, customDiagnostics)).toBe('absent');
});
});

Expand Down Expand Up @@ -188,6 +244,7 @@ describe('ToolchainTestRunner stamps failureKind', () => {
epicTarget: (id) => id,
testCommand,
probeCommand: () => ['node', '--version'],
diagnostics: { runnerName: 'fake', runnerPackages: [], noTestsPatterns: [/No test files found/i] },
testConventions: 'fake',
};
}
Expand Down
71 changes: 54 additions & 17 deletions src/orchestrator/src/test-runner.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { spawnSync } from 'node:child_process';

import { defaultToolchain, type Toolchain } from './project-profile.js';
import { defaultToolchain, type RunnerDiagnostics, type Toolchain } from './project-profile.js';
import type { ConfinedSpawn } from './sandbox-guard.js';
import type {
TestFailureKind,
Expand All @@ -18,12 +18,6 @@ const RUNNER_MISSING_PATTERNS: readonly RegExp[] = [
/is not recognized as an internal or external command/i,
];

// The runner matched zero test files. Vitest prints exactly this on an empty
// filter; it means "nothing built yet", not a red. Kept narrow on purpose — a
// missing *module* ("Cannot find module") is a genuine TDD red and must stay
// `test`, never `absent`.
const NO_TESTS_PATTERNS: readonly RegExp[] = [/No test files found/i];

/**
* FE-884 Slice B: the verify subprocess timeout. Sized well above a real test
* run because the wait includes `npx`/runner resolution + framework warmup +
Expand All @@ -37,28 +31,67 @@ export const VERIFY_TIMEOUT_MS = 180_000;

/**
* FE-884 Slice B: a spawn error that means "the runner never delivered a
* verdict" — the binary is missing (`ENOENT`) or the run was killed by the
* timeout (`ETIMEDOUT`). Both are toolchain/infra faults, not a code assertion,
* so they must not be routed to the (logic-fix) remediation agent. ENOBUFS and
* other post-start errors stay `test` — output exists to classify.
* verdict" — the binary is missing, unavailable, or the run was killed by the
* timeout. These are toolchain/infra faults, not code assertions, so they must
* not be routed to the (logic-fix) remediation agent. ENOBUFS and other
* post-start errors stay `test` — output exists to classify.
*/
export function isInfraSpawnError(error: unknown): boolean {
const code = (error as NodeJS.ErrnoException | null)?.code;
return code === 'ENOENT' || code === 'ETIMEDOUT';
return code === 'ENOENT' || code === 'ETIMEDOUT' || code === 'EACCES' || code === 'EPERM';
}

function isIdentifierChar(ch: string | undefined): boolean {
if (!ch) return false;
const code = ch.codePointAt(0);
if (code === undefined) return false;
return (
(code >= 48 && code <= 57) || (code >= 65 && code <= 90) || (code >= 97 && code <= 122) || ch === '_'
);
}

function includesErrorCode(output: string, code: string): boolean {
let index = output.indexOf(code);
while (index !== -1) {
const before = output[index - 1];
const after = output[index + code.length];
if (!isIdentifierChar(before) && !isIdentifierChar(after)) return true;
index = output.indexOf(code, index + code.length);
}
return false;
}

function isRunnerPackageDenied(output: string, diagnostics: RunnerDiagnostics): boolean {
return output.split('\n').some((line) => {
const lowerLine = line.toLowerCase();
const denied =
includesErrorCode(line, 'EACCES') ||
includesErrorCode(line, 'EPERM') ||
lowerLine.includes('operation not permitted') ||
lowerLine.includes('permission denied');
if (!denied) return false;
const normalized = line.replaceAll('\\', '/');
return diagnostics.runnerPackages.some((pkg) => normalized.includes(`/node_modules/${pkg}/`));
});
}

/**
* Classify a non-passing test run. Deliberately conservative ordering:
* 1. `infra` — a spawn failure (missing binary) or shell "command not found";
* an unambiguous "the runner itself isn't there".
* 2. `absent` — zero test files matched ("No test files found"); not started.
* an unambiguous "the runner itself isn't there/can't load".
* 2. `absent` — zero test files matched; not started.
* 3. `test` — everything else, because a missing *module* is ambiguous with a
* legitimate TDD red and misrouting a real failure would silently skip it.
*/
export function classifyTestFailure(output: string, spawnFailed: boolean): TestFailureKind {
export function classifyTestFailure(
output: string,
spawnFailed: boolean,
diagnostics: RunnerDiagnostics,
): TestFailureKind {
if (spawnFailed) return 'infra';
if (RUNNER_MISSING_PATTERNS.some((re) => re.test(output))) return 'infra';
if (NO_TESTS_PATTERNS.some((re) => re.test(output))) return 'absent';
if (isRunnerPackageDenied(output, diagnostics)) return 'infra';
if (diagnostics.noTestsPatterns.some((re) => re.test(output))) return 'absent';
return 'test';
}

Expand Down Expand Up @@ -107,7 +140,11 @@ export class ToolchainTestRunner implements TestRunner {
// the runner never delivered a verdict — an infra fault, not a code red
// (FE-884 Slice B). Other post-start errors stay `test`.
const runnerFailed = isInfraSpawnError(result.error);
return { passed, output, failureKind: classifyTestFailure(output, runnerFailed) };
return {
passed,
output,
failureKind: classifyTestFailure(output, runnerFailed, this.toolchain.diagnostics),
};
}
}

Expand Down
Loading