From be3c011b3ae661cfa13b7e59e412ffbbda5a30b2 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Fri, 26 Jun 2026 23:46:42 +0200 Subject: [PATCH 1/3] FE-1093: add profile-owned runner diagnostics --- src/orchestrator/src/project-profile.test.ts | 13 +++++ src/orchestrator/src/project-profile.ts | 31 +++++++++++ src/orchestrator/src/test-runner.test.ts | 56 +++++++++++++++++--- src/orchestrator/src/test-runner.ts | 43 +++++++++------ 4 files changed, 118 insertions(+), 25 deletions(-) diff --git a/src/orchestrator/src/project-profile.test.ts b/src/orchestrator/src/project-profile.test.ts index cc545d558..9f791ac69 100644 --- a/src/orchestrator/src/project-profile.test.ts +++ b/src/orchestrator/src/project-profile.test.ts @@ -42,6 +42,19 @@ describe('toolchain confinement probe', () => { }); }); +describe('toolchain runner diagnostics', () => { + it('package-backed profiles name the runner packages they load', () => { + expect(brunchProfile.toolchain.diagnostics.runnerPackages).toContain('vitest'); + expect(resolveToolchain('node-vitest').diagnostics.runnerPackages).toContain('vitest'); + expect(resolveToolchain('node-jest').diagnostics.runnerPackages).toContain('jest'); + }); + + it('built-in runner profiles do not invent node_modules package facts', () => { + expect(resolveToolchain('node-test').diagnostics.runnerPackages).toEqual([]); + expect(resolveToolchain('deno').diagnostics.runnerPackages).toEqual([]); + }); +}); + describe('toolchain test conventions are framework-specific', () => { it('bun conventions mention bun:test', () => { expect(bunProfile.toolchain.testConventions).toContain('bun:test'); diff --git a/src/orchestrator/src/project-profile.ts b/src/orchestrator/src/project-profile.ts index ffd20b993..c1350739f 100644 --- a/src/orchestrator/src/project-profile.ts +++ b/src/orchestrator/src/project-profile.ts @@ -17,6 +17,8 @@ export interface Toolchain { * profile (e.g. `bun --version`). */ probeCommand(): string[]; + /** Profile-owned runner facts used to classify harness/toolchain failures. */ + diagnostics: RunnerDiagnostics; /** * Agent-facing description of the test framework + scaffold/install * conventions, injected into the cook test-writer task so prompts carry no @@ -26,6 +28,15 @@ export interface Toolchain { testConventions: string; } +export interface RunnerDiagnostics { + /** Human-readable runner name for diagnostics and future reports. */ + runnerName: string; + /** Package/artifact names whose load-denial means the runner failed, not product code. */ + runnerPackages: readonly string[]; + /** Runner-specific messages for target filters that match zero test files. */ + noTestsPatterns: readonly RegExp[]; +} + export interface ProjectProfile { id: ProfileId; toolchain: Toolchain; @@ -41,6 +52,7 @@ interface ProfileData { epicTarget: string; testCommand: readonly string[]; probeCommand: readonly string[]; + diagnostics: RunnerDiagnostics; testConventions: string; } @@ -50,6 +62,7 @@ const PROFILE_DATA: Record = { epicTarget: 'tests/{id}.integration.test.ts', testCommand: ['bun', 'test', '{target}'], probeCommand: ['bun', '--version'], + diagnostics: { runnerName: 'bun', runnerPackages: [], noTestsPatterns: [/No tests found/i] }, testConventions: 'Use bun\'s test runner: `import { describe, expect, it } from "bun:test"`. The harness runs each target with `bun test `.', }, @@ -59,6 +72,11 @@ const PROFILE_DATA: Record = { epicTarget: '{id}.integration.test.ts', testCommand: ['npx', 'vitest', 'run', '{target}'], probeCommand: ['npx', 'vitest', '--version'], + diagnostics: { + runnerName: 'vitest', + runnerPackages: ['vitest', 'vite', 'typescript'], + noTestsPatterns: [/No test files found/i], + }, testConventions: 'Use vitest: `import { describe, expect, it } from "vitest"`. The harness runs each target with `vitest run `.', }, @@ -67,6 +85,11 @@ const PROFILE_DATA: Record = { epicTarget: 'tests/{id}.integration.test.ts', testCommand: ['npx', 'vitest', 'run', '{target}'], probeCommand: ['npx', 'vitest', '--version'], + diagnostics: { + runnerName: 'vitest', + runnerPackages: ['vitest', 'vite', 'typescript'], + noTestsPatterns: [/No test files found/i], + }, testConventions: 'Use vitest on Node: `import { describe, expect, it } from "vitest"`. Scaffold a package.json with `vitest` and `typescript` as devDependencies and run `npm install` before testing. The harness runs each target with `npx vitest run `.', }, @@ -75,6 +98,7 @@ const PROFILE_DATA: Record = { epicTarget: 'tests/{id}.integration.test.ts', testCommand: ['node', '--test', '{target}'], probeCommand: ['node', '--version'], + diagnostics: { runnerName: 'node:test', runnerPackages: [], noTestsPatterns: [/No tests found/i] }, testConventions: "Use the built-in node:test runner: `import { test } from 'node:test'` and `import assert from 'node:assert/strict'`. Write TypeScript with erasable syntax only (no enums or namespaces) — Node strips types natively, so no install or build step is needed to run tests. The harness runs each target with `node --test `.", }, @@ -83,6 +107,11 @@ const PROFILE_DATA: Record = { epicTarget: 'tests/{id}.integration.test.ts', testCommand: ['npx', 'jest', '--runTestsByPath', '{target}'], probeCommand: ['npx', 'jest', '--version'], + diagnostics: { + runnerName: 'jest', + runnerPackages: ['jest', '@jest', 'ts-jest', 'typescript'], + noTestsPatterns: [/No tests found/i], + }, testConventions: 'Use jest with ts-jest: `import { describe, expect, it } from "@jest/globals"`. Scaffold a package.json with `jest`, `ts-jest`, and `typescript` as devDependencies plus a jest.config.js using the ts-jest preset, then run `npm install` before testing. The harness runs each target with `npx jest --runTestsByPath `.', }, @@ -91,6 +120,7 @@ const PROFILE_DATA: Record = { epicTarget: 'tests/{id}.integration.test.ts', testCommand: ['deno', 'test', '--allow-all', '{target}'], probeCommand: ['deno', '--version'], + diagnostics: { runnerName: 'deno', runnerPackages: [], noTestsPatterns: [/No test modules found/i] }, testConventions: "Use Deno's built-in test runner: `Deno.test(...)` with assertions from `jsr:@std/assert`. No package.json or install step — imports resolve via jsr/npm specifiers. The harness runs each target with `deno test --allow-all `.", }, @@ -104,6 +134,7 @@ function compileProfile(id: ProfileId, data: ProfileData): ProjectProfile { epicTarget: (epicId) => data.epicTarget.replaceAll('{id}', epicId), testCommand: (target) => data.testCommand.map((arg) => (arg === '{target}' ? target : arg)), probeCommand: () => [...data.probeCommand], + diagnostics: data.diagnostics, testConventions: data.testConventions, }, }; diff --git a/src/orchestrator/src/test-runner.test.ts b/src/orchestrator/src/test-runner.test.ts index c68316f8c..e3131d791 100644 --- a/src/orchestrator/src/test-runner.test.ts +++ b/src/orchestrator/src/test-runner.test.ts @@ -91,6 +91,7 @@ describe('ToolchainTestRunner honors the toolchain test command', () => { epicTarget: (id) => id, testCommand, probeCommand: () => ['node', '--version'], + diagnostics: { runnerName: 'fake', runnerPackages: [], noTestsPatterns: [/No test files found/i] }, testConventions: 'fake', }; } @@ -132,25 +133,51 @@ describe('ToolchainTestRunner honors the toolchain test command', () => { }); describe('classifyTestFailure (infra vs test)', () => { + const vitestDiagnostics = { + runnerName: 'vitest', + runnerPackages: ['vitest'], + noTestsPatterns: [/No test files found/i], + }; + it('a spawn failure (missing runner binary) is infra', () => { - expect(classifyTestFailure('', true)).toBe('infra'); + expect(classifyTestFailure('', true, vitestDiagnostics)).toBe('infra'); }); it('a shell "command not found" is infra even with a normal exit', () => { - expect(classifyTestFailure('sh: 1: vitest: command not found', false)).toBe('infra'); - expect(classifyTestFailure("'jest' is not recognized as an internal or external command", false)).toBe( - 'infra', - ); + expect(classifyTestFailure('sh: 1: vitest: command not found', false, vitestDiagnostics)).toBe('infra'); + expect( + classifyTestFailure( + "'jest' is not recognized as an internal or external command", + false, + vitestDiagnostics, + ), + ).toBe('infra'); + }); + + it('a profile-owned runner package EPERM is infra', () => { + const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/vitest/vitest.mjs'"; + + expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('infra'); + }); + + it('a package EPERM outside the selected runner diagnostics stays a test failure', () => { + const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/playwright/index.js'"; + + expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test'); }); it('an assertion failure with no toolchain signal is a test failure', () => { - expect(classifyTestFailure('expect(received).toBe(expected)\n\n1 fail', false)).toBe('test'); + expect(classifyTestFailure('expect(received).toBe(expected)\n\n1 fail', false, vitestDiagnostics)).toBe( + 'test', + ); }); it('a missing *module* stays a test failure (ambiguous with TDD red), not infra', () => { // A red test importing source that does not exist yet must not be mislabeled // infra and skipped. - expect(classifyTestFailure("Cannot find module './widget' from 'widget.test.ts'", false)).toBe('test'); + expect( + classifyTestFailure("Cannot find module './widget' from 'widget.test.ts'", false, vitestDiagnostics), + ).toBe('test'); }); it('"No test files found" is absent (nothing built yet), not a test red', () => { @@ -158,7 +185,19 @@ describe('classifyTestFailure (infra vs test)', () => { // the runner matches zero files. That is "not started", not a failure — it // must not be conflated with a genuine assertion red (which keeps a slice's // attempt counter clean and avoids a phantom ✗ NEEDS WORK on the grid). - expect(classifyTestFailure('No test files found, exiting with code 1', false)).toBe('absent'); + expect(classifyTestFailure('No test files found, exiting with code 1', false, vitestDiagnostics)).toBe( + 'absent', + ); + }); + + it('no-test wording is profile-owned', () => { + const customDiagnostics = { + runnerName: 'custom', + runnerPackages: [], + noTestsPatterns: [/Nothing matched/i], + }; + + expect(classifyTestFailure('Nothing matched target filter', false, customDiagnostics)).toBe('absent'); }); }); @@ -188,6 +227,7 @@ describe('ToolchainTestRunner stamps failureKind', () => { epicTarget: (id) => id, testCommand, probeCommand: () => ['node', '--version'], + diagnostics: { runnerName: 'fake', runnerPackages: [], noTestsPatterns: [/No test files found/i] }, testConventions: 'fake', }; } diff --git a/src/orchestrator/src/test-runner.ts b/src/orchestrator/src/test-runner.ts index a66e82a72..98e1d41c0 100644 --- a/src/orchestrator/src/test-runner.ts +++ b/src/orchestrator/src/test-runner.ts @@ -1,6 +1,6 @@ import { spawnSync } from 'node:child_process'; -import { defaultToolchain, type Toolchain } from './project-profile.js'; +import { defaultToolchain, type RunnerDiagnostics, type Toolchain } from './project-profile.js'; import type { ConfinedSpawn } from './sandbox-guard.js'; import type { TestFailureKind, @@ -18,12 +18,6 @@ const RUNNER_MISSING_PATTERNS: readonly RegExp[] = [ /is not recognized as an internal or external command/i, ]; -// The runner matched zero test files. Vitest prints exactly this on an empty -// filter; it means "nothing built yet", not a red. Kept narrow on purpose — a -// missing *module* ("Cannot find module") is a genuine TDD red and must stay -// `test`, never `absent`. -const NO_TESTS_PATTERNS: readonly RegExp[] = [/No test files found/i]; - /** * FE-884 Slice B: the verify subprocess timeout. Sized well above a real test * run because the wait includes `npx`/runner resolution + framework warmup + @@ -37,28 +31,39 @@ export const VERIFY_TIMEOUT_MS = 180_000; /** * FE-884 Slice B: a spawn error that means "the runner never delivered a - * verdict" — the binary is missing (`ENOENT`) or the run was killed by the - * timeout (`ETIMEDOUT`). Both are toolchain/infra faults, not a code assertion, - * so they must not be routed to the (logic-fix) remediation agent. ENOBUFS and - * other post-start errors stay `test` — output exists to classify. + * verdict" — the binary is missing, unavailable, or the run was killed by the + * timeout. These are toolchain/infra faults, not code assertions, so they must + * not be routed to the (logic-fix) remediation agent. ENOBUFS and other + * post-start errors stay `test` — output exists to classify. */ export function isInfraSpawnError(error: unknown): boolean { const code = (error as NodeJS.ErrnoException | null)?.code; - return code === 'ENOENT' || code === 'ETIMEDOUT'; + return code === 'ENOENT' || code === 'ETIMEDOUT' || code === 'EACCES' || code === 'EPERM'; +} + +function isRunnerPackageDenied(output: string, diagnostics: RunnerDiagnostics): boolean { + if (!/\b(?:EACCES|EPERM)\b|operation not permitted|permission denied/i.test(output)) return false; + const normalized = output.replaceAll('\\', '/'); + return diagnostics.runnerPackages.some((pkg) => normalized.includes(`/node_modules/${pkg}/`)); } /** * Classify a non-passing test run. Deliberately conservative ordering: * 1. `infra` — a spawn failure (missing binary) or shell "command not found"; - * an unambiguous "the runner itself isn't there". - * 2. `absent` — zero test files matched ("No test files found"); not started. + * an unambiguous "the runner itself isn't there/can't load". + * 2. `absent` — zero test files matched; not started. * 3. `test` — everything else, because a missing *module* is ambiguous with a * legitimate TDD red and misrouting a real failure would silently skip it. */ -export function classifyTestFailure(output: string, spawnFailed: boolean): TestFailureKind { +export function classifyTestFailure( + output: string, + spawnFailed: boolean, + diagnostics: RunnerDiagnostics, +): TestFailureKind { if (spawnFailed) return 'infra'; if (RUNNER_MISSING_PATTERNS.some((re) => re.test(output))) return 'infra'; - if (NO_TESTS_PATTERNS.some((re) => re.test(output))) return 'absent'; + if (isRunnerPackageDenied(output, diagnostics)) return 'infra'; + if (diagnostics.noTestsPatterns.some((re) => re.test(output))) return 'absent'; return 'test'; } @@ -107,7 +112,11 @@ export class ToolchainTestRunner implements TestRunner { // the runner never delivered a verdict — an infra fault, not a code red // (FE-884 Slice B). Other post-start errors stay `test`. const runnerFailed = isInfraSpawnError(result.error); - return { passed, output, failureKind: classifyTestFailure(output, runnerFailed) }; + return { + passed, + output, + failureKind: classifyTestFailure(output, runnerFailed, this.toolchain.diagnostics), + }; } } From 4145572297f047c61ba83a614a7df698c98f4061 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Mon, 29 Jun 2026 11:18:48 +0200 Subject: [PATCH 2/3] FE-1093: avoid regex denial scan for runner errors Co-authored-by: Cursor --- src/orchestrator/src/test-runner.test.ts | 7 ++++++ src/orchestrator/src/test-runner.ts | 28 +++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/orchestrator/src/test-runner.test.ts b/src/orchestrator/src/test-runner.test.ts index e3131d791..70a94c9fd 100644 --- a/src/orchestrator/src/test-runner.test.ts +++ b/src/orchestrator/src/test-runner.test.ts @@ -160,6 +160,13 @@ describe('classifyTestFailure (infra vs test)', () => { expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('infra'); }); + it('does not treat embedded error-code text as runner denial', () => { + const output = + "AssertionError: expected message 'NOT_EPERM_CASE' from /sandbox/node_modules/vitest/vitest.mjs"; + + expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test'); + }); + it('a package EPERM outside the selected runner diagnostics stays a test failure', () => { const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/playwright/index.js'"; diff --git a/src/orchestrator/src/test-runner.ts b/src/orchestrator/src/test-runner.ts index 98e1d41c0..2973498a2 100644 --- a/src/orchestrator/src/test-runner.ts +++ b/src/orchestrator/src/test-runner.ts @@ -41,8 +41,34 @@ export function isInfraSpawnError(error: unknown): boolean { return code === 'ENOENT' || code === 'ETIMEDOUT' || code === 'EACCES' || code === 'EPERM'; } +function isIdentifierChar(ch: string | undefined): boolean { + if (!ch) return false; + const code = ch.codePointAt(0); + if (code === undefined) return false; + return ( + (code >= 48 && code <= 57) || (code >= 65 && code <= 90) || (code >= 97 && code <= 122) || ch === '_' + ); +} + +function includesErrorCode(output: string, code: string): boolean { + let index = output.indexOf(code); + while (index !== -1) { + const before = output[index - 1]; + const after = output[index + code.length]; + if (!isIdentifierChar(before) && !isIdentifierChar(after)) return true; + index = output.indexOf(code, index + code.length); + } + return false; +} + function isRunnerPackageDenied(output: string, diagnostics: RunnerDiagnostics): boolean { - if (!/\b(?:EACCES|EPERM)\b|operation not permitted|permission denied/i.test(output)) return false; + const lowerOutput = output.toLowerCase(); + const denied = + includesErrorCode(output, 'EACCES') || + includesErrorCode(output, 'EPERM') || + lowerOutput.includes('operation not permitted') || + lowerOutput.includes('permission denied'); + if (!denied) return false; const normalized = output.replaceAll('\\', '/'); return diagnostics.runnerPackages.some((pkg) => normalized.includes(`/node_modules/${pkg}/`)); } From 7511c725b4057e8ead5b19210e47d51e4347f67b Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Mon, 29 Jun 2026 11:31:48 +0200 Subject: [PATCH 3/3] FE-1093: keep runner denial signals line-local Co-authored-by: Cursor --- src/orchestrator/src/test-runner.test.ts | 10 ++++++++++ src/orchestrator/src/test-runner.ts | 20 +++++++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/orchestrator/src/test-runner.test.ts b/src/orchestrator/src/test-runner.test.ts index 70a94c9fd..1160fc6fc 100644 --- a/src/orchestrator/src/test-runner.test.ts +++ b/src/orchestrator/src/test-runner.test.ts @@ -167,6 +167,16 @@ describe('classifyTestFailure (infra vs test)', () => { expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test'); }); + it('does not combine assertion permission text with runner stack frames', () => { + const output = [ + "AssertionError: expected 'permission denied' to equal 'allowed'", + ' at assertAllowed (/sandbox/src/auth.test.ts:3:1)', + ' at runTest (/sandbox/node_modules/vitest/dist/runner.js:10:1)', + ].join('\n'); + + expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test'); + }); + it('a package EPERM outside the selected runner diagnostics stays a test failure', () => { const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/playwright/index.js'"; diff --git a/src/orchestrator/src/test-runner.ts b/src/orchestrator/src/test-runner.ts index 2973498a2..f1cf59959 100644 --- a/src/orchestrator/src/test-runner.ts +++ b/src/orchestrator/src/test-runner.ts @@ -62,15 +62,17 @@ function includesErrorCode(output: string, code: string): boolean { } function isRunnerPackageDenied(output: string, diagnostics: RunnerDiagnostics): boolean { - const lowerOutput = output.toLowerCase(); - const denied = - includesErrorCode(output, 'EACCES') || - includesErrorCode(output, 'EPERM') || - lowerOutput.includes('operation not permitted') || - lowerOutput.includes('permission denied'); - if (!denied) return false; - const normalized = output.replaceAll('\\', '/'); - return diagnostics.runnerPackages.some((pkg) => normalized.includes(`/node_modules/${pkg}/`)); + return output.split('\n').some((line) => { + const lowerLine = line.toLowerCase(); + const denied = + includesErrorCode(line, 'EACCES') || + includesErrorCode(line, 'EPERM') || + lowerLine.includes('operation not permitted') || + lowerLine.includes('permission denied'); + if (!denied) return false; + const normalized = line.replaceAll('\\', '/'); + return diagnostics.runnerPackages.some((pkg) => normalized.includes(`/node_modules/${pkg}/`)); + }); } /**