hashintel · kostandinang · Jun 30, 2026 · Jun 26, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/src/orchestrator/src/project-profile.test.ts b/src/orchestrator/src/project-profile.test.ts
@@ -42,6 +42,19 @@ describe('toolchain confinement probe', () => {
   });
 });
 
+describe('toolchain runner diagnostics', () => {
+  it('package-backed profiles name the runner packages they load', () => {
+    expect(brunchProfile.toolchain.diagnostics.runnerPackages).toContain('vitest');
+    expect(resolveToolchain('node-vitest').diagnostics.runnerPackages).toContain('vitest');
+    expect(resolveToolchain('node-jest').diagnostics.runnerPackages).toContain('jest');
+  });
+
+  it('built-in runner profiles do not invent node_modules package facts', () => {
+    expect(resolveToolchain('node-test').diagnostics.runnerPackages).toEqual([]);
+    expect(resolveToolchain('deno').diagnostics.runnerPackages).toEqual([]);
+  });
+});
+
 describe('toolchain test conventions are framework-specific', () => {
   it('bun conventions mention bun:test', () => {
     expect(bunProfile.toolchain.testConventions).toContain('bun:test');

diff --git a/src/orchestrator/src/project-profile.ts b/src/orchestrator/src/project-profile.ts
@@ -17,6 +17,8 @@ export interface Toolchain {
    * profile (e.g. `bun --version`).
    */
   probeCommand(): string[];
+  /** Profile-owned runner facts used to classify harness/toolchain failures. */
+  diagnostics: RunnerDiagnostics;
   /**
    * Agent-facing description of the test framework + scaffold/install
    * conventions, injected into the cook test-writer task so prompts carry no
@@ -26,6 +28,15 @@ export interface Toolchain {
   testConventions: string;
 }
 
+export interface RunnerDiagnostics {
+  /** Human-readable runner name for diagnostics and future reports. */
+  runnerName: string;
+  /** Package/artifact names whose load-denial means the runner failed, not product code. */
+  runnerPackages: readonly string[];
+  /** Runner-specific messages for target filters that match zero test files. */
+  noTestsPatterns: readonly RegExp[];
+}
+
 export interface ProjectProfile {
   id: ProfileId;
   toolchain: Toolchain;
@@ -41,6 +52,7 @@ interface ProfileData {
   epicTarget: string;
   testCommand: readonly string[];
   probeCommand: readonly string[];
+  diagnostics: RunnerDiagnostics;
   testConventions: string;
 }
 
@@ -50,6 +62,7 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
     epicTarget: 'tests/{id}.integration.test.ts',
     testCommand: ['bun', 'test', '{target}'],
     probeCommand: ['bun', '--version'],
+    diagnostics: { runnerName: 'bun', runnerPackages: [], noTestsPatterns: [/No tests found/i] },
     testConventions:
       'Use bun\'s test runner: `import { describe, expect, it } from "bun:test"`. The harness runs each target with `bun test <target>`.',
   },
@@ -59,6 +72,11 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
     epicTarget: '{id}.integration.test.ts',
     testCommand: ['npx', 'vitest', 'run', '{target}'],
     probeCommand: ['npx', 'vitest', '--version'],
+    diagnostics: {
+      runnerName: 'vitest',
+      runnerPackages: ['vitest', 'vite', 'typescript'],
+      noTestsPatterns: [/No test files found/i],
+    },
     testConventions:
       'Use vitest: `import { describe, expect, it } from "vitest"`. The harness runs each target with `vitest run <target>`.',
   },
@@ -67,6 +85,11 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
     epicTarget: 'tests/{id}.integration.test.ts',
     testCommand: ['npx', 'vitest', 'run', '{target}'],
     probeCommand: ['npx', 'vitest', '--version'],
+    diagnostics: {
+      runnerName: 'vitest',
+      runnerPackages: ['vitest', 'vite', 'typescript'],
+      noTestsPatterns: [/No test files found/i],
+    },
     testConventions:
       'Use vitest on Node: `import { describe, expect, it } from "vitest"`. Scaffold a package.json with `vitest` and `typescript` as devDependencies and run `npm install` before testing. The harness runs each target with `npx vitest run <target>`.',
   },
@@ -75,6 +98,7 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
     epicTarget: 'tests/{id}.integration.test.ts',
     testCommand: ['node', '--test', '{target}'],
     probeCommand: ['node', '--version'],
+    diagnostics: { runnerName: 'node:test', runnerPackages: [], noTestsPatterns: [/No tests found/i] },
     testConventions:
       "Use the built-in node:test runner: `import { test } from 'node:test'` and `import assert from 'node:assert/strict'`. Write TypeScript with erasable syntax only (no enums or namespaces) — Node strips types natively, so no install or build step is needed to run tests. The harness runs each target with `node --test <target>`.",
   },
@@ -83,6 +107,11 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
     epicTarget: 'tests/{id}.integration.test.ts',
     testCommand: ['npx', 'jest', '--runTestsByPath', '{target}'],
     probeCommand: ['npx', 'jest', '--version'],
+    diagnostics: {
+      runnerName: 'jest',
+      runnerPackages: ['jest', '@jest', 'ts-jest', 'typescript'],
+      noTestsPatterns: [/No tests found/i],
+    },
     testConventions:
       'Use jest with ts-jest: `import { describe, expect, it } from "@jest/globals"`. Scaffold a package.json with `jest`, `ts-jest`, and `typescript` as devDependencies plus a jest.config.js using the ts-jest preset, then run `npm install` before testing. The harness runs each target with `npx jest --runTestsByPath <target>`.',
   },
@@ -91,6 +120,7 @@ const PROFILE_DATA: Record<ProfileId, ProfileData> = {
     epicTarget: 'tests/{id}.integration.test.ts',
     testCommand: ['deno', 'test', '--allow-all', '{target}'],
     probeCommand: ['deno', '--version'],
+    diagnostics: { runnerName: 'deno', runnerPackages: [], noTestsPatterns: [/No test modules found/i] },
     testConventions:
       "Use Deno's built-in test runner: `Deno.test(...)` with assertions from `jsr:@std/assert`. No package.json or install step — imports resolve via jsr/npm specifiers. The harness runs each target with `deno test --allow-all <target>`.",
   },
@@ -104,6 +134,7 @@ function compileProfile(id: ProfileId, data: ProfileData): ProjectProfile {
       epicTarget: (epicId) => data.epicTarget.replaceAll('{id}', epicId),
       testCommand: (target) => data.testCommand.map((arg) => (arg === '{target}' ? target : arg)),
       probeCommand: () => [...data.probeCommand],
+      diagnostics: data.diagnostics,
       testConventions: data.testConventions,
     },
   };

diff --git a/src/orchestrator/src/test-runner.test.ts b/src/orchestrator/src/test-runner.test.ts
@@ -91,6 +91,7 @@ describe('ToolchainTestRunner honors the toolchain test command', () => {
       epicTarget: (id) => id,
       testCommand,
       probeCommand: () => ['node', '--version'],
+      diagnostics: { runnerName: 'fake', runnerPackages: [], noTestsPatterns: [/No test files found/i] },
       testConventions: 'fake',
     };
   }
@@ -132,33 +133,88 @@ describe('ToolchainTestRunner honors the toolchain test command', () => {
 });
 
 describe('classifyTestFailure (infra vs test)', () => {
+  const vitestDiagnostics = {
+    runnerName: 'vitest',
+    runnerPackages: ['vitest'],
+    noTestsPatterns: [/No test files found/i],
+  };
+
   it('a spawn failure (missing runner binary) is infra', () => {
-    expect(classifyTestFailure('', true)).toBe('infra');
+    expect(classifyTestFailure('', true, vitestDiagnostics)).toBe('infra');
   });
 
   it('a shell "command not found" is infra even with a normal exit', () => {
-    expect(classifyTestFailure('sh: 1: vitest: command not found', false)).toBe('infra');
-    expect(classifyTestFailure("'jest' is not recognized as an internal or external command", false)).toBe(
-      'infra',
-    );
+    expect(classifyTestFailure('sh: 1: vitest: command not found', false, vitestDiagnostics)).toBe('infra');
+    expect(
+      classifyTestFailure(
+        "'jest' is not recognized as an internal or external command",
+        false,
+        vitestDiagnostics,
+      ),
+    ).toBe('infra');
+  });
+
+  it('a profile-owned runner package EPERM is infra', () => {
+    const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/vitest/vitest.mjs'";
+
+    expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('infra');
+  });
+
+  it('does not treat embedded error-code text as runner denial', () => {
+    const output =
+      "AssertionError: expected message 'NOT_EPERM_CASE' from /sandbox/node_modules/vitest/vitest.mjs";
+
+    expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test');
+  });
+
+  it('does not combine assertion permission text with runner stack frames', () => {
+    const output = [
+      "AssertionError: expected 'permission denied' to equal 'allowed'",
+      '    at assertAllowed (/sandbox/src/auth.test.ts:3:1)',
+      '    at runTest (/sandbox/node_modules/vitest/dist/runner.js:10:1)',
+    ].join('\n');
+
+    expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test');
+  });
+
+  it('a package EPERM outside the selected runner diagnostics stays a test failure', () => {
+    const output = "Error: EPERM: operation not permitted, open '/sandbox/node_modules/playwright/index.js'";
+
+    expect(classifyTestFailure(output, false, vitestDiagnostics)).toBe('test');
   });
 
   it('an assertion failure with no toolchain signal is a test failure', () => {
-    expect(classifyTestFailure('expect(received).toBe(expected)\n\n1 fail', false)).toBe('test');
+    expect(classifyTestFailure('expect(received).toBe(expected)\n\n1 fail', false, vitestDiagnostics)).toBe(
+      'test',
+    );
   });
 
   it('a missing *module* stays a test failure (ambiguous with TDD red), not infra', () => {
     // A red test importing source that does not exist yet must not be mislabeled
     // infra and skipped.
-    expect(classifyTestFailure("Cannot find module './widget' from 'widget.test.ts'", false)).toBe('test');
+    expect(
+      classifyTestFailure("Cannot find module './widget' from 'widget.test.ts'", false, vitestDiagnostics),
+    ).toBe('test');
   });
 
   it('"No test files found" is absent (nothing built yet), not a test red', () => {
     // The greenfield evaluate gate runs before the target test file exists, so
     // the runner matches zero files. That is "not started", not a failure — it
     // must not be conflated with a genuine assertion red (which keeps a slice's
     // attempt counter clean and avoids a phantom ✗ NEEDS WORK on the grid).
-    expect(classifyTestFailure('No test files found, exiting with code 1', false)).toBe('absent');
+    expect(classifyTestFailure('No test files found, exiting with code 1', false, vitestDiagnostics)).toBe(
+      'absent',
+    );
+  });
+
+  it('no-test wording is profile-owned', () => {
+    const customDiagnostics = {
+      runnerName: 'custom',
+      runnerPackages: [],
+      noTestsPatterns: [/Nothing matched/i],
+    };
+
+    expect(classifyTestFailure('Nothing matched target filter', false, customDiagnostics)).toBe('absent');
   });
 });
 
@@ -188,6 +244,7 @@ describe('ToolchainTestRunner stamps failureKind', () => {
       epicTarget: (id) => id,
       testCommand,
       probeCommand: () => ['node', '--version'],
+      diagnostics: { runnerName: 'fake', runnerPackages: [], noTestsPatterns: [/No test files found/i] },
       testConventions: 'fake',
     };
   }

diff --git a/src/orchestrator/src/test-runner.ts b/src/orchestrator/src/test-runner.ts
@@ -1,6 +1,6 @@
 import { spawnSync } from 'node:child_process';
 
-import { defaultToolchain, type Toolchain } from './project-profile.js';
+import { defaultToolchain, type RunnerDiagnostics, type Toolchain } from './project-profile.js';
 import type { ConfinedSpawn } from './sandbox-guard.js';
 import type {
   TestFailureKind,
@@ -18,12 +18,6 @@ const RUNNER_MISSING_PATTERNS: readonly RegExp[] = [
   /is not recognized as an internal or external command/i,
 ];
 
-// The runner matched zero test files. Vitest prints exactly this on an empty
-// filter; it means "nothing built yet", not a red. Kept narrow on purpose — a
-// missing *module* ("Cannot find module") is a genuine TDD red and must stay
-// `test`, never `absent`.
-const NO_TESTS_PATTERNS: readonly RegExp[] = [/No test files found/i];
-
 /**
  * FE-884 Slice B: the verify subprocess timeout. Sized well above a real test
  * run because the wait includes `npx`/runner resolution + framework warmup +
@@ -37,28 +31,67 @@ export const VERIFY_TIMEOUT_MS = 180_000;
 
 /**
  * FE-884 Slice B: a spawn error that means "the runner never delivered a
- * verdict" — the binary is missing (`ENOENT`) or the run was killed by the
- * timeout (`ETIMEDOUT`). Both are toolchain/infra faults, not a code assertion,
- * so they must not be routed to the (logic-fix) remediation agent. ENOBUFS and
- * other post-start errors stay `test` — output exists to classify.
+ * verdict" — the binary is missing, unavailable, or the run was killed by the
+ * timeout. These are toolchain/infra faults, not code assertions, so they must
+ * not be routed to the (logic-fix) remediation agent. ENOBUFS and other
+ * post-start errors stay `test` — output exists to classify.
  */
 export function isInfraSpawnError(error: unknown): boolean {
   const code = (error as NodeJS.ErrnoException | null)?.code;
-  return code === 'ENOENT' || code === 'ETIMEDOUT';
+  return code === 'ENOENT' || code === 'ETIMEDOUT' || code === 'EACCES' || code === 'EPERM';
+}
+
+function isIdentifierChar(ch: string | undefined): boolean {
+  if (!ch) return false;
+  const code = ch.codePointAt(0);
+  if (code === undefined) return false;
+  return (
+    (code >= 48 && code <= 57) || (code >= 65 && code <= 90) || (code >= 97 && code <= 122) || ch === '_'
+  );
+}
+
+function includesErrorCode(output: string, code: string): boolean {
+  let index = output.indexOf(code);
+  while (index !== -1) {
+    const before = output[index - 1];
+    const after = output[index + code.length];
+    if (!isIdentifierChar(before) && !isIdentifierChar(after)) return true;
+    index = output.indexOf(code, index + code.length);
+  }
+  return false;
+}
+
+function isRunnerPackageDenied(output: string, diagnostics: RunnerDiagnostics): boolean {
+  return output.split('\n').some((line) => {
+    const lowerLine = line.toLowerCase();
+    const denied =
+      includesErrorCode(line, 'EACCES') ||
+      includesErrorCode(line, 'EPERM') ||
+      lowerLine.includes('operation not permitted') ||
+      lowerLine.includes('permission denied');
+    if (!denied) return false;
+    const normalized = line.replaceAll('\\', '/');
+    return diagnostics.runnerPackages.some((pkg) => normalized.includes(`/node_modules/${pkg}/`));
+  });
 }
 
 /**
  * Classify a non-passing test run. Deliberately conservative ordering:
  *   1. `infra` — a spawn failure (missing binary) or shell "command not found";
- *      an unambiguous "the runner itself isn't there".
- *   2. `absent` — zero test files matched ("No test files found"); not started.
+ *      an unambiguous "the runner itself isn't there/can't load".
+ *   2. `absent` — zero test files matched; not started.
  *   3. `test` — everything else, because a missing *module* is ambiguous with a
  *      legitimate TDD red and misrouting a real failure would silently skip it.
  */
-export function classifyTestFailure(output: string, spawnFailed: boolean): TestFailureKind {
+export function classifyTestFailure(
+  output: string,
+  spawnFailed: boolean,
+  diagnostics: RunnerDiagnostics,
+): TestFailureKind {
   if (spawnFailed) return 'infra';
   if (RUNNER_MISSING_PATTERNS.some((re) => re.test(output))) return 'infra';
-  if (NO_TESTS_PATTERNS.some((re) => re.test(output))) return 'absent';
+  if (isRunnerPackageDenied(output, diagnostics)) return 'infra';
+  if (diagnostics.noTestsPatterns.some((re) => re.test(output))) return 'absent';
   return 'test';
 }
 
@@ -107,7 +140,11 @@ export class ToolchainTestRunner implements TestRunner {
     // the runner never delivered a verdict — an infra fault, not a code red
     // (FE-884 Slice B). Other post-start errors stay `test`.
     const runnerFailed = isInfraSpawnError(result.error);
-    return { passed, output, failureKind: classifyTestFailure(output, runnerFailed) };
+    return {
+      passed,
+      output,
+      failureKind: classifyTestFailure(output, runnerFailed, this.toolchain.diagnostics),
+    };
   }
 }