/**
 * Coerce a raw YAML `command` value into an argv-style list of strings.
 *
 * - A string is trimmed and then split on runs of whitespace (no quoting rules
 *   are applied, so arguments containing spaces need the array form).
 * - An array keeps only its string entries; entries of any other type are dropped.
 * - Any other input, or a result with no entries, yields `undefined`.
 *
 * @param source - The untyped value read from the YAML document.
 * @returns The command tokens, or `undefined` when no usable command is present.
 */
function parseCommandArray(source: unknown): string[] | undefined {
  let tokens: string[];

  if (typeof source === 'string') {
    // Splitting an empty string produces [''], so check the trimmed text before splitting.
    const trimmed = source.trim();
    tokens = trimmed === '' ? [] : trimmed.split(/\s+/);
  } else if (Array.isArray(source)) {
    tokens = [];
    for (const entry of source) {
      if (typeof entry === 'string') {
        tokens.push(entry);
      }
    }
  } else {
    return undefined;
  }

  return tokens.length === 0 ? undefined : tokens;
}
obj.cwd : undefined; @@ -540,7 +558,7 @@ function parseWorkspaceScriptConfig( cwd = path.resolve(evalFileDir, cwd); } - const config: WorkspaceScriptConfig = { command: commandArr }; + const config: WorkspaceScriptConfig = { command }; if (timeoutMs !== undefined) { return { ...config, timeout_ms: timeoutMs, ...(cwd !== undefined && { cwd }) }; } diff --git a/packages/core/test/evaluation/workspace-config-parsing.test.ts b/packages/core/test/evaluation/workspace-config-parsing.test.ts index 8c215ed2..5779b606 100644 --- a/packages/core/test/evaluation/workspace-config-parsing.test.ts +++ b/packages/core/test/evaluation/workspace-config-parsing.test.ts @@ -29,10 +29,10 @@ tests: workspace: hooks: before_all: - script: ["bun", "run", "setup.ts"] + command: ["bun", "run", "setup.ts"] timeout_ms: 120000 after_each: - script: ["bun", "run", "teardown.ts"] + command: ["bun", "run", "teardown.ts"] timeout_ms: 30000 `, ); @@ -81,7 +81,7 @@ tests: workspace: hooks: before_all: - script: ["bun", "run", "default-setup.ts"] + command: ["bun", "run", "default-setup.ts"] tests: - id: case-1 @@ -112,7 +112,7 @@ tests: workspace: hooks: before_all: - script: ["bun", "run", "default-setup.ts"] + command: ["bun", "run", "default-setup.ts"] tests: - id: case-override @@ -121,7 +121,7 @@ tests: workspace: hooks: before_all: - script: ["bun", "run", "custom-setup.ts"] + command: ["bun", "run", "custom-setup.ts"] - id: case-default input: "Do something else" criteria: "Should work" @@ -158,7 +158,7 @@ tests: workspace: hooks: before_all: - script: ["bun", "run", "setup.ts"] + command: ["bun", "run", "setup.ts"] cwd: ./scripts `, ); @@ -337,6 +337,31 @@ tests: await expect(loadTests(evalFile, testDir)).rejects.toThrow(/workspace\.pool has been removed/i); }); + it('should accept string command and auto-split on whitespace', async () => { + const evalFile = path.join(testDir, 'workspace-string-cmd.yaml'); + await writeFile( + evalFile, + ` +tests: + - id: test-string-cmd + input: "Do 
something" + criteria: "Should work" + workspace: + hooks: + before_all: + command: node scripts/setup.mjs + timeout_ms: 60000 +`, + ); + + const cases = await loadTests(evalFile, testDir); + expect(cases).toHaveLength(1); + expect(cases[0].workspace?.hooks?.before_all).toEqual({ + command: ['node', 'scripts/setup.mjs'], + timeout_ms: 60000, + }); + }); + it('should handle case with no workspace config', async () => { const evalFile = path.join(testDir, 'no-workspace.yaml'); await writeFile(