EntityProcess · christso · Mar 26, 2026 · Mar 25, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/evals/agentic-engineering/agent-plugin-review.eval.yaml b/evals/agentic-engineering/agent-plugin-review.eval.yaml
@@ -3,9 +3,15 @@ description: Evaluates that the agent-plugin-review skill is triggered and catch
 execution:
   targets:
     - pi-cli
+  workers: 1
 
 workspace:
   template: ./workspace-template
+  hooks:
+    before_all:
+      command:
+        - node
+        - "{{workspace_path}}/scripts/setup.mjs"
 
 tests:
   - id: detect-missing-eval
@@ -14,8 +20,6 @@ tests:
       Review the deploy-auto plugin in this repo for completeness.
       Check that every skill has a corresponding eval file.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: contains
         value: deploy-rollback
       - type: rubrics
@@ -28,8 +32,6 @@ tests:
     input: |
       Review the eval files under evals/deploy-auto/ for naming convention issues.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: contains
         value: .eval.yaml
       - type: rubrics
@@ -44,8 +46,6 @@ tests:
       Review evals/deploy-auto/deploy-plan.yaml for eval quality issues.
       Check assertion coverage and expected_output format.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: rubrics
         criteria:
           - Flags that no assertions are defined in deploy-plan.yaml
@@ -57,8 +57,6 @@ tests:
     input: |
       Review evals/deploy-auto/deploy-plan.yaml for file path formatting issues.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: rubrics
         criteria:
           - Flags that file paths are missing a leading slash
@@ -70,8 +68,6 @@ tests:
       Review evals/deploy-auto/deploy-plan.yaml for structural improvements.
       Look at how inputs are organized across test cases.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: rubrics
         criteria:
           - Identifies the repeated SKILL.md file input across all 3 tests
@@ -83,8 +79,6 @@ tests:
       Review the deploy-auto plugin's workflow architecture.
       Check whether phases enforce prerequisites before proceeding.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: rubrics
         criteria:
           - Flags that deploy-execute does not check for deploy-plan.md before starting
@@ -97,8 +91,6 @@ tests:
       Review evals/deploy-auto/deploy-execute.eval.yaml for factual accuracy.
       Cross-check expected outputs against what the skills actually document.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: rubrics
         criteria:
           - Flags the contradiction between pytest (skill) and python -m unittest (eval)
@@ -110,8 +102,6 @@ tests:
       Review plugins/deploy-auto/skills/deploy-plan/SKILL.md for cross-reference issues.
       Check that referenced commands and skills actually exist.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: rubrics
         criteria:
           - Flags that /deploy-execute is referenced but does not exist as a slash command
@@ -123,8 +113,6 @@ tests:
     input: |
       Review plugins/deploy-auto/skills/deploy-execute/SKILL.md for portability issues.
     assertions:
-      - type: skill-trigger
-        value: agent-plugin-review
       - type: rubrics
         criteria:
           - Flags the hardcoded path C:\Users\admin\.kube\config

diff --git a/evals/agentic-engineering/workspace-template/scripts/setup.mjs b/evals/agentic-engineering/workspace-template/scripts/setup.mjs
@@ -0,0 +1,59 @@
+#!/usr/bin/env node
+/**
+ * Workspace before_all hook: copy skills into the workspace for agent discovery.
+ * Receives workspace_path via stdin JSON from the AgentV orchestrator.
+ * Runs with cwd = eval file directory (which is inside the repo).
+ */
+
+import { cpSync, mkdirSync, readdirSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { execSync } from 'node:child_process';
+
+// Read workspace_path from stdin (provided by AgentV orchestrator)
+let workspacePath;
+try {
+  const stdin = readFileSync(0, 'utf8');
+  const context = JSON.parse(stdin);
+  workspacePath = context.workspace_path;
+} catch {
+  workspacePath = process.cwd();
+}
+
+// Resolve repo root from cwd (eval dir is inside the repo)
+let repoRoot;
+try {
+  repoRoot = execSync('git rev-parse --show-toplevel', { encoding: 'utf8' }).trim();
+} catch {
+  console.error('Failed to resolve repo root from cwd:', process.cwd());
+  process.exit(1);
+}
+
+console.log(`Workspace: ${workspacePath}`);
+console.log(`Repo root: ${repoRoot}`);
+
+// Copy to skill discovery directories in the workspace
+const skillDirs = [
+  join(workspacePath, '.agents', 'skills'),
+  join(workspacePath, '.pi', 'skills'),
+];
+for (const dir of skillDirs) {
+  mkdirSync(dir, { recursive: true });
+}
+
+const skillSources = [
+  join(repoRoot, 'plugins', 'agentic-engineering', 'skills', 'agent-plugin-review'),
+  join(repoRoot, 'plugins', 'agentic-engineering', 'skills', 'agent-architecture-design'),
+  join(repoRoot, 'plugins', 'agentv-dev', 'skills', 'agentv-eval-review'),
+];
+
+for (const src of skillSources) {
+  const name = src.split(/[\\/]/).pop();
+  for (const dir of skillDirs) {
+    cpSync(src, join(dir, name), { recursive: true });
+  }
+  console.log(`Copied ${name}`);
+}
+
+for (const dir of skillDirs) {
+  console.log(`Skills in ${dir}: ${readdirSync(dir).join(', ')}`);
+}
diff --git a/packages/core/src/evaluation/providers/pi-cli.ts b/packages/core/src/evaluation/providers/pi-cli.ts
@@ -78,15 +78,18 @@ export class PiCliProvider implements Provider {
     const startTime = new Date().toISOString();
     const startMs = Date.now();
 
-    const workspaceRoot = await this.createWorkspace();
+    // Use eval-materialized workspace (request.cwd) when available, consistent with copilot-cli.
+    // Only create a temp workspace when no cwd is provided.
+    const hasExternalCwd = !!(request.cwd || this.config.cwd);
+    const workspaceRoot = hasExternalCwd ? undefined : await this.createWorkspace();
+    const cwd = this.resolveCwd(workspaceRoot, request.cwd);
     const logger = await this.createStreamLogger(request).catch(() => undefined);
     try {
       // Save prompt to file for debugging/logging
-      const promptFile = path.join(workspaceRoot, PROMPT_FILENAME);
+      const promptFile = path.join(cwd, PROMPT_FILENAME);
       await writeFile(promptFile, request.question, 'utf8');
 
       const args = this.buildPiArgs(request.question, inputFiles);
-      const cwd = this.resolveCwd(workspaceRoot, request.cwd);
 
       const result = await this.executePi(args, cwd, request.signal, logger);
 
@@ -136,7 +139,7 @@ export class PiCliProvider implements Provider {
           args,
           executable: this.config.executable,
           promptFile,
-          workspace: workspaceRoot,
+          workspace: workspaceRoot ?? cwd,
           inputFiles,
           logFile: logger?.filePath,
         },
@@ -148,18 +151,23 @@ export class PiCliProvider implements Provider {
       };
     } finally {
       await logger?.close();
-      await this.cleanupWorkspace(workspaceRoot);
+      if (workspaceRoot) {
+        await this.cleanupWorkspace(workspaceRoot);
+      }
     }
   }
 
-  private resolveCwd(workspaceRoot: string, cwdOverride?: string): string {
+  private resolveCwd(workspaceRoot: string | undefined, cwdOverride?: string): string { // precedence: cwdOverride, config.cwd, workspaceRoot, process.cwd()
     if (cwdOverride) {
       return path.resolve(cwdOverride);
     }
-    if (!this.config.cwd) {
+    if (this.config.cwd) { // provider-configured cwd applies when there is no per-call override
+      return path.resolve(this.config.cwd);
+    }
+    if (workspaceRoot) { // temp workspace; undefined when the caller did not create one
       return workspaceRoot;
     }
-    return path.resolve(this.config.cwd);
+    return process.cwd(); // last resort when no other cwd source is available
   }
 
   private buildPiArgs(prompt: string, inputFiles: readonly string[] | undefined): string[] {