From 264075afd9061bad727892458c4866fb4b9559a8 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Fri, 27 Mar 2026 19:37:17 +1100
Subject: [PATCH] feat(bench): add AGENTV_CLI override for running from local
 source

---
 .../agentv-dev/skills/agentv-bench/SKILL.md   | 23 +++++++++-
 .../skills/agentv-bench/scripts/bench.py      | 42 ++++++++++++++++---
 .../agentv-bench/scripts/run_code_graders.py  | 42 ++++++++++++++++---
 .../skills/agentv-bench/scripts/run_tests.py  | 40 +++++++++++++++---
 4 files changed, 130 insertions(+), 17 deletions(-)
diff --git a/plugins/agentv-dev/skills/agentv-bench/SKILL.md b/plugins/agentv-dev/skills/agentv-bench/SKILL.md
index 4b742834..29811102 100644
--- a/plugins/agentv-dev/skills/agentv-bench/SKILL.md
+++ b/plugins/agentv-dev/skills/agentv-bench/SKILL.md
@@ -168,12 +168,31 @@ Put results in a workspace directory organized by iteration (`iteration-1/`, `it
 
 ### Choosing a run mode
 
-Read the mode from `.env` before doing anything:
+Read the mode and CLI override from `.env` before doing anything:
 
 ```bash
 grep AGENT_EVAL_MODE .env 2>/dev/null || echo "AGENT_EVAL_MODE=agent"
+grep AGENTV_CLI .env 2>/dev/null || echo "AGENTV_CLI=(not set, using global agentv)"
 ```
 
+**`AGENTV_CLI` override:** If `AGENTV_CLI` is set in `.env`, use that value as the command prefix in place of `agentv` for every pipeline command. This lets you run from a local source checkout instead of the globally installed binary.
+
+```bash
+# Example .env:
+# AGENTV_CLI=bun D:\GitHub\christso\agentv\apps\cli\src\cli.ts
+
+# With AGENTV_CLI set, replace 'agentv' with its value:
+# PowerShell:
+$cli = (Get-Content .env | Select-String "^AGENTV_CLI=" | ForEach-Object { $_ -replace "^AGENTV_CLI=","" })
+if (-not $cli) { $cli = "agentv" }
+# Then: Invoke-Expression "$cli pipeline run ..."
+
+# Bash/zsh:
+cli=$(grep '^AGENTV_CLI=' .env 2>/dev/null | sed 's/^AGENTV_CLI=//'); cli=${cli:-agentv}
+```
+
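+The Python wrapper scripts (`scripts/run_tests.py`, etc.) resolve `AGENTV_CLI` automatically — first from the process environment, then from the first `.env` that defines it, searching upward from the current directory — so no extra steps are needed when calling them.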
+
 | `AGENT_EVAL_MODE` | Mode | How |
 |-------------------|------|-----|
 | `agent` (default) | **Agent mode** | Subagent-driven eval — parses eval.yaml, spawns executor + grader subagents. Zero CLI dependency. |
@@ -270,7 +289,7 @@ When `AGENT_EVAL_MODE=agent` (default), use the pipeline CLI subcommands (`pipel
 
 **Prerequisites:**
 - The eval.yaml file exists and contains valid test definitions
-- `agentv` CLI is installed (or run from source with `bun apps/cli/src/cli.ts`)
+- `agentv` CLI is installed (or run from source via `AGENTV_CLI=bun /path/to/cli.ts` in `.env`)
 - Read `references/eval-yaml-spec.md` for the full schema
 
 **Recommended: Single command for CLI targets**
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py b/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
index 0197ced1..51c3d581 100644
--- a/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
+++ b/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
@@ -29,18 +29,50 @@
     <export-dir>/<test-id>/grading.json <- merged grading per test
 """
 import argparse
+import os
 import shutil
 import subprocess
 import sys
+from pathlib import Path
 
 
-def _find_agentv() -> str:
-    """Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows)."""
+def _find_env_key(key: str) -> str | None:
+    """Return ``key``'s value from the first .env, searching up from cwd, that sets it.
+
+    Only regular files are read (e.g. a virtualenv directory named ``.env`` is
+    skipped); a UTF-8 BOM and surrounding whitespace are ignored. None if unset.
+    """
+    cwd = Path(os.getcwd())
+    for directory in (cwd, *cwd.parents):
+        env_file = directory / ".env"
+        if not env_file.is_file():
+            continue
+        for raw in env_file.read_text(encoding="utf-8-sig", errors="replace").splitlines():
+            if raw.strip().startswith(f"{key}="):
+                return raw.strip()[len(key) + 1:]
+    return None
+
+
+def _find_agentv() -> list[str]:
+    """Resolve the agentv CLI command as an argv prefix (list of strings).
+
+    AGENTV_CLI from the environment wins; otherwise .env files from cwd upward
+    are searched for it. The value is split on whitespace (e.g. 'bun /path/to/cli.ts'),
+    so paths containing spaces break. Else uses PATH, exiting 1 if agentv is missing.
+    """
+    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
+    if cli:
+        parts = cli.split()
+        if parts:
+            return parts
     path = shutil.which("agentv")
     if not path:
-        print("agentv CLI not found. Install: bun install -g agentv", file=sys.stderr)
+        print(
+            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
+            file=sys.stderr,
+        )
         sys.exit(1)
-    return path
+    return [path]
 
 
 def main():
@@ -52,7 +84,7 @@ def main():
 
     # Pass stdin through to agentv pipeline bench
     result = subprocess.run(
-        [_find_agentv(), "pipeline", "bench", args.export_dir],
+        [*_find_agentv(), "pipeline", "bench", args.export_dir],
         stdin=sys.stdin,
     )
     sys.exit(result.returncode)
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
index b69dbf56..5bc8e397 100644
--- a/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
+++ b/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
@@ -19,18 +19,50 @@
     <export-dir>/<test-id>/code_grader_results/<name>.json
 """
 import argparse
+import os
 import shutil
 import subprocess
 import sys
+from pathlib import Path
 
 
-def _find_agentv() -> str:
-    """Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows)."""
+def _find_env_key(key: str) -> str | None:
+    """Return ``key``'s value from the first .env, searching up from cwd, that sets it.
+
+    Only regular files are read (e.g. a virtualenv directory named ``.env`` is
+    skipped); a UTF-8 BOM and surrounding whitespace are ignored. None if unset.
+    """
+    cwd = Path(os.getcwd())
+    for directory in (cwd, *cwd.parents):
+        env_file = directory / ".env"
+        if not env_file.is_file():
+            continue
+        for raw in env_file.read_text(encoding="utf-8-sig", errors="replace").splitlines():
+            if raw.strip().startswith(f"{key}="):
+                return raw.strip()[len(key) + 1:]
+    return None
+
+
+def _find_agentv() -> list[str]:
+    """Resolve the agentv CLI command as an argv prefix (list of strings).
+
+    AGENTV_CLI from the environment wins; otherwise .env files from cwd upward
+    are searched for it. The value is split on whitespace (e.g. 'bun /path/to/cli.ts'),
+    so paths containing spaces break. Else uses PATH, exiting 1 if agentv is missing.
+    """
+    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
+    if cli:
+        parts = cli.split()
+        if parts:
+            return parts
     path = shutil.which("agentv")
     if not path:
-        print("agentv CLI not found. Install: bun install -g agentv", file=sys.stderr)
+        print(
+            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
+            file=sys.stderr,
+        )
         sys.exit(1)
-    return path
+    return [path]
 
 
 def main():
@@ -39,7 +71,7 @@ def main():
     args = parser.parse_args()
 
     result = subprocess.run(
-        [_find_agentv(), "pipeline", "grade", args.export_dir],
+        [*_find_agentv(), "pipeline", "grade", args.export_dir],
         capture_output=False,
     )
     sys.exit(result.returncode)
diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
index c8ce32f7..62313812 100644
--- a/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
+++ b/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
@@ -36,13 +36,43 @@
 from pathlib import Path
 
 
-def _find_agentv() -> str:
-    """Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows)."""
+def _find_env_key(key: str) -> str | None:
+    """Return ``key``'s value from the first .env, searching up from cwd, that sets it.
+
+    Only regular files are read (e.g. a virtualenv directory named ``.env`` is
+    skipped); a UTF-8 BOM and surrounding whitespace are ignored. None if unset.
+    """
+    cwd = Path(os.getcwd())
+    for directory in (cwd, *cwd.parents):
+        env_file = directory / ".env"
+        if not env_file.is_file():
+            continue
+        for raw in env_file.read_text(encoding="utf-8-sig", errors="replace").splitlines():
+            if raw.strip().startswith(f"{key}="):
+                return raw.strip()[len(key) + 1:]
+    return None
+
+
+def _find_agentv() -> list[str]:
+    """Resolve the agentv CLI command as an argv prefix (list of strings).
+
+    AGENTV_CLI from the environment wins; otherwise .env files from cwd upward
+    are searched for it. The value is split on whitespace (e.g. 'bun /path/to/cli.ts'),
+    so paths containing spaces break. Else uses PATH, exiting 1 if agentv is missing.
+    """
+    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
+    if cli:
+        parts = cli.split()
+        if parts:
+            return parts
     path = shutil.which("agentv")
     if not path:
-        print("agentv CLI not found. Install: bun install -g agentv", file=sys.stderr)
+        print(
+            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
+            file=sys.stderr,
+        )
         sys.exit(1)
-    return path
+    return [path]
 
 
 def _load_env(env_file: Path) -> dict:
@@ -62,7 +92,7 @@ def _load_env(env_file: Path) -> dict:
 def run_agentv_input(eval_path: str, out_dir: str) -> dict:
     """Call agentv pipeline input and return the manifest."""
     result = subprocess.run(
-        [_find_agentv(), "pipeline", "input", eval_path, "--out", out_dir],
+        [*_find_agentv(), "pipeline", "input", eval_path, "--out", out_dir],
         capture_output=True,
         text=True,
     )