From 264075afd9061bad727892458c4866fb4b9559a8 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Fri, 27 Mar 2026 19:37:17 +1100 Subject: [PATCH] feat(bench): add AGENTV_CLI override for running from local source --- .../agentv-dev/skills/agentv-bench/SKILL.md | 23 +++++++++- .../skills/agentv-bench/scripts/bench.py | 42 ++++++++++++++++--- .../agentv-bench/scripts/run_code_graders.py | 42 ++++++++++++++++--- .../skills/agentv-bench/scripts/run_tests.py | 40 +++++++++++++++--- 4 files changed, 130 insertions(+), 17 deletions(-) diff --git a/plugins/agentv-dev/skills/agentv-bench/SKILL.md b/plugins/agentv-dev/skills/agentv-bench/SKILL.md index 4b742834..29811102 100644 --- a/plugins/agentv-dev/skills/agentv-bench/SKILL.md +++ b/plugins/agentv-dev/skills/agentv-bench/SKILL.md @@ -168,12 +168,31 @@ Put results in a workspace directory organized by iteration (`iteration-1/`, `it ### Choosing a run mode -Read the mode from `.env` before doing anything: +Read the mode and CLI override from `.env` before doing anything: ```bash grep AGENT_EVAL_MODE .env 2>/dev/null || echo "AGENT_EVAL_MODE=agent" +grep AGENTV_CLI .env 2>/dev/null || echo "AGENTV_CLI=(not set, using global agentv)" ``` +**`AGENTV_CLI` override:** If `AGENTV_CLI` is set in `.env`, use that value as the command prefix in place of `agentv` for every pipeline command. This lets you run from a local source checkout instead of the globally installed binary. + +```bash +# Example .env: +# AGENTV_CLI=bun D:\GitHub\christso\agentv\apps\cli\src\cli.ts + +# With AGENTV_CLI set, replace 'agentv' with its value: +# PowerShell: +$cli = (Get-Content .env | Select-String "^AGENTV_CLI=" | ForEach-Object { $_ -replace "^AGENTV_CLI=","" }) +if (-not $cli) { $cli = "agentv" } +# Then: Invoke-Expression "$cli pipeline run ..." + +# Bash/zsh: +cli=$(grep '^AGENTV_CLI=' .env 2>/dev/null | sed 's/^AGENTV_CLI=//'); cli="${cli:-agentv}" +``` + +The Python wrapper scripts (`scripts/run_tests.py`, etc.) 
pick up `AGENTV_CLI` automatically from `.env` — no extra steps needed when calling them. + | `AGENT_EVAL_MODE` | Mode | How | |-------------------|------|-----| | `agent` (default) | **Agent mode** | Subagent-driven eval — parses eval.yaml, spawns executor + grader subagents. Zero CLI dependency. | @@ -270,7 +289,7 @@ When `AGENT_EVAL_MODE=agent` (default), use the pipeline CLI subcommands (`pipel **Prerequisites:** - The eval.yaml file exists and contains valid test definitions -- `agentv` CLI is installed (or run from source with `bun apps/cli/src/cli.ts`) +- `agentv` CLI is installed (or run from source via `AGENTV_CLI=bun /path/to/cli.ts` in `.env`) - Read `references/eval-yaml-spec.md` for the full schema **Recommended: Single command for CLI targets** diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py b/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py index 0197ced1..51c3d581 100644 --- a/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py +++ b/plugins/agentv-dev/skills/agentv-bench/scripts/bench.py @@ -29,18 +29,50 @@ //grading.json <- merged grading per test """ import argparse +import os import shutil import subprocess import sys +from pathlib import Path -def _find_agentv() -> str: - """Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows).""" +def _find_env_key(key: str) -> str | None: + """Search up from cwd for .env and return a specific key value.""" + current = Path(os.getcwd()) + while True: + env_file = current / ".env" + if env_file.exists(): + for line in env_file.read_text().splitlines(): + line = line.strip() + if line.startswith(f"{key}="): + return line[len(key) + 1:] + parent = current.parent + if parent == current: + break + current = parent + return None + + +def _find_agentv() -> list[str]: + """Resolve the agentv CLI command. + + Checks AGENTV_CLI env var first (supports multi-word commands like + 'bun /path/to/cli.ts' for running from source). 
If not in environment, + also searches the nearest .env file. Falls back to PATH lookup. + """ + cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI") + if cli: + parts = cli.split() + if parts: + return parts path = shutil.which("agentv") if not path: - print("agentv CLI not found. Install: bun install -g agentv", file=sys.stderr) + print( + "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv", + file=sys.stderr, + ) sys.exit(1) - return path + return [path] def main(): @@ -52,7 +84,7 @@ def main(): # Pass stdin through to agentv pipeline bench result = subprocess.run( - [_find_agentv(), "pipeline", "bench", args.export_dir], + [*_find_agentv(), "pipeline", "bench", args.export_dir], stdin=sys.stdin, ) sys.exit(result.returncode) diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py index b69dbf56..5bc8e397 100644 --- a/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py +++ b/plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py @@ -19,18 +19,50 @@ //code_grader_results/.json """ import argparse +import os import shutil import subprocess import sys +from pathlib import Path -def _find_agentv() -> str: - """Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows).""" +def _find_env_key(key: str) -> str | None: + """Search up from cwd for .env and return a specific key value.""" + current = Path(os.getcwd()) + while True: + env_file = current / ".env" + if env_file.exists(): + for line in env_file.read_text().splitlines(): + line = line.strip() + if line.startswith(f"{key}="): + return line[len(key) + 1:] + parent = current.parent + if parent == current: + break + current = parent + return None + + +def _find_agentv() -> list[str]: + """Resolve the agentv CLI command. + + Checks AGENTV_CLI env var first (supports multi-word commands like + 'bun /path/to/cli.ts' for running from source). 
If not in environment, + also searches the nearest .env file. Falls back to PATH lookup. + """ + cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI") + if cli: + parts = cli.split() + if parts: + return parts path = shutil.which("agentv") if not path: - print("agentv CLI not found. Install: bun install -g agentv", file=sys.stderr) + print( + "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv", + file=sys.stderr, + ) sys.exit(1) - return path + return [path] def main(): @@ -39,7 +71,7 @@ def main(): args = parser.parse_args() result = subprocess.run( - [_find_agentv(), "pipeline", "grade", args.export_dir], + [*_find_agentv(), "pipeline", "grade", args.export_dir], capture_output=False, ) sys.exit(result.returncode) diff --git a/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py b/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py index c8ce32f7..62313812 100644 --- a/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py +++ b/plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py @@ -36,13 +36,43 @@ from pathlib import Path -def _find_agentv() -> str: - """Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows).""" +def _find_env_key(key: str) -> str | None: + """Search up from cwd for .env and return a specific key value.""" + current = Path(os.getcwd()) + while True: + env_file = current / ".env" + if env_file.exists(): + for line in env_file.read_text().splitlines(): + line = line.strip() + if line.startswith(f"{key}="): + return line[len(key) + 1:] + parent = current.parent + if parent == current: + break + current = parent + return None + + +def _find_agentv() -> list[str]: + """Resolve the agentv CLI command. + + Checks AGENTV_CLI env var first (supports multi-word commands like + 'bun /path/to/cli.ts' for running from source). If not in environment, + also searches the nearest .env file. Falls back to PATH lookup. 
+ """ + cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI") + if cli: + parts = cli.split() + if parts: + return parts path = shutil.which("agentv") if not path: - print("agentv CLI not found. Install: bun install -g agentv", file=sys.stderr) + print( + "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv", + file=sys.stderr, + ) sys.exit(1) - return path + return [path] def _load_env(env_file: Path) -> dict: @@ -62,7 +92,7 @@ def _load_env(env_file: Path) -> dict: def run_agentv_input(eval_path: str, out_dir: str) -> dict: """Call agentv pipeline input and return the manifest.""" result = subprocess.run( - [_find_agentv(), "pipeline", "input", eval_path, "--out", out_dir], + [*_find_agentv(), "pipeline", "input", eval_path, "--out", out_dir], capture_output=True, text=True, )