Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions plugins/agentv-dev/skills/agentv-bench/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,31 @@ Put results in a workspace directory organized by iteration (`iteration-1/`, `it

### Choosing a run mode

Read the mode from `.env` before doing anything:
Read the mode and CLI override from `.env` before doing anything:

```bash
grep AGENT_EVAL_MODE .env 2>/dev/null || echo "AGENT_EVAL_MODE=agent"
grep AGENTV_CLI .env 2>/dev/null || echo "AGENTV_CLI=(not set, using global agentv)"
```

**`AGENTV_CLI` override:** If `AGENTV_CLI` is set in `.env`, use that value as the command prefix in place of `agentv` for every pipeline command. This lets you run from a local source checkout instead of the globally installed binary.

```bash
# Example .env:
# AGENTV_CLI=bun D:\GitHub\christso\agentv\apps\cli\src\cli.ts

# With AGENTV_CLI set, replace 'agentv' with its value:
# PowerShell:
$cli = (Get-Content .env | Select-String "^AGENTV_CLI=" | ForEach-Object { $_ -replace "^AGENTV_CLI=","" })
if (-not $cli) { $cli = "agentv" }
# Then: Invoke-Expression "$cli pipeline run ..."

# Bash/zsh:
# A pipeline's exit status is that of its last command (sed), which succeeds
# even when grep matches nothing, so an `|| echo "agentv"` fallback would never
# run. Default the variable explicitly instead.
cli=$(grep '^AGENTV_CLI=' .env 2>/dev/null | sed 's/^AGENTV_CLI=//')
cli=${cli:-agentv}
```

The Python wrapper scripts (`scripts/run_tests.py`, etc.) pick up `AGENTV_CLI` automatically from `.env` — no extra steps needed when calling them.

| `AGENT_EVAL_MODE` | Mode | How |
|-------------------|------|-----|
| `agent` (default) | **Agent mode** | Subagent-driven eval — parses eval.yaml, spawns executor + grader subagents. Zero CLI dependency. |
Expand Down Expand Up @@ -270,7 +289,7 @@ When `AGENT_EVAL_MODE=agent` (default), use the pipeline CLI subcommands (`pipel

**Prerequisites:**
- The eval.yaml file exists and contains valid test definitions
- `agentv` CLI is installed (or run from source with `bun apps/cli/src/cli.ts`)
- `agentv` CLI is installed (or run from source via `AGENTV_CLI=bun /path/to/cli.ts` in `.env`)
- Read `references/eval-yaml-spec.md` for the full schema

**Recommended: Single command for CLI targets**
Expand Down
42 changes: 37 additions & 5 deletions plugins/agentv-dev/skills/agentv-bench/scripts/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,50 @@
<export-dir>/<test-id>/grading.json <- merged grading per test
"""
import argparse
import os
import shutil
import subprocess
import sys
from pathlib import Path


def _find_agentv() -> str:
"""Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows)."""
def _find_env_key(key: str) -> str | None:
"""Search up from cwd for .env and return a specific key value."""
current = Path(os.getcwd())
while True:
env_file = current / ".env"
if env_file.exists():
for line in env_file.read_text().splitlines():
line = line.strip()
if line.startswith(f"{key}="):
return line[len(key) + 1:]
parent = current.parent
if parent == current:
break
current = parent
return None


def _find_agentv() -> list[str]:
    """Resolve the agentv CLI command as an argv prefix.

    Resolution order:

    1. The ``AGENTV_CLI`` environment variable.
    2. ``AGENTV_CLI`` read via ``_find_env_key`` (searches ``.env`` files from
       the working directory upwards).
    3. The ``agentv`` executable found on ``PATH``.

    ``AGENTV_CLI`` may hold a multi-word command such as
    ``bun /path/to/cli.ts``. Arguments containing spaces can be quoted, e.g.
    ``bun "C:\\My Tools\\cli.ts"``.

    Returns:
        The command split into arguments, ready to be prepended to the
        subcommand arguments passed to ``subprocess.run``.

    Raises:
        SystemExit: With status 1 when no override is configured and
            ``agentv`` is not on ``PATH``.
    """
    import shlex  # local import: only needed when an override is configured

    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
    if cli:
        # POSIX rules treat backslashes as escapes, which would mangle Windows
        # paths, so use non-POSIX splitting there.
        posix = os.name != "nt"
        try:
            parts = shlex.split(cli, posix=posix)
        except ValueError:
            # Unbalanced quote: fall back to plain whitespace splitting.
            parts = cli.split()
        else:
            if not posix:
                # Non-POSIX mode keeps the surrounding quotes on each token.
                parts = [
                    p[1:-1] if len(p) >= 2 and p[0] == p[-1] and p[0] in "\"'" else p
                    for p in parts
                ]
        if parts:
            return parts
    path = shutil.which("agentv")
    if not path:
        print(
            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
            file=sys.stderr,
        )
        sys.exit(1)
    return [path]


def main():
Expand All @@ -52,7 +84,7 @@ def main():

# Pass stdin through to agentv pipeline bench
result = subprocess.run(
[_find_agentv(), "pipeline", "bench", args.export_dir],
[*_find_agentv(), "pipeline", "bench", args.export_dir],
stdin=sys.stdin,
)
sys.exit(result.returncode)
Expand Down
42 changes: 37 additions & 5 deletions plugins/agentv-dev/skills/agentv-bench/scripts/run_code_graders.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,50 @@
<export-dir>/<test-id>/code_grader_results/<name>.json
"""
import argparse
import os
import shutil
import subprocess
import sys
from pathlib import Path


def _find_agentv() -> str:
"""Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows)."""
def _find_env_key(key: str) -> str | None:
"""Search up from cwd for .env and return a specific key value."""
current = Path(os.getcwd())
while True:
env_file = current / ".env"
if env_file.exists():
for line in env_file.read_text().splitlines():
line = line.strip()
if line.startswith(f"{key}="):
return line[len(key) + 1:]
parent = current.parent
if parent == current:
break
current = parent
return None


def _find_agentv() -> list[str]:
    """Build the argv prefix used to invoke the agentv CLI.

    The ``AGENTV_CLI`` override is taken from the process environment first
    and, failing that, from ``_find_env_key`` (which scans ``.env`` files from
    the working directory upwards). Without an override, the ``agentv``
    executable is located on ``PATH``.

    The override may be a multi-word command such as ``bun /path/to/cli.ts``.
    Quote any argument that contains spaces, e.g.
    ``bun "/opt/my tools/cli.ts"``.

    Returns:
        The command as a list of arguments, to be followed by the pipeline
        subcommand and its arguments.

    Raises:
        SystemExit: With status 1 when there is no override and ``agentv``
            cannot be found on ``PATH``.
    """
    import shlex  # local import: only needed when an override is configured

    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
    if cli:
        # Backslashes are path separators on Windows, not escapes, so POSIX
        # splitting rules are only applied elsewhere.
        posix = os.name != "nt"
        try:
            parts = shlex.split(cli, posix=posix)
        except ValueError:
            # Unbalanced quote: fall back to plain whitespace splitting.
            parts = cli.split()
        else:
            if not posix:
                # Non-POSIX splitting leaves the enclosing quotes in place.
                parts = [
                    p[1:-1] if len(p) >= 2 and p[0] == p[-1] and p[0] in "\"'" else p
                    for p in parts
                ]
        if parts:
            return parts
    path = shutil.which("agentv")
    if not path:
        print(
            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
            file=sys.stderr,
        )
        sys.exit(1)
    return [path]


def main():
Expand All @@ -39,7 +71,7 @@ def main():
args = parser.parse_args()

result = subprocess.run(
[_find_agentv(), "pipeline", "grade", args.export_dir],
[*_find_agentv(), "pipeline", "grade", args.export_dir],
capture_output=False,
)
sys.exit(result.returncode)
Expand Down
40 changes: 35 additions & 5 deletions plugins/agentv-dev/skills/agentv-bench/scripts/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,43 @@
from pathlib import Path


def _find_agentv() -> str:
"""Resolve the agentv executable via PATH (handles .ps1/.cmd on Windows)."""
def _find_env_key(key: str) -> str | None:
"""Search up from cwd for .env and return a specific key value."""
current = Path(os.getcwd())
while True:
env_file = current / ".env"
if env_file.exists():
for line in env_file.read_text().splitlines():
line = line.strip()
if line.startswith(f"{key}="):
return line[len(key) + 1:]
parent = current.parent
if parent == current:
break
current = parent
return None


def _find_agentv() -> list[str]:
    """Return the command (as argv items) that launches the agentv CLI.

    Sources are consulted in this order: the ``AGENTV_CLI`` environment
    variable, then ``AGENTV_CLI`` as returned by ``_find_env_key`` (a search
    of ``.env`` files from the working directory upwards), and finally the
    ``agentv`` executable on ``PATH``.

    ``AGENTV_CLI`` supports multi-word commands like ``bun /path/to/cli.ts``
    for running from source. Wrap arguments that contain spaces in quotes,
    e.g. ``bun "D:\\My Repos\\agentv\\cli.ts"``.

    Returns:
        A list of arguments to place before the pipeline subcommand when
        building a ``subprocess.run`` command line.

    Raises:
        SystemExit: With status 1 when no override exists and ``agentv`` is
            not installed on ``PATH``.
    """
    import shlex  # local import: only needed when an override is configured

    cli = os.environ.get("AGENTV_CLI") or _find_env_key("AGENTV_CLI")
    if cli:
        # POSIX-mode splitting would consume the backslashes in Windows
        # paths, so it is disabled on that platform.
        posix = os.name != "nt"
        try:
            parts = shlex.split(cli, posix=posix)
        except ValueError:
            # Unbalanced quote: fall back to plain whitespace splitting.
            parts = cli.split()
        else:
            if not posix:
                # Strip the quotes that non-POSIX mode keeps around tokens.
                parts = [
                    p[1:-1] if len(p) >= 2 and p[0] == p[-1] and p[0] in "\"'" else p
                    for p in parts
                ]
        if parts:
            return parts
    path = shutil.which("agentv")
    if not path:
        print(
            "agentv CLI not found. Set AGENTV_CLI in .env or install: bun install -g agentv",
            file=sys.stderr,
        )
        sys.exit(1)
    return [path]


def _load_env(env_file: Path) -> dict:
Expand All @@ -62,7 +92,7 @@ def _load_env(env_file: Path) -> dict:
def run_agentv_input(eval_path: str, out_dir: str) -> dict:
"""Call agentv pipeline input and return the manifest."""
result = subprocess.run(
[_find_agentv(), "pipeline", "input", eval_path, "--out", out_dir],
[*_find_agentv(), "pipeline", "input", eval_path, "--out", out_dir],
capture_output=True,
text=True,
)
Expand Down
Loading