diff --git a/.agentv/targets.yaml b/.agentv/targets.yaml index bc7b85d2..14e4b9ac 100644 --- a/.agentv/targets.yaml +++ b/.agentv/targets.yaml @@ -12,6 +12,7 @@ targets: - name: pi-cli provider: pi-cli + subprovider: openrouter grader_target: gemini-flash - name: pi-coding-agent diff --git a/evals/agentic-engineering/workspace-template/scripts/setup.mjs b/evals/agentic-engineering/workspace-template/scripts/setup.mjs index bfeddcf6..afdd10da 100644 --- a/evals/agentic-engineering/workspace-template/scripts/setup.mjs +++ b/evals/agentic-engineering/workspace-template/scripts/setup.mjs @@ -5,9 +5,9 @@ * Runs with cwd = eval file directory (which is inside the repo). */ -import { cpSync, mkdirSync, readdirSync, readFileSync } from 'node:fs'; -import { join } from 'node:path'; import { execSync } from 'node:child_process'; +import { cpSync, mkdirSync, readFileSync, readdirSync } from 'node:fs'; +import { join } from 'node:path'; // Read workspace_path from stdin (provided by AgentV orchestrator) let workspacePath; @@ -32,10 +32,7 @@ console.log(`Workspace: ${workspacePath}`); console.log(`Repo root: ${repoRoot}`); // Copy to skill discovery directories in the workspace -const skillDirs = [ - join(workspacePath, '.agents', 'skills'), - join(workspacePath, '.pi', 'skills'), -]; +const skillDirs = [join(workspacePath, '.agents', 'skills'), join(workspacePath, '.pi', 'skills')]; for (const dir of skillDirs) { mkdirSync(dir, { recursive: true }); } diff --git a/packages/core/src/evaluation/providers/pi-cli.ts b/packages/core/src/evaluation/providers/pi-cli.ts index c182f85a..7c0dc25e 100644 --- a/packages/core/src/evaluation/providers/pi-cli.ts +++ b/packages/core/src/evaluation/providers/pi-cli.ts @@ -259,6 +259,28 @@ export class PiCliProvider implements Provider { } } + // When a subprovider is explicitly configured, remove ambient env vars from + // other providers that pi-cli auto-detects (e.g., AZURE_OPENAI_* vars override + // --provider flags). 
This ensures the configured subprovider is actually used. + if (this.config.subprovider) { + const provider = this.config.subprovider.toLowerCase(); + const PROVIDER_ENV_PREFIXES: Record<string, string[]> = { + openrouter: ['AZURE_OPENAI_', 'ANTHROPIC_API_KEY', 'GEMINI_API_KEY'], + anthropic: ['AZURE_OPENAI_', 'OPENROUTER_API_KEY', 'GEMINI_API_KEY'], + openai: ['AZURE_OPENAI_', 'OPENROUTER_API_KEY', 'GEMINI_API_KEY', 'ANTHROPIC_API_KEY'], + google: ['AZURE_OPENAI_', 'OPENROUTER_API_KEY', 'ANTHROPIC_API_KEY'], + gemini: ['AZURE_OPENAI_', 'OPENROUTER_API_KEY', 'ANTHROPIC_API_KEY'], + }; + const prefixesToRemove = PROVIDER_ENV_PREFIXES[provider]; + if (prefixesToRemove) { + for (const key of Object.keys(env)) { + if (prefixesToRemove.some((prefix) => key.startsWith(prefix))) { + delete env[key]; + } + } + } + } + return env; } diff --git a/plugins/agentic-engineering/skills/agent-plugin-review/SKILL.md b/plugins/agentic-engineering/skills/agent-plugin-review/SKILL.md index 9ba106df..b98a3f50 100644 --- a/plugins/agentic-engineering/skills/agent-plugin-review/SKILL.md +++ b/plugins/agentic-engineering/skills/agent-plugin-review/SKILL.md @@ -38,6 +38,11 @@ Report findings grouped by severity (error > warning > info). If the PR includes eval files, invoke `agentv-eval-review` for AgentV-specific eval quality checks. +Additionally, check each eval YAML for these structural patterns: + +- **File path format**: Every `type: file` input value MUST start with a leading `/` (workspace-root-relative). Paths like `plugins/foo/SKILL.md` are wrong — correct form is `/plugins/foo/SKILL.md`. Scan every `type: file` entry and flag any missing leading slash, showing the corrected path. +- **Repeated inputs**: If the same file input (same `type: file` + `value`) appears identically in every test case, recommend extracting it to the top-level `input` field. AgentV eval files support a top-level `input` section that applies to all tests, eliminating per-test duplication. 
+ ### Step 3: Skill quality review (LLM judgment) For each SKILL.md, check against `references/skill-quality-checklist.md`: @@ -69,6 +74,11 @@ For plugins with multi-phase workflows, check against `references/workflow-check - Artifact self-correction with corrections log - Learning loop mechanism +**Hard gate detection recipe** — For each phase skill after the first: +1. Read the SKILL.md body +2. Check whether it verifies that the previous phase's output artifact exists before doing any work +3. If no such check exists, flag it as a missing hard gate. Recommend adding a gate at the top of the skill that checks for the prerequisite artifact (e.g., `deploy-plan.md`) and stops with a clear message telling the user which skill to run first if the artifact is missing + ### Step 5: Post review Post findings as inline PR comments at specific line numbers. Group by severity: diff --git a/plugins/agentic-engineering/skills/agent-plugin-review/references/workflow-checklist.md b/plugins/agentic-engineering/skills/agent-plugin-review/references/workflow-checklist.md index c5f3fa1f..7fed797b 100644 --- a/plugins/agentic-engineering/skills/agent-plugin-review/references/workflow-checklist.md +++ b/plugins/agentic-engineering/skills/agent-plugin-review/references/workflow-checklist.md @@ -28,10 +28,12 @@ From [Superpowers](https://github.com/obra/superpowers/) `<HARD-GATE>` pattern: - [ ] Gates cannot be silently bypassed - [ ] Gate checks happen at the start of the skill, before any work +**How to detect missing hard gates:** For each phase skill beyond the first, read the SKILL.md and check whether it verifies the previous phase's output artifact exists (e.g., `deploy-plan.md`, `design.md`) before starting work. If a skill jumps straight into execution without checking prerequisites, it is missing a hard gate. + Example gate: ``` -HARD GATE: `hld-review.md` MUST exist in {output_dir}/. -If missing, inform the user: "Run the design-review skill first." STOP. 
+HARD GATE: `deploy-plan.md` MUST exist in {output_dir}/. +If missing, inform the user: "Run the deploy-plan skill first to generate a deployment plan." STOP. ``` ## Artifact Contracts