From 7ac2411a67ddfbd3d94485f7b77a889c526ac4ec Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 30 Mar 2026 04:33:45 +0000
Subject: [PATCH 1/7] feat(ci): add marketplace, frontmatter, and eval
 validation to GitHub Actions

- Add composite setup-bun action with caching (mirrors opencode pattern)
- Add marketplace job: schema validation, sort check, sync check, frontmatter validation
- Add evals job: installs agentv globally, validates eval dirs have eval files, runs agentv validate with glob patterns
- Add glob pattern support to `agentv validate` command (uses fast-glob, already a dependency)
- Sort marketplace.json plugins alphabetically and sync .github/plugin/ copy

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .claude-plugin/marketplace.json               |  12 +-
 .github/actions/setup-bun/action.yml          |  41 ++++
 .github/plugin/marketplace.json               |  12 +-
 .github/workflows/validate.yml                |  35 +++
 .../src/commands/validate/validate-files.ts   |  49 ++--
 scripts/marketplace/check-sorted.ts           |  41 ++++
 scripts/marketplace/sync.ts                   |  17 ++
 scripts/marketplace/validate-frontmatter.ts   | 219 ++++++++++++++++++
 scripts/marketplace/validate-marketplace.ts   |  94 ++++++++
 scripts/validate-eval-dirs.ts                 |  74 ++++++
 10 files changed, 566 insertions(+), 28 deletions(-)
 create mode 100644 .github/actions/setup-bun/action.yml
 create mode 100644 scripts/marketplace/check-sorted.ts
 create mode 100644 scripts/marketplace/sync.ts
 create mode 100644 scripts/marketplace/validate-frontmatter.ts
 create mode 100644 scripts/marketplace/validate-marketplace.ts
 create mode 100644 scripts/validate-eval-dirs.ts

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 1d2e158cb..8283ecf40 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -8,9 +8,9 @@
   },
   "plugins": [
     {
-      "name": "agentv-dev",
-      "description": "Development skills for building and optimizing AgentV evaluations",
-      "source": "./plugins/agentv-dev"
+      "name": "agentic-engineering",
+      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
+      "source": "./plugins/agentic-engineering"
     },
     {
       "name": "agentv-claude-trace",
@@ -18,9 +18,9 @@
       "source": "./plugins/agentv-claude-trace"
     },
     {
-      "name": "agentic-engineering",
-      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
-      "source": "./plugins/agentic-engineering"
+      "name": "agentv-dev",
+      "description": "Development skills for building and optimizing AgentV evaluations",
+      "source": "./plugins/agentv-dev"
     }
   ]
 }
diff --git a/.github/actions/setup-bun/action.yml b/.github/actions/setup-bun/action.yml
new file mode 100644
index 000000000..51efd282a
--- /dev/null
+++ b/.github/actions/setup-bun/action.yml
@@ -0,0 +1,41 @@
+name: "Setup Bun"
+description: "Setup Bun with caching and install dependencies"
+runs:
+  using: "composite"
+  steps:
+    - name: Get baseline download URL
+      id: bun-url
+      shell: bash
+      run: |
+        if [ "$RUNNER_ARCH" = "X64" ]; then
+          V=$(node -p "require('./package.json').packageManager.split('@')[1]")
+          case "$RUNNER_OS" in
+            macOS)   OS=darwin ;;
+            Linux)   OS=linux ;;
+            Windows) OS=windows ;;
+          esac
+          echo "url=https://github.com/oven-sh/bun/releases/download/bun-v${V}/bun-${OS}-x64-baseline.zip" >> "$GITHUB_OUTPUT"
+        fi
+
+    - name: Setup Bun
+      uses: oven-sh/setup-bun@v2
+      with:
+        bun-version-file: ${{ !steps.bun-url.outputs.url && 'package.json' || '' }}
+        bun-download-url: ${{ steps.bun-url.outputs.url }}
+
+    - name: Get cache directory
+      id: cache
+      shell: bash
+      run: echo "dir=$(bun pm cache)" >> "$GITHUB_OUTPUT"
+
+    - name: Cache Bun dependencies
+      uses: actions/cache@v4
+      with:
+        path: ${{ steps.cache.outputs.dir }}
+        key: ${{ runner.os }}-bun-${{ hashFiles('**/bun.lock') }}
+        restore-keys: |
+          ${{ runner.os }}-bun-
+
+    - name: Install dependencies
+      run: bun install --frozen-lockfile
+      shell: bash
diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json
index 1d2e158cb..8283ecf40 100644
--- a/.github/plugin/marketplace.json
+++ b/.github/plugin/marketplace.json
@@ -8,9 +8,9 @@
   },
   "plugins": [
     {
-      "name": "agentv-dev",
-      "description": "Development skills for building and optimizing AgentV evaluations",
-      "source": "./plugins/agentv-dev"
+      "name": "agentic-engineering",
+      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
+      "source": "./plugins/agentic-engineering"
     },
     {
       "name": "agentv-claude-trace",
@@ -18,9 +18,9 @@
       "source": "./plugins/agentv-claude-trace"
     },
     {
-      "name": "agentic-engineering",
-      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
-      "source": "./plugins/agentic-engineering"
+      "name": "agentv-dev",
+      "description": "Development skills for building and optimizing AgentV evaluations",
+      "source": "./plugins/agentv-dev"
     }
   ]
 }
diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
index 815846321..15036576a 100644
--- a/.github/workflows/validate.yml
+++ b/.github/workflows/validate.yml
@@ -21,3 +21,38 @@ jobs:
             --glob-ignore-case
             --root-dir .
             "**/*.md"
+
+  marketplace:
+    name: Validate Marketplace
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-bun
+
+      - name: Validate marketplace.json (schema + sync)
+        run: bun scripts/marketplace/validate-marketplace.ts
+
+      - name: Check marketplace sorted
+        run: bun scripts/marketplace/check-sorted.ts
+
+      - name: Validate frontmatter
+        run: bun scripts/marketplace/validate-frontmatter.ts
+
+  evals:
+    name: Validate Evals
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-bun
+
+      - name: Build
+        run: bun run build
+
+      - name: Install agentv globally
+        run: bun install -g agentv
+
+      - name: Check evals directories have eval files
+        run: bun scripts/validate-eval-dirs.ts
+
+      - name: Validate eval schemas
+        run: agentv validate 'examples/features/**/evals/**/*.eval.yaml' 'examples/features/**/*.EVAL.yaml'
diff --git a/apps/cli/src/commands/validate/validate-files.ts b/apps/cli/src/commands/validate/validate-files.ts
index 91b569e34..b00c27b47 100644
--- a/apps/cli/src/commands/validate/validate-files.ts
+++ b/apps/cli/src/commands/validate/validate-files.ts
@@ -10,6 +10,7 @@ import {
   validateFileReferences,
   validateTargetsFile,
 } from '@agentv/core/evaluation/validation';
+import fg from 'fast-glob';
 
 /**
  * Validate YAML files for AgentV schema compliance.
@@ -67,34 +68,50 @@ async function validateSingleFile(filePath: string): Promise<ValidationResult> {
 }
 
 async function expandPaths(paths: readonly string[]): Promise<readonly string[]> {
-  const expanded: string[] = [];
+  const expanded = new Set<string>();
 
   for (const inputPath of paths) {
     const absolutePath = path.resolve(inputPath);
 
-    // Check if path exists
+    // Try as literal file or directory first
     try {
       await access(absolutePath, constants.F_OK);
+      const stats = await stat(absolutePath);
+
+      if (stats.isFile()) {
+        if (isYamlFile(absolutePath)) expanded.add(absolutePath);
+        continue;
+      }
+      if (stats.isDirectory()) {
+        const yamlFiles = await findYamlFiles(absolutePath);
+        for (const f of yamlFiles) expanded.add(f);
+        continue;
+      }
     } catch {
-      console.warn(`Warning: Path not found: ${inputPath}`);
-      continue;
+      // Not a literal path — fall through to glob matching
     }
 
-    const stats = await stat(absolutePath);
-
-    if (stats.isFile()) {
-      // Only include YAML files
-      if (isYamlFile(absolutePath)) {
-        expanded.push(absolutePath);
-      }
-    } else if (stats.isDirectory()) {
-      // Recursively find all YAML files in directory
-      const yamlFiles = await findYamlFiles(absolutePath);
-      expanded.push(...yamlFiles);
+    // Treat as glob pattern
+    const globPattern = inputPath.includes('\\') ? inputPath.replace(/\\/g, '/') : inputPath;
+    const matches = await fg(globPattern, {
+      cwd: process.cwd(),
+      absolute: true,
+      onlyFiles: true,
+      unique: true,
+      dot: false,
+      followSymbolicLinks: true,
+    });
+
+    const yamlMatches = matches.filter((f) => isYamlFile(f));
+    if (yamlMatches.length === 0) {
+      console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
     }
+    for (const f of yamlMatches) expanded.add(path.normalize(f));
   }
 
-  return expanded;
+  const sorted = Array.from(expanded);
+  sorted.sort();
+  return sorted;
 }
 
 async function findYamlFiles(dirPath: string): Promise<readonly string[]> {
diff --git a/scripts/marketplace/check-sorted.ts b/scripts/marketplace/check-sorted.ts
new file mode 100644
index 000000000..8192911a8
--- /dev/null
+++ b/scripts/marketplace/check-sorted.ts
@@ -0,0 +1,41 @@
+#!/usr/bin/env bun
+/**
+ * Checks that marketplace.json plugins are alphabetically sorted by name.
+ *
+ * Usage:
+ *   bun scripts/marketplace/check-sorted.ts           # check, exit 1 if unsorted
+ *   bun scripts/marketplace/check-sorted.ts --fix     # sort in place
+ */
+
+import { readFileSync, writeFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+
+const root = resolve(import.meta.dirname, '../..');
+const MARKETPLACE = resolve(root, '.claude-plugin/marketplace.json');
+
+type Plugin = { name: string; [k: string]: unknown };
+type Marketplace = { plugins: Plugin[]; [k: string]: unknown };
+
+const raw = readFileSync(MARKETPLACE, 'utf8');
+const mp: Marketplace = JSON.parse(raw);
+
+const cmp = (a: Plugin, b: Plugin) => a.name.toLowerCase().localeCompare(b.name.toLowerCase());
+
+if (process.argv.includes('--fix')) {
+  mp.plugins.sort(cmp);
+  writeFileSync(MARKETPLACE, `${JSON.stringify(mp, null, 2)}\n`);
+  console.log(`Sorted ${mp.plugins.length} plugins`);
+  process.exit(0);
+}
+
+for (let i = 1; i < mp.plugins.length; i++) {
+  if (cmp(mp.plugins[i - 1], mp.plugins[i]) > 0) {
+    console.error(
+      `marketplace.json plugins are not sorted: '${mp.plugins[i - 1].name}' should come after '${mp.plugins[i].name}' (index ${i})`,
+    );
+    console.error('  run: bun scripts/marketplace/check-sorted.ts --fix');
+    process.exit(1);
+  }
+}
+
+console.log(`OK: ${mp.plugins.length} plugins sorted`);
diff --git a/scripts/marketplace/sync.ts b/scripts/marketplace/sync.ts
new file mode 100644
index 000000000..68d621290
--- /dev/null
+++ b/scripts/marketplace/sync.ts
@@ -0,0 +1,17 @@
+#!/usr/bin/env bun
+/**
+ * Syncs marketplace.json from .claude-plugin/ to .github/plugin/.
+ *
+ * Usage:
+ *   bun scripts/marketplace/sync.ts
+ */
+
+import { cp } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+const root = resolve(import.meta.dirname, '../..');
+const src = resolve(root, '.claude-plugin/marketplace.json');
+const dest = resolve(root, '.github/plugin/marketplace.json');
+
+await cp(src, dest);
+console.log('Synced marketplace.json → .github/plugin/marketplace.json');
diff --git a/scripts/marketplace/validate-frontmatter.ts b/scripts/marketplace/validate-frontmatter.ts
new file mode 100644
index 000000000..99c205e9a
--- /dev/null
+++ b/scripts/marketplace/validate-frontmatter.ts
@@ -0,0 +1,219 @@
+#!/usr/bin/env bun
+/**
+ * Validates YAML frontmatter in agent, skill, and command .md files.
+ *
+ * Usage:
+ *   bun scripts/marketplace/validate-frontmatter.ts                  # scan plugins/
+ *   bun scripts/marketplace/validate-frontmatter.ts /path/to/dir     # scan specific directory
+ *   bun scripts/marketplace/validate-frontmatter.ts file1.md file2.md
+ */
+
+import { readFile, readdir } from 'node:fs/promises';
+import { basename, join, relative, resolve } from 'node:path';
+import { parse as parseYaml } from 'yaml';
+
+const YAML_SPECIAL_CHARS = /[{}[\]*&#!|>%@`]/;
+const BOM = /^\uFEFF/;
+const FRONTMATTER_REGEX = /^---\s*\n((?:[\s\S]*?\n)?)---\s*(?:\n|$)/; // closing --- must start a line
+
+function quoteSpecialValues(text: string): string {
+  const lines = text.split('\n');
+  const result: string[] = [];
+
+  for (const line of lines) {
+    const match = line.match(/^([a-zA-Z_-]+):\s+(.+)$/);
+    if (match) {
+      const [, key, value] = match;
+      if (!key || !value) {
+        result.push(line);
+        continue;
+      }
+      if (
+        (value.startsWith('"') && value.endsWith('"')) ||
+        (value.startsWith("'") && value.endsWith("'"))
+      ) {
+        result.push(line);
+        continue;
+      }
+      // Skip YAML block scalar indicators (>, >-, |, |-, etc.)
+      if (/^[>|][+-]?$/.test(value.trim())) {
+        result.push(line);
+        continue;
+      }
+      if (YAML_SPECIAL_CHARS.test(value)) {
+        const escaped = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
+        result.push(`${key}: "${escaped}"`);
+        continue;
+      }
+    }
+    result.push(line);
+  }
+
+  return result.join('\n');
+}
+
+interface ParseResult {
+  frontmatter: Record<string, unknown>;
+  content: string;
+  error?: string;
+}
+
+function parseFrontmatter(markdown: string): ParseResult {
+  // Strip a leading BOM once so the regex match and the body slice share offsets.
+  const text = markdown.replace(BOM, '');
+  const match = text.match(FRONTMATTER_REGEX);
+  if (!match) {
+    return { frontmatter: {}, content: markdown, error: 'No frontmatter found' };
+  }
+  // match[1] is the raw YAML between the fences; match[0] spans through the closing fence.
+  const frontmatterText = quoteSpecialValues(match[1] || '');
+  const content = text.slice(match[0].length);
+  try {
+    const parsed = parseYaml(frontmatterText);
+    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+      return { frontmatter: parsed as Record<string, unknown>, content };
+    }
+    return {
+      frontmatter: {},
+      content,
+      error: `YAML parsed but result is not an object (got ${typeof parsed}${Array.isArray(parsed) ? ' array' : ''})`,
+    };
+  } catch (err) {
+    return {
+      frontmatter: {},
+      content,
+      error: `YAML parse failed: ${err instanceof Error ? err.message : err}`,
+    };
+  }
+}
+
+type FileType = 'agent' | 'skill' | 'command';
+
+interface ValidationIssue {
+  level: 'error' | 'warning';
+  message: string;
+}
+
+function validateAgent(fm: Record<string, unknown>): ValidationIssue[] {
+  const issues: ValidationIssue[] = [];
+  if (!fm.name || typeof fm.name !== 'string')
+    issues.push({ level: 'error', message: 'Missing required "name" field' });
+  if (!fm.description || typeof fm.description !== 'string')
+    issues.push({ level: 'error', message: 'Missing required "description" field' });
+  return issues;
+}
+
+function validateSkill(fm: Record<string, unknown>): ValidationIssue[] {
+  const issues: ValidationIssue[] = [];
+  if (!fm.description && !fm.when_to_use)
+    issues.push({ level: 'error', message: 'Missing required "description" field' });
+  return issues;
+}
+
+function validateCommand(fm: Record<string, unknown>): ValidationIssue[] {
+  const issues: ValidationIssue[] = [];
+  if (!fm.description || typeof fm.description !== 'string')
+    issues.push({ level: 'error', message: 'Missing required "description" field' });
+  return issues;
+}
+
+function detectFileType(filePath: string): FileType | null {
+  const normalized = filePath.replace(/\\/g, '/');
+  const inSkillContent = /\/skills\/[^/]+\//.test(normalized);
+  if (normalized.includes('/agents/') && !inSkillContent && basename(filePath) !== 'README.md')
+    return 'agent';
+  if (normalized.includes('/skills/') && basename(filePath) === 'SKILL.md') return 'skill';
+  if (normalized.includes('/commands/') && !inSkillContent) return 'command';
+  return null;
+}
+
+async function findMdFiles(baseDir: string): Promise<{ path: string; type: FileType }[]> {
+  const results: { path: string; type: FileType }[] = [];
+
+  async function walk(dir: string) {
+    const entries = await readdir(dir, { withFileTypes: true });
+    for (const entry of entries) {
+      const fullPath = join(dir, entry.name);
+      if (entry.isDirectory()) {
+        await walk(fullPath);
+      } else if (entry.name.endsWith('.md')) {
+        const type = detectFileType(fullPath);
+        if (type) results.push({ path: fullPath, type });
+      }
+    }
+  }
+
+  await walk(baseDir);
+  return results;
+}
+
+async function main() {
+  const args = process.argv.slice(2);
+  const root = resolve(import.meta.dirname, '../..');
+
+  let files: { path: string; type: FileType }[];
+  let baseDir: string;
+
+  if (args.length > 0 && args.every((a) => a.endsWith('.md'))) {
+    baseDir = process.cwd();
+    files = [];
+    for (const arg of args) {
+      const fullPath = resolve(arg);
+      const type = detectFileType(fullPath);
+      if (type) files.push({ path: fullPath, type });
+    }
+  } else {
+    baseDir = args[0] || resolve(root, 'plugins');
+    files = await findMdFiles(baseDir);
+  }
+
+  let totalErrors = 0;
+  let totalWarnings = 0;
+
+  console.log(`Validating ${files.length} frontmatter files...\n`);
+
+  for (const { path: filePath, type } of files) {
+    const rel = relative(baseDir, filePath);
+    const content = await readFile(filePath, 'utf-8');
+    const result = parseFrontmatter(content);
+
+    const issues: ValidationIssue[] = [];
+
+    if (result.error) {
+      issues.push({ level: 'error', message: result.error });
+    } else {
+      switch (type) {
+        case 'agent':
+          issues.push(...validateAgent(result.frontmatter));
+          break;
+        case 'skill':
+          issues.push(...validateSkill(result.frontmatter));
+          break;
+        case 'command':
+          issues.push(...validateCommand(result.frontmatter));
+          break;
+      }
+    }
+
+    if (issues.length > 0) {
+      console.log(`${rel} (${type})`);
+      for (const issue of issues) {
+        const prefix = issue.level === 'error' ? '  ERROR' : '  WARN ';
+        console.log(`${prefix}: ${issue.message}`);
+        if (issue.level === 'error') totalErrors++;
+        else totalWarnings++;
+      }
+      console.log();
+    }
+  }
+
+  console.log('---');
+  console.log(`Validated ${files.length} files: ${totalErrors} errors, ${totalWarnings} warnings`);
+
+  if (totalErrors > 0) process.exit(1);
+}
+
+main().catch((err) => {
+  console.error('Fatal error:', err);
+  process.exit(2);
+});
diff --git a/scripts/marketplace/validate-marketplace.ts b/scripts/marketplace/validate-marketplace.ts
new file mode 100644
index 000000000..c8486e204
--- /dev/null
+++ b/scripts/marketplace/validate-marketplace.ts
@@ -0,0 +1,94 @@
+#!/usr/bin/env bun
+/**
+ * Validates marketplace.json: well-formed JSON, plugins array present,
+ * each entry has required fields, no duplicates, and .github copy is in sync.
+ *
+ * Usage:
+ *   bun scripts/marketplace/validate-marketplace.ts
+ */
+
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+const root = resolve(import.meta.dirname, '../..');
+const src = resolve(root, '.claude-plugin/marketplace.json');
+const dest = resolve(root, '.github/plugin/marketplace.json');
+
+// --- 1. JSON validation ---
+
+const content = await readFile(src, 'utf-8');
+
+let parsed: unknown;
+try {
+  parsed = JSON.parse(content);
+} catch (err) {
+  console.error(
+    `[json] ERROR: .claude-plugin/marketplace.json is not valid JSON: ${err instanceof Error ? err.message : err}`,
+  );
+  process.exit(1);
+}
+
+if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+  console.error('[json] ERROR: .claude-plugin/marketplace.json must be a JSON object');
+  process.exit(1);
+}
+
+const marketplace = parsed as Record<string, unknown>;
+if (!Array.isArray(marketplace.plugins)) {
+  console.error('[json] ERROR: .claude-plugin/marketplace.json missing "plugins" array');
+  process.exit(1);
+}
+
+// --- 2. Plugin entry validation ---
+
+const errors: string[] = [];
+const seen = new Set<string>();
+const required = ['name', 'description', 'source'] as const;
+
+marketplace.plugins.forEach((p: unknown, i: number) => {
+  if (!p || typeof p !== 'object') {
+    errors.push(`plugins[${i}]: must be an object`);
+    return;
+  }
+  const entry = p as Record<string, unknown>;
+  for (const field of required) {
+    if (!entry[field]) {
+      errors.push(`plugins[${i}] (${entry.name ?? '?'}): missing required field "${field}"`);
+    }
+  }
+  if (typeof entry.name === 'string') {
+    if (seen.has(entry.name)) {
+      errors.push(`plugins[${i}]: duplicate plugin name "${entry.name}"`);
+    }
+    seen.add(entry.name);
+  }
+});
+
+if (errors.length) {
+  console.error(
+    `[schema] ${errors.length} validation error(s) in .claude-plugin/marketplace.json:`,
+  );
+  for (const e of errors) console.error(`  - ${e}`);
+  process.exit(1);
+}
+
+// --- 3. Sync check (.claude-plugin → .github/plugin) ---
+
+let destContent: string;
+try {
+  destContent = await readFile(dest, 'utf-8');
+} catch {
+  console.error('[sync] ERROR: .github/plugin/marketplace.json not found');
+  console.error('  Run: bun scripts/marketplace/sync.ts');
+  process.exit(1);
+}
+
+if (content !== destContent) {
+  console.error(
+    '[sync] ERROR: .github/plugin/marketplace.json is out of sync with .claude-plugin/marketplace.json',
+  );
+  console.error('  Run: bun scripts/marketplace/sync.ts');
+  process.exit(1);
+}
+
+console.log(`OK: ${marketplace.plugins.length} plugins validated, sync verified`);
diff --git a/scripts/validate-eval-dirs.ts b/scripts/validate-eval-dirs.ts
new file mode 100644
index 000000000..39c6e9896
--- /dev/null
+++ b/scripts/validate-eval-dirs.ts
@@ -0,0 +1,74 @@
+#!/usr/bin/env bun
+/**
+ * Validates that each feature directory under examples/features/ that has an
+ * evals/ subdirectory contains at least one eval file (*.eval.yaml, *.EVAL.yaml,
+ * eval.yaml, or dataset*.yaml; .yml also accepted) in evals/ or at the feature root.
+ *
+ * Directories without an evals/ subdirectory are skipped — they may be SDK
+ * examples or other non-eval feature demos.
+ *
+ * Usage:
+ *   bun scripts/validate-eval-dirs.ts
+ */
+
+import { globSync, readdirSync, statSync } from 'node:fs';
+import { join, relative, resolve } from 'node:path';
+
+const root = resolve(import.meta.dirname, '..');
+const featuresDir = resolve(root, 'examples/features');
+
+// Feature dirs whose evals/ folder intentionally holds only support files
+// (result JSONL, baselines) rather than eval definitions. Remove entries here
+// once they gain proper eval YAML files.
+const KNOWN_EXCEPTIONS = new Set([
+  'compare', // evals/ holds baseline/candidate result JSONL for agentv compare
+  'trace-analysis', // evals/ holds pre-recorded trace results
+]);
+
+const errors: string[] = [];
+const entries = readdirSync(featuresDir, { withFileTypes: true });
+
+for (const entry of entries) {
+  if (!entry.isDirectory() || entry.name.startsWith('.')) continue;
+
+  const featureDir = join(featuresDir, entry.name);
+  const evalsDir = join(featureDir, 'evals');
+
+  // Only check features that have an evals/ subdirectory
+  try {
+    if (!statSync(evalsDir).isDirectory()) continue;
+  } catch {
+    continue;
+  }
+
+  // Look for eval files in evals/ (recursive) and at feature root.
+  // Matches: *.eval.yaml, *.EVAL.yaml, eval.yaml, dataset*.yaml (config default patterns)
+  const evalPatterns = [
+    '**/*.{eval.yaml,eval.yml,EVAL.yaml,EVAL.yml}',
+    '**/eval.{yaml,yml}',
+    '**/dataset*.{yaml,yml}',
+  ];
+  const evalFilesInEvalsDir = evalPatterns.flatMap((p) => globSync(p, { cwd: evalsDir }));
+  const evalFilesAtRoot = evalPatterns.flatMap((p) =>
+    globSync(p.replace('**/', ''), { cwd: featureDir }),
+  );
+
+  if (evalFilesInEvalsDir.length === 0 && evalFilesAtRoot.length === 0) {
+    if (KNOWN_EXCEPTIONS.has(entry.name)) {
+      console.warn(`WARN: ${relative(root, evalsDir)} has no eval files (known exception)`);
+    } else {
+      errors.push(relative(root, evalsDir));
+    }
+  }
+}
+
+if (errors.length > 0) {
+  console.error(
+    'The following evals/ directories contain no eval files (*.eval.yaml or *.EVAL.yaml):',
+  );
+  for (const e of errors) console.error(`  - ${e}`);
+  process.exit(1);
+}
+
+const checked = entries.filter((e) => e.isDirectory() && !e.name.startsWith('.')).length;
+console.log(`OK: ${checked} feature directories checked`);

From f5b4f802192fb10c811c5a9baacfeba2fab4636f Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 30 Mar 2026 04:35:19 +0000
Subject: [PATCH 2/7] fix(examples): add demo evals for compare and
 trace-analysis features

Instead of hardcoding known exceptions in the eval-dirs validation
script, add proper eval YAML files to the compare and trace-analysis
example directories so they pass validation like all other features.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../features/compare/evals/dataset.eval.yaml  | 39 +++++++++++++++++++
 .../trace-analysis/evals/dataset.eval.yaml    | 36 +++++++++++++++++
 scripts/validate-eval-dirs.ts                 | 14 +------
 3 files changed, 76 insertions(+), 13 deletions(-)
 create mode 100644 examples/features/compare/evals/dataset.eval.yaml
 create mode 100644 examples/features/trace-analysis/evals/dataset.eval.yaml

diff --git a/examples/features/compare/evals/dataset.eval.yaml b/examples/features/compare/evals/dataset.eval.yaml
new file mode 100644
index 000000000..93adff30d
--- /dev/null
+++ b/examples/features/compare/evals/dataset.eval.yaml
@@ -0,0 +1,39 @@
+$schema: agentv-eval-v2
+
+# Demo eval for the compare example.
+# Run against two targets to generate baseline and candidate result files:
+#   agentv eval evals/dataset.eval.yaml --target baseline
+#   agentv eval evals/dataset.eval.yaml --target candidate
+# Then compare:
+#   agentv compare evals/baseline-results.jsonl evals/candidate-results.jsonl
+
+name: compare-demo
+description: Demo eval for generating baseline and candidate results to compare
+
+tests:
+  - id: code-review-001
+    input: Review the following code for bugs and suggest improvements.
+    criteria: Identifies at least one issue and suggests a fix
+    assertions:
+      - type: contains
+        value: bug
+      - type: contains
+        value: fix
+
+  - id: code-review-002
+    input: Explain what this function does and how it could be optimized.
+    criteria: Provides a clear explanation and at least one optimization suggestion
+    assertions:
+      - type: contains
+        value: function
+      - type: contains
+        value: optim
+
+  - id: code-gen-001
+    input: Write a function that checks if a string is a palindrome.
+    criteria: Returns working code that handles basic palindrome cases
+    assertions:
+      - type: contains
+        value: palindrome
+      - type: is-json
+        required: false
diff --git a/examples/features/trace-analysis/evals/dataset.eval.yaml b/examples/features/trace-analysis/evals/dataset.eval.yaml
new file mode 100644
index 000000000..d77ef444a
--- /dev/null
+++ b/examples/features/trace-analysis/evals/dataset.eval.yaml
@@ -0,0 +1,36 @@
+$schema: agentv-eval-v2
+
+# Demo eval for the trace-analysis example.
+# Run this eval to generate result traces, then analyze with:
+#   agentv trace evals/multi-agent.eval.results.jsonl
+
+name: trace-analysis-demo
+description: Demo eval for generating execution traces to analyze
+
+tests:
+  - id: research-question
+    input: What are the key differences between REST and GraphQL APIs?
+    criteria: Covers at least three differences including query flexibility, over-fetching, and type system
+    assertions:
+      - type: contains
+        value: REST
+      - type: contains
+        value: GraphQL
+      - type: regex
+        value: "type.?system|schema|typed"
+
+  - id: code-review-task
+    input: Review this Python function for potential issues and suggest improvements.
+    criteria: Identifies at least one code quality issue
+    assertions:
+      - type: contains
+        value: suggest
+      - type: regex
+        value: "improv|fix|refactor|optim"
+
+  - id: simple-qa
+    input: What is the capital of France?
+    criteria: Correctly answers Paris
+    assertions:
+      - type: contains
+        value: Paris
diff --git a/scripts/validate-eval-dirs.ts b/scripts/validate-eval-dirs.ts
index 39c6e9896..94e4c197d 100644
--- a/scripts/validate-eval-dirs.ts
+++ b/scripts/validate-eval-dirs.ts
@@ -17,14 +17,6 @@ import { join, relative, resolve } from 'node:path';
 const root = resolve(import.meta.dirname, '..');
 const featuresDir = resolve(root, 'examples/features');
 
-// Feature dirs whose evals/ folder intentionally holds only support files
-// (result JSONL, baselines) rather than eval definitions. Remove entries here
-// once they gain proper eval YAML files.
-const KNOWN_EXCEPTIONS = new Set([
-  'compare', // evals/ holds baseline/candidate result JSONL for agentv compare
-  'trace-analysis', // evals/ holds pre-recorded trace results
-]);
-
 const errors: string[] = [];
 const entries = readdirSync(featuresDir, { withFileTypes: true });
 
@@ -54,11 +46,7 @@ for (const entry of entries) {
   );
 
   if (evalFilesInEvalsDir.length === 0 && evalFilesAtRoot.length === 0) {
-    if (KNOWN_EXCEPTIONS.has(entry.name)) {
-      console.warn(`WARN: ${relative(root, evalsDir)} has no eval files (known exception)`);
-    } else {
-      errors.push(relative(root, evalsDir));
-    }
+    errors.push(relative(root, evalsDir));
   }
 }
 

From f153d0234876fa38ca842b696b97cbfc13534fb8 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 30 Mar 2026 04:44:15 +0000
Subject: [PATCH 3/7] fix(ci): use local build for validate, fix import.meta
 CJS warning

- Run agentv validate from built dist (bun apps/cli/dist/cli.js) instead
  of installing from npm, which lacks the new glob support
- Replace import.meta.url with __dirname in pi-coding-agent.ts to
  eliminate the esbuild CJS warning about empty import.meta

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/validate.yml                         |  5 +----
 .../core/src/evaluation/providers/pi-coding-agent.ts   | 10 ++++------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
index 15036576a..8f62d5a8a 100644
--- a/.github/workflows/validate.yml
+++ b/.github/workflows/validate.yml
@@ -48,11 +48,8 @@ jobs:
       - name: Build
         run: bun run build
 
-      - name: Install agentv globally
-        run: bun install -g agentv
-
       - name: Check evals directories have eval files
         run: bun scripts/validate-eval-dirs.ts
 
       - name: Validate eval schemas
-        run: agentv validate 'examples/features/**/evals/**/*.eval.yaml' 'examples/features/**/*.EVAL.yaml'
+        run: bun apps/cli/dist/cli.js validate 'examples/features/**/evals/**/*.eval.yaml' 'examples/features/**/*.EVAL.yaml'
diff --git a/packages/core/src/evaluation/providers/pi-coding-agent.ts b/packages/core/src/evaluation/providers/pi-coding-agent.ts
index 3e4691bd0..10284081f 100644
--- a/packages/core/src/evaluation/providers/pi-coding-agent.ts
+++ b/packages/core/src/evaluation/providers/pi-coding-agent.ts
@@ -15,7 +15,6 @@ import type { WriteStream } from 'node:fs';
 import { mkdir } from 'node:fs/promises';
 import path from 'node:path';
 import { createInterface } from 'node:readline';
-import { fileURLToPath } from 'node:url';
 
 import { recordPiLogEntry } from './pi-log-tracker.js';
 import { extractPiTextContent, toFiniteNumber, toPiContentArray } from './pi-utils.js';
@@ -53,9 +52,9 @@ async function promptInstall(): Promise<boolean> {
 
 /** Resolve agentv's own package root (where bun add should install peer deps). */
 function findAgentvRoot(): string {
-  const thisFile = fileURLToPath(import.meta.url);
-  let dir = path.dirname(thisFile);
-  // Walk up until we find a package.json (covers both src and dist layouts)
+  // Walk up from this file's directory until we find a package.json.
+  // Works in both ESM (__dirname via Node/Bun polyfill) and CJS (__dirname native).
+  let dir = __dirname;
   for (let i = 0; i < 10; i++) {
     try {
       const pkg = path.join(dir, 'package.json');
@@ -68,8 +67,7 @@ function findAgentvRoot(): string {
       dir = parent;
     }
   }
-  // Fallback: current file's directory
-  return path.dirname(thisFile);
+  return __dirname;
 }
 
 async function doLoadSdkModules(): Promise<void> {

From 9aa3ff8b80cbaaad83bd65863f8ee2981312443a Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 30 Mar 2026 05:04:06 +0000
Subject: [PATCH 4/7] fix(core): use tsup shims for import.meta CJS
 compatibility

Enable `shims: true` in tsup config instead of manual runtime guards.
tsup injects proper CJS shims (pathToFileURL(__filename)) so
import.meta.url works in CJS output without esbuild warnings, while
ESM output uses native import.meta.url directly.

Revert pi-coding-agent.ts to its original idiomatic ESM code, since
the build tool now handles cross-format compatibility.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../core/src/evaluation/providers/pi-coding-agent.ts   | 10 ++++++----
 packages/core/tsup.config.ts                           |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/packages/core/src/evaluation/providers/pi-coding-agent.ts b/packages/core/src/evaluation/providers/pi-coding-agent.ts
index 10284081f..3e4691bd0 100644
--- a/packages/core/src/evaluation/providers/pi-coding-agent.ts
+++ b/packages/core/src/evaluation/providers/pi-coding-agent.ts
@@ -15,6 +15,7 @@ import type { WriteStream } from 'node:fs';
 import { mkdir } from 'node:fs/promises';
 import path from 'node:path';
 import { createInterface } from 'node:readline';
+import { fileURLToPath } from 'node:url';
 
 import { recordPiLogEntry } from './pi-log-tracker.js';
 import { extractPiTextContent, toFiniteNumber, toPiContentArray } from './pi-utils.js';
@@ -52,9 +53,9 @@ async function promptInstall(): Promise<boolean> {
 
 /** Resolve agentv's own package root (where bun add should install peer deps). */
 function findAgentvRoot(): string {
-  // Walk up from this file's directory until we find a package.json.
-  // Works in both ESM (__dirname via Node/Bun polyfill) and CJS (__dirname native).
-  let dir = __dirname;
+  const thisFile = fileURLToPath(import.meta.url);
+  let dir = path.dirname(thisFile);
+  // Walk up until we find a package.json (covers both src and dist layouts)
   for (let i = 0; i < 10; i++) {
     try {
       const pkg = path.join(dir, 'package.json');
@@ -67,7 +68,8 @@ function findAgentvRoot(): string {
       dir = parent;
     }
   }
-  return __dirname;
+  // Fallback: current file's directory
+  return path.dirname(thisFile);
 }
 
 async function doLoadSdkModules(): Promise<void> {
diff --git a/packages/core/tsup.config.ts b/packages/core/tsup.config.ts
index e4edee5a4..e85a45308 100644
--- a/packages/core/tsup.config.ts
+++ b/packages/core/tsup.config.ts
@@ -3,6 +3,7 @@ import { defineConfig } from 'tsup';
 export default defineConfig({
   entry: ['src/index.ts', 'src/evaluation/validation/index.ts'],
   format: ['esm', 'cjs'],
+  shims: true,
   sourcemap: true,
   clean: true,
   dts: {

From a4ddc99de8a2dd56fd4addaa4aeeb2ffd98fb8cb Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 30 Mar 2026 05:20:26 +0000
Subject: [PATCH 5/7] fix(examples): use static criteria in env-interpolation
 eval

The criteria field was set to ${{ EVAL_CRITERIA }}, which resolves to an
empty string when the env var isn't set, causing validation to fail in
CI. Move the interpolation demo to the expected_output and input fields
instead, keeping criteria as a static string.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../features/env-interpolation/evals/dataset.eval.yaml    | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/examples/features/env-interpolation/evals/dataset.eval.yaml b/examples/features/env-interpolation/evals/dataset.eval.yaml
index b6d9980a1..608b843bd 100644
--- a/examples/features/env-interpolation/evals/dataset.eval.yaml
+++ b/examples/features/env-interpolation/evals/dataset.eval.yaml
@@ -4,12 +4,10 @@
 # Missing variables resolve to empty string.
 #
 # Usage:
-#   export EVAL_CRITERIA="Responds with a friendly greeting"
 #   export CUSTOM_SYSTEM_PROMPT="You are a helpful assistant who always greets warmly."
 #   agentv eval examples/features/env-interpolation/evals/dataset.eval.yaml
 #
 # Or use a .env file in the project root:
-#   EVAL_CRITERIA=Responds with a friendly greeting
 #   CUSTOM_SYSTEM_PROMPT=You are a helpful assistant who always greets warmly.
 
 description: Demonstrates ${{ VAR }} interpolation in eval fields
@@ -20,13 +18,13 @@ execution:
 tests:
   # Full-value interpolation: entire field value from env var
   - id: full-value
-    criteria: "${{ EVAL_CRITERIA }}"
+    criteria: Responds with a friendly greeting
     input: "Hello!"
-    expected_output: "Hello! How can I help you today?"
+    expected_output: "${{ EXPECTED_GREETING }}"
 
   # Partial/inline interpolation: env var embedded in a larger string
   - id: partial-value
-    criteria: "Response uses the system prompt persona and ${{ EVAL_CRITERIA }}"
+    criteria: Response uses the system prompt persona
     input:
       - role: system
         content: "${{ CUSTOM_SYSTEM_PROMPT }}"

From a23783806ad23dfd1e8aa8b1e5ac30c9f5013084 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 30 Mar 2026 05:24:42 +0000
Subject: [PATCH 6/7] fix(core): remove name-requires-description validation
 warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`name` and `description` are both optional metadata — there is no reason
to couple them. `name` can be derived from the filename if not provided.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../src/evaluation/validation/eval-validator.ts | 10 ----------
 .../validation/eval-validator.test.ts           | 17 -----------------
 2 files changed, 27 deletions(-)

diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts
index 9db0d7f6a..133decd15 100644
--- a/packages/core/src/evaluation/validation/eval-validator.ts
+++ b/packages/core/src/evaluation/validation/eval-validator.ts
@@ -523,16 +523,6 @@ function validateMetadata(parsed: JsonObject, filePath: string, errors: Validati
         });
       }
     }
-
-    // Warn if name is present but description is missing
-    if (!('description' in parsed) || parsed.description === undefined) {
-      errors.push({
-        severity: 'warning',
-        filePath,
-        location: 'name',
-        message: "When 'name' is present, 'description' should also be provided.",
-      });
-    }
   }
 }
 
diff --git a/packages/core/test/evaluation/validation/eval-validator.test.ts b/packages/core/test/evaluation/validation/eval-validator.test.ts
index 65546754f..cd11bd48d 100644
--- a/packages/core/test/evaluation/validation/eval-validator.test.ts
+++ b/packages/core/test/evaluation/validation/eval-validator.test.ts
@@ -457,23 +457,6 @@ describe('validateEvalFile', () => {
   });
 
   describe('metadata validation', () => {
-    it('warns when name is present without description', async () => {
-      const filePath = path.join(tempDir, 'meta-name-only.yaml');
-      await writeFile(
-        filePath,
-        `name: my-eval
-tests:
-  - id: test-1
-    input: "Query"
-`,
-      );
-
-      const result = await validateEvalFile(filePath);
-
-      const warnings = result.errors.filter((e) => e.severity === 'warning');
-      expect(warnings.some((e) => e.message.includes('description'))).toBe(true);
-    });
-
     it('warns when name has invalid format', async () => {
       const filePath = path.join(tempDir, 'meta-invalid-name.yaml');
       await writeFile(

From 4b4230271d923f476df3be5c6272419be79c7173 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Mon, 30 Mar 2026 05:46:56 +0000
Subject: [PATCH 7/7] fix(examples): align demo eval test IDs with fixtures,
 remove $schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Compare eval: match all 5 fixture test IDs (code-review-001/002/003,
  code-gen-001/002) so eval output is directly comparable
- Remove $schema from eval YAML files — not needed
- Verified: full eval run passes (5/5, 3/3), output is
  agentv-compare-compatible with matching test IDs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../features/compare/evals/dataset.eval.yaml  | 20 +++++++++++++++----
 .../trace-analysis/evals/dataset.eval.yaml    |  2 --
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/examples/features/compare/evals/dataset.eval.yaml b/examples/features/compare/evals/dataset.eval.yaml
index 93adff30d..158c70b0d 100644
--- a/examples/features/compare/evals/dataset.eval.yaml
+++ b/examples/features/compare/evals/dataset.eval.yaml
@@ -1,5 +1,3 @@
-$schema: agentv-eval-v2
-
 # Demo eval for the compare example.
 # Run against two targets to generate baseline and candidate result files:
 #   agentv eval evals/dataset.eval.yaml --target baseline
@@ -29,11 +27,25 @@ tests:
       - type: contains
         value: optim
 
+  - id: code-review-003
+    input: Review this error handling code for edge cases and missing checks.
+    criteria: Identifies missing error handling or edge cases
+    assertions:
+      - type: contains
+        value: error
+      - type: regex
+        value: "edge.?case|missing|exception|null"
+
   - id: code-gen-001
     input: Write a function that checks if a string is a palindrome.
     criteria: Returns working code that handles basic palindrome cases
     assertions:
       - type: contains
         value: palindrome
-      - type: is-json
-        required: false
+
+  - id: code-gen-002
+    input: Write a function that finds the longest common subsequence of two strings.
+    criteria: Returns a correct implementation with reasonable time complexity
+    assertions:
+      - type: regex
+        value: "subsequence|lcs|LCS"
diff --git a/examples/features/trace-analysis/evals/dataset.eval.yaml b/examples/features/trace-analysis/evals/dataset.eval.yaml
index d77ef444a..a8f683aca 100644
--- a/examples/features/trace-analysis/evals/dataset.eval.yaml
+++ b/examples/features/trace-analysis/evals/dataset.eval.yaml
@@ -1,5 +1,3 @@
-$schema: agentv-eval-v2
-
 # Demo eval for the trace-analysis example.
 # Run this eval to generate result traces, then analyze with:
 #   agentv trace evals/multi-agent.eval.results.jsonl