EntityProcess · christso · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
@@ -8,19 +8,19 @@
   },
   "plugins": [
     {
-      "name": "agentv-dev",
-      "description": "Development skills for building and optimizing AgentV evaluations",
-      "source": "./plugins/agentv-dev"
+      "name": "agentic-engineering",
+      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
+      "source": "./plugins/agentic-engineering"
     },
     {
       "name": "agentv-claude-trace",
       "description": "Session tracing plugin — exports Claude Code session traces via OpenTelemetry",
       "source": "./plugins/agentv-claude-trace"
     },
     {
-      "name": "agentic-engineering",
-      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
-      "source": "./plugins/agentic-engineering"
+      "name": "agentv-dev",
+      "description": "Development skills for building and optimizing AgentV evaluations",
+      "source": "./plugins/agentv-dev"
     }
   ]
 }
diff --git a/.github/actions/setup-bun/action.yml b/.github/actions/setup-bun/action.yml
@@ -0,0 +1,41 @@
+name: "Setup Bun"
+description: "Setup Bun with caching and install dependencies"
+runs:
+  using: "composite"
+  steps:
+    - name: Get baseline download URL
+      id: bun-url
+      shell: bash
+      run: |
+        if [ "$RUNNER_ARCH" = "X64" ]; then
+          V=$(node -p "require('./package.json').packageManager.split('@')[1]")
+          case "$RUNNER_OS" in
+            macOS)   OS=darwin ;;
+            Linux)   OS=linux ;;
+            Windows) OS=windows ;;
+          esac
+          echo "url=https://github.com/oven-sh/bun/releases/download/bun-v${V}/bun-${OS}-x64-baseline.zip" >> "$GITHUB_OUTPUT"
+        fi
+
+    - name: Setup Bun
+      uses: oven-sh/setup-bun@v2
+      with:
+        bun-version-file: ${{ !steps.bun-url.outputs.url && 'package.json' || '' }}
+        bun-download-url: ${{ steps.bun-url.outputs.url }}
+
+    - name: Get cache directory
+      id: cache
+      shell: bash
+      run: echo "dir=$(bun pm cache)" >> "$GITHUB_OUTPUT"
+
+    - name: Cache Bun dependencies
+      uses: actions/cache@v4
+      with:
+        path: ${{ steps.cache.outputs.dir }}
+        key: ${{ runner.os }}-bun-${{ hashFiles('**/bun.lock') }}
+        restore-keys: |
+          ${{ runner.os }}-bun-
+
+    - name: Install dependencies
+      run: bun install --frozen-lockfile
+      shell: bash
diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json
@@ -8,19 +8,19 @@
   },
   "plugins": [
     {
-      "name": "agentv-dev",
-      "description": "Development skills for building and optimizing AgentV evaluations",
-      "source": "./plugins/agentv-dev"
+      "name": "agentic-engineering",
+      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
+      "source": "./plugins/agentic-engineering"
     },
     {
       "name": "agentv-claude-trace",
       "description": "Session tracing plugin — exports Claude Code session traces via OpenTelemetry",
       "source": "./plugins/agentv-claude-trace"
     },
     {
-      "name": "agentic-engineering",
-      "description": "Design and review AI agent systems — architecture patterns, workflow design, and plugin quality review",
-      "source": "./plugins/agentic-engineering"
+      "name": "agentv-dev",
+      "description": "Development skills for building and optimizing AgentV evaluations",
+      "source": "./plugins/agentv-dev"
     }
   ]
 }
diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
@@ -21,3 +21,35 @@ jobs:
             --glob-ignore-case
             --root-dir .
             "**/*.md"
+
+  marketplace:
+    name: Validate Marketplace
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-bun
+
+      - name: Validate marketplace.json (schema + sync)
+        run: bun scripts/marketplace/validate-marketplace.ts
+
+      - name: Check marketplace sorted
+        run: bun scripts/marketplace/check-sorted.ts
+
+      - name: Validate frontmatter
+        run: bun scripts/marketplace/validate-frontmatter.ts
+
+  evals:
+    name: Validate Evals
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-bun
+
+      - name: Build
+        run: bun run build
+
+      - name: Check evals directories have eval files
+        run: bun scripts/validate-eval-dirs.ts
+
+      - name: Validate eval schemas
+        run: bun apps/cli/dist/cli.js validate 'examples/features/**/evals/**/*.eval.yaml' 'examples/features/**/*.EVAL.yaml'
diff --git a/apps/cli/src/commands/validate/validate-files.ts b/apps/cli/src/commands/validate/validate-files.ts
@@ -10,6 +10,7 @@ import {
   validateFileReferences,
   validateTargetsFile,
 } from '@agentv/core/evaluation/validation';
+import fg from 'fast-glob';
 
 /**
  * Validate YAML files for AgentV schema compliance.
@@ -67,34 +68,61 @@ async function validateSingleFile(filePath: string): Promise<ValidationResult> {
 }
 
+/**
+ * Expand CLI path arguments into a sorted, de-duplicated list of YAML files.
+ * Each argument is tried as a literal file or directory first and is treated
+ * as a fast-glob pattern (relative to the current working directory) only
+ * when it is neither.
+ */
 async function expandPaths(paths: readonly string[]): Promise<readonly string[]> {
-  const expanded: string[] = [];
+  const expanded = new Set<string>();
 
   for (const inputPath of paths) {
     const absolutePath = path.resolve(inputPath);
 
-    // Check if path exists
+    // Probe for a literal file or directory first. Only the existence probe is
+    // guarded, so errors raised while walking a directory still propagate.
+    let literalKind: 'file' | 'directory' | undefined;
     try {
       await access(absolutePath, constants.F_OK);
+      const stats = await stat(absolutePath);
+      if (stats.isFile()) literalKind = 'file';
+      else if (stats.isDirectory()) literalKind = 'directory';
     } catch {
-      console.warn(`Warning: Path not found: ${inputPath}`);
-      continue;
+      // Not a literal path — fall through to glob matching
     }
 
-    const stats = await stat(absolutePath);
-
-    if (stats.isFile()) {
-      // Only include YAML files
-      if (isYamlFile(absolutePath)) {
-        expanded.push(absolutePath);
-      }
-    } else if (stats.isDirectory()) {
-      // Recursively find all YAML files in directory
-      const yamlFiles = await findYamlFiles(absolutePath);
-      expanded.push(...yamlFiles);
+    if (literalKind === 'file') {
+      // Only include YAML files
+      if (isYamlFile(absolutePath)) expanded.add(absolutePath);
+      continue;
+    }
+    if (literalKind === 'directory') {
+      // Recursively find all YAML files in directory
+      for (const f of await findYamlFiles(absolutePath)) expanded.add(f);
+      continue;
+    }
+
+    // Treat as glob pattern
+    const globPattern = inputPath.includes('\\') ? inputPath.replace(/\\/g, '/') : inputPath;
+    const matches = await fg(globPattern, {
+      cwd: process.cwd(),
+      absolute: true,
+      onlyFiles: true,
+      unique: true,
+      dot: false,
+      followSymbolicLinks: true,
+    });
+
+    const yamlMatches = matches.filter((f) => isYamlFile(f));
+    if (yamlMatches.length === 0) {
+      console.warn(`Warning: No YAML files matched pattern: ${inputPath}`);
     }
+    for (const f of yamlMatches) expanded.add(path.normalize(f));
   }
 
-  return expanded;
+  const sorted = Array.from(expanded);
+  sorted.sort();
+  return sorted;
 }
 
 async function findYamlFiles(dirPath: string): Promise<readonly string[]> {

diff --git a/examples/features/compare/evals/dataset.eval.yaml b/examples/features/compare/evals/dataset.eval.yaml
@@ -0,0 +1,51 @@
+# Demo eval for the compare example.
+# Run against two targets to generate baseline and candidate result files:
+#   agentv eval evals/dataset.eval.yaml --target baseline
+#   agentv eval evals/dataset.eval.yaml --target candidate
+# Then compare:
+#   agentv compare evals/baseline-results.jsonl evals/candidate-results.jsonl
+
+name: compare-demo
+description: Demo eval for generating baseline and candidate results to compare
+
+tests:
+  - id: code-review-001
+    input: Review the following code for bugs and suggest improvements.
+    criteria: Identifies at least one issue and suggests a fix
+    assertions:
+      - type: contains
+        value: bug
+      - type: contains
+        value: fix
+
+  - id: code-review-002
+    input: Explain what this function does and how it could be optimized.
+    criteria: Provides a clear explanation and at least one optimization suggestion
+    assertions:
+      - type: contains
+        value: function
+      - type: contains
+        value: optim
+
+  - id: code-review-003
+    input: Review this error handling code for edge cases and missing checks.
+    criteria: Identifies missing error handling or edge cases
+    assertions:
+      - type: contains
+        value: error
+      - type: regex
+        value: "edge.?case|missing|exception|null"
+
+  - id: code-gen-001
+    input: Write a function that checks if a string is a palindrome.
+    criteria: Returns working code that handles basic palindrome cases
+    assertions:
+      - type: contains
+        value: palindrome
+
+  - id: code-gen-002
+    input: Write a function that finds the longest common subsequence of two strings.
+    criteria: Returns a correct implementation with reasonable time complexity
+    assertions:
+      - type: regex
+        value: "subsequence|lcs|LCS"
diff --git a/examples/features/env-interpolation/evals/dataset.eval.yaml b/examples/features/env-interpolation/evals/dataset.eval.yaml
@@ -4,12 +4,12 @@
 # Missing variables resolve to empty string.
 #
 # Usage:
-#   export EVAL_CRITERIA="Responds with a friendly greeting"
+#   export EXPECTED_GREETING="Hello! How can I help you today?"
 #   export CUSTOM_SYSTEM_PROMPT="You are a helpful assistant who always greets warmly."
 #   agentv eval examples/features/env-interpolation/evals/dataset.eval.yaml
 #
 # Or use a .env file in the project root:
-#   EVAL_CRITERIA=Responds with a friendly greeting
+#   EXPECTED_GREETING=Hello! How can I help you today?
 #   CUSTOM_SYSTEM_PROMPT=You are a helpful assistant who always greets warmly.
 
 description: Demonstrates ${{ VAR }} interpolation in eval fields
@@ -20,13 +20,13 @@ execution:
 tests:
   # Full-value interpolation: entire field value from env var
   - id: full-value
-    criteria: "${{ EVAL_CRITERIA }}"
+    criteria: Responds with a friendly greeting
     input: "Hello!"
-    expected_output: "Hello! How can I help you today?"
+    expected_output: "${{ EXPECTED_GREETING }}"
 
   # Partial/inline interpolation: env var embedded in a larger string
   - id: partial-value
-    criteria: "Response uses the system prompt persona and ${{ EVAL_CRITERIA }}"
+    criteria: Response uses the system prompt persona
     input:
       - role: system
         content: "${{ CUSTOM_SYSTEM_PROMPT }}"

diff --git a/examples/features/trace-analysis/evals/dataset.eval.yaml b/examples/features/trace-analysis/evals/dataset.eval.yaml
@@ -0,0 +1,34 @@
+# Demo eval for the trace-analysis example.
+# Run this eval to generate result traces, then analyze with:
+#   agentv trace evals/multi-agent.eval.results.jsonl
+
+name: trace-analysis-demo
+description: Demo eval for generating execution traces to analyze
+
+tests:
+  - id: research-question
+    input: What are the key differences between REST and GraphQL APIs?
+    criteria: Covers at least three differences including query flexibility, over-fetching, and type system
+    assertions:
+      - type: contains
+        value: REST
+      - type: contains
+        value: GraphQL
+      - type: regex
+        value: "type.?system|schema|typed"
+
+  - id: code-review-task
+    input: Review this Python function for potential issues and suggest improvements.
+    criteria: Identifies at least one code quality issue
+    assertions:
+      - type: contains
+        value: suggest
+      - type: regex
+        value: "improv|fix|refactor|optim"
+
+  - id: simple-qa
+    input: What is the capital of France?
+    criteria: Correctly answers Paris
+    assertions:
+      - type: contains
+        value: Paris
diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts
@@ -523,16 +523,6 @@ function validateMetadata(parsed: JsonObject, filePath: string, errors: Validati
         });
       }
     }
-
-    // Warn if name is present but description is missing
-    if (!('description' in parsed) || parsed.description === undefined) {
-      errors.push({
-        severity: 'warning',
-        filePath,
-        location: 'name',
-        message: "When 'name' is present, 'description' should also be provided.",
-      });
-    }
   }
 }
 

diff --git a/packages/core/test/evaluation/validation/eval-validator.test.ts b/packages/core/test/evaluation/validation/eval-validator.test.ts
@@ -457,23 +457,6 @@ describe('validateEvalFile', () => {
   });
 
   describe('metadata validation', () => {
-    it('warns when name is present without description', async () => {
-      const filePath = path.join(tempDir, 'meta-name-only.yaml');
-      await writeFile(
-        filePath,
-        `name: my-eval
-tests:
-  - id: test-1
-    input: "Query"
-`,
-      );
-
-      const result = await validateEvalFile(filePath);
-
-      const warnings = result.errors.filter((e) => e.severity === 'warning');
-      expect(warnings.some((e) => e.message.includes('description'))).toBe(true);
-    });
-
     it('warns when name has invalid format', async () => {
       const filePath = path.join(tempDir, 'meta-invalid-name.yaml');
       await writeFile(

diff --git a/packages/core/tsup.config.ts b/packages/core/tsup.config.ts
@@ -3,6 +3,7 @@ import { defineConfig } from 'tsup';
 export default defineConfig({
   entry: ['src/index.ts', 'src/evaluation/validation/index.ts'],
   format: ['esm', 'cjs'],
+  shims: true,
   sourcemap: true,
   clean: true,
   dts: {