From 6f76ad1a8769b49e3471c663abdc2071dbe7ac2d Mon Sep 17 00:00:00 2001 From: "sentry-junior[bot]" <264270552+sentry-junior[bot]@users.noreply.github.com> Date: Sat, 23 May 2026 02:34:44 +0000 Subject: [PATCH 1/2] feat(cli): Add init command to generate baseline eval config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `vitest-evals init` — a zero-dependency CLI command that generates `vitest.evals.config.ts` and adds the `evals` and `evals:record` scripts to `package.json`, exactly matching the Configure Vitest section in the public docs. The command is idempotent (safe to rerun), detects and reports conflicts before writing, and accepts `--force` to overwrite differing config or scripts. `--cwd` targets a different project directory. Co-authored-by: David Cramer --- [View Session in Sentry](https://sentry.sentry.io/traces/?project=4510944073809921&query=gen_ai.conversation.id%3A%22slack%3AC0B595QDZLL%3A1779502924.003319%22) --- packages/docs/src/content/docs/docs.mdx | 9 + packages/vitest-evals/README.md | 13 ++ packages/vitest-evals/package.json | 7 +- packages/vitest-evals/src/cli.ts | 92 ++++++++++ packages/vitest-evals/src/cli/init.test.ts | 191 +++++++++++++++++++++ packages/vitest-evals/src/cli/init.ts | 159 +++++++++++++++++ 6 files changed, 468 insertions(+), 3 deletions(-) create mode 100644 packages/vitest-evals/src/cli.ts create mode 100644 packages/vitest-evals/src/cli/init.test.ts create mode 100644 packages/vitest-evals/src/cli/init.ts diff --git a/packages/docs/src/content/docs/docs.mdx b/packages/docs/src/content/docs/docs.mdx index a13bb34..0ed6593 100644 --- a/packages/docs/src/content/docs/docs.mdx +++ b/packages/docs/src/content/docs/docs.mdx @@ -36,6 +36,15 @@ Keep evals on their own command and Vitest config. The separate config keeps longer provider timeouts, eval-only includes, reporter setup, and replay defaults out of unit tests. 
+Run the `init` command to generate the baseline config and add the eval scripts +to `package.json` automatically: + +```bash +pnpm exec vitest-evals init +``` + +Or add the files by hand: + ```json title="package.json" { "scripts": { diff --git a/packages/vitest-evals/README.md b/packages/vitest-evals/README.md index 9101992..2e53a96 100644 --- a/packages/vitest-evals/README.md +++ b/packages/vitest-evals/README.md @@ -26,6 +26,19 @@ For GitHub Actions summaries and annotations, emit Vitest JSON and use the native `getsentry/vitest-evals` action. No extra npm package is needed in the workflow. +## Init + +Run `init` to generate the baseline Vitest config and add eval scripts to +`package.json`: + +```sh +pnpm exec vitest-evals init +``` + +This creates `vitest.evals.config.ts` and adds `evals` and `evals:record` +scripts. Rerunning is safe — existing identical content is left unchanged. +Pass `--force` to overwrite conflicting config or scripts. + ## Core Model - `describeEval(...)` binds exactly one harness to a suite diff --git a/packages/vitest-evals/package.json b/packages/vitest-evals/package.json index 98f76a1..d2f3d01 100644 --- a/packages/vitest-evals/package.json +++ b/packages/vitest-evals/package.json @@ -17,9 +17,10 @@ "types": "./dist/index.d.ts", "main": "./dist/index.js", "module": "./dist/index.mjs", - "files": [ - "dist" - ], + "bin": { + "vitest-evals": "./dist/cli.js" + }, + "files": ["dist"], "exports": { ".": { "source": "./src/index.ts", diff --git a/packages/vitest-evals/src/cli.ts b/packages/vitest-evals/src/cli.ts new file mode 100644 index 0000000..25ddc04 --- /dev/null +++ b/packages/vitest-evals/src/cli.ts @@ -0,0 +1,92 @@ +#!/usr/bin/env node +import { runInitCommand } from "./cli/init"; + +main().catch((error) => { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exitCode = 1; +}); + +async function main() { + const args = process.argv.slice(2); + + if (args[0] === "--help" || args[0] === "-h" || args.length === 0) { + console.log(usage()); + return; + } + + const command = args[0]; + + if (command === "init") { + await dispatchInit(args.slice(1)); + return; + } + + console.error(`Unknown command: ${command}`); + console.error(usage()); + process.exitCode = 1; +} + +async function dispatchInit(args: string[]) { + const options = parseInitArgs(args); + + if (options.help) { + console.log(initUsage()); + return; + } + + await runInitCommand({ cwd: options.cwd, force: options.force }); +} + +function parseInitArgs(args: string[]) { + let force = false; + let cwd: string | undefined; + let help = false; + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + switch (arg) { + case "--force": + force = true; + break; + case "--cwd": { + const value = args[++i]; + if (!value) throw new Error("Missing value for --cwd"); + cwd = value; + break; + } + case "--help": + case "-h": + help = true; + break; + default: + throw new Error(`Unknown argument: ${arg}`); + } + } + + return { force, cwd, help }; +} + +function usage() { + return [ + "Usage: vitest-evals <command>", + "", + "Commands:", + " init Generate a baseline eval config and add scripts to package.json", + "", + "Options:", + " -h, --help Print help", + ].join("\n"); +} + +function initUsage() { + return [ + "Usage: vitest-evals init [options]", + "", + "Generate vitest.evals.config.ts and add eval scripts to package.json.", + "", + "Options:", + " --force Overwrite existing config and conflicting scripts", + " --cwd <dir> Target project directory (default: current directory)", + " -h, --help Print help", + ].join("\n"); +} diff --git a/packages/vitest-evals/src/cli/init.test.ts b/packages/vitest-evals/src/cli/init.test.ts new file mode 100644 index 0000000..6cd1281 --- /dev/null +++ b/packages/vitest-evals/src/cli/init.test.ts @@ -0,0 
+1,191 @@ +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { EVALS_CONFIG_CONTENT, EVALS_SCRIPTS, runInit } from "./init"; + +function makeTmpDir() { + const dir = join( + tmpdir(), + `vitest-evals-init-test-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + mkdirSync(dir, { recursive: true }); + return dir; +} + +function writePkg(dir: string, pkg: Record<string, unknown>) { + writeFileSync(join(dir, "package.json"), `${JSON.stringify(pkg, null, 2)}\n`); +} + +function readPkg(dir: string): Record<string, unknown> { + return JSON.parse(readFileSync(join(dir, "package.json"), "utf8")) as Record< + string, + unknown + >; +} + +function readConfig(dir: string): string { + return readFileSync(join(dir, "vitest.evals.config.ts"), "utf8"); +} + +describe("runInit", () => { + let dir: string; + + beforeEach(() => { + dir = makeTmpDir(); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("returns no-package-json when package.json is missing", () => { + const result = runInit({ cwd: dir }); + expect(result).toEqual({ status: "no-package-json" }); + }); + + it("creates config and adds scripts on a fresh project", () => { + writePkg(dir, { name: "my-app" }); + + const result = runInit({ cwd: dir }); + + expect(result).toEqual({ + status: "ok", + wrote: [ + "vitest.evals.config.ts", + "package.json scripts.evals", + "package.json scripts.evals:record", + ], + skipped: [], + }); + + expect(readConfig(dir)).toBe(EVALS_CONFIG_CONTENT); + + const pkg = readPkg(dir); + const scripts = pkg.scripts as Record<string, string>; + expect(scripts.evals).toBe(EVALS_SCRIPTS.evals); + expect(scripts["evals:record"]).toBe(EVALS_SCRIPTS["evals:record"]); + }); + + it("preserves existing scripts when adding new ones", () => { + writePkg(dir, { name: "my-app", scripts: { test: "vitest" } }); + + runInit({ cwd: dir }); + + 
const pkg = readPkg(dir); + const scripts = pkg.scripts as Record<string, string>; + expect(scripts.test).toBe("vitest"); + expect(scripts.evals).toBe(EVALS_SCRIPTS.evals); + }); + + it("is idempotent on a second run", () => { + writePkg(dir, { name: "my-app" }); + + runInit({ cwd: dir }); + const result = runInit({ cwd: dir }); + + expect(result).toEqual({ + status: "ok", + wrote: [], + skipped: [ + "vitest.evals.config.ts", + "package.json scripts.evals", + "package.json scripts.evals:record", + ], + }); + }); + + it("returns conflict when config exists with different content", () => { + writePkg(dir, { name: "my-app" }); + writeFileSync(join(dir, "vitest.evals.config.ts"), "// custom config\n"); + + const result = runInit({ cwd: dir }); + + expect(result).toEqual({ + status: "conflict", + conflicts: ["vitest.evals.config.ts"], + }); + }); + + it("returns conflict when scripts have different values", () => { + writePkg(dir, { + name: "my-app", + scripts: { evals: "vitest run --config other.config.ts" }, + }); + + const result = runInit({ cwd: dir }); + + expect(result).toEqual({ + status: "conflict", + conflicts: ["package.json scripts.evals"], + }); + }); + + it("returns all conflicts when multiple things differ", () => { + writePkg(dir, { + name: "my-app", + scripts: { + evals: "custom", + "evals:record": "custom-record", + }, + }); + writeFileSync(join(dir, "vitest.evals.config.ts"), "// different\n"); + + const result = runInit({ cwd: dir }); + expect(result.status).toBe("conflict"); + if (result.status === "conflict") { + expect(result.conflicts).toHaveLength(3); + } + }); + + it("overwrites with --force even when conflicts exist", () => { + writePkg(dir, { + name: "my-app", + scripts: { evals: "custom", "evals:record": "custom-record" }, + }); + writeFileSync(join(dir, "vitest.evals.config.ts"), "// different\n"); + + const result = runInit({ cwd: dir, force: true }); + + expect(result.status).toBe("ok"); + expect(readConfig(dir)).toBe(EVALS_CONFIG_CONTENT); + + const pkg 
= readPkg(dir); + const scripts = pkg.scripts as Record<string, string>; + expect(scripts.evals).toBe(EVALS_SCRIPTS.evals); + expect(scripts["evals:record"]).toBe(EVALS_SCRIPTS["evals:record"]); + }); + + it("--force preserves unrelated package.json fields and scripts", () => { + writePkg(dir, { + name: "my-app", + version: "1.2.3", + scripts: { test: "vitest", evals: "old-evals" }, + dependencies: { lodash: "^4.17.21" }, + }); + + runInit({ cwd: dir, force: true }); + + const pkg = readPkg(dir); + expect(pkg.name).toBe("my-app"); + expect(pkg.version).toBe("1.2.3"); + expect((pkg.dependencies as Record<string, string>).lodash).toBe( + "^4.17.21", + ); + expect((pkg.scripts as Record<string, string>).test).toBe("vitest"); + }); + + it("throws on invalid package.json JSON", () => { + writeFileSync(join(dir, "package.json"), "not valid json"); + expect(() => runInit({ cwd: dir })).toThrow(/invalid JSON/); + }); + + it("targets --cwd directory", () => { + writePkg(dir, { name: "my-app" }); + + const result = runInit({ cwd: dir }); + + expect(result.status).toBe("ok"); + expect(readConfig(dir)).toBe(EVALS_CONFIG_CONTENT); + }); +}); diff --git a/packages/vitest-evals/src/cli/init.ts b/packages/vitest-evals/src/cli/init.ts new file mode 100644 index 0000000..af39cb5 --- /dev/null +++ b/packages/vitest-evals/src/cli/init.ts @@ -0,0 +1,159 @@ +import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { join, resolve } from "node:path"; + +export type InitOptions = { + cwd?: string; + force?: boolean; +}; + +/** Generated vitest.evals.config.ts content — mirrors the Configure Vitest section in the public docs. */ +const EVALS_CONFIG_FILENAME = "vitest.evals.config.ts"; + +export const EVALS_CONFIG_CONTENT = `\ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + include: ["evals/**/*.eval.ts"], + testTimeout: 30_000, + hookTimeout: 30_000, + reporters: ["vitest-evals/reporter"], + env: { + VITEST_EVALS_REPLAY_MODE: + process.env.VITEST_EVALS_REPLAY_MODE ?? 
"auto", + VITEST_EVALS_REPLAY_DIR: ".vitest-evals/recordings", + }, + }, +}); +`; + +export const EVALS_SCRIPTS: Record<string, string> = { + evals: "vitest run --config vitest.evals.config.ts", + "evals:record": + "VITEST_EVALS_REPLAY_MODE=record vitest run --config vitest.evals.config.ts", +}; + +export type InitResult = + | { status: "ok"; wrote: string[]; skipped: string[] } + | { status: "conflict"; conflicts: string[] } + | { status: "no-package-json" }; + +/** + * Run the init command and return a structured result. + * Does not write anything if conflicts are detected (unless force is true). + */ +export function runInit(options: InitOptions = {}): InitResult { + const cwd = resolve(options.cwd ?? process.cwd()); + const force = options.force ?? false; + + const pkgPath = join(cwd, "package.json"); + if (!existsSync(pkgPath)) { + return { status: "no-package-json" }; + } + + let pkg: Record<string, unknown>; + try { + pkg = JSON.parse(readFileSync(pkgPath, "utf8")) as Record<string, unknown>; + } catch { + throw new Error(`Could not parse ${pkgPath}: invalid JSON`); + } + + const configPath = join(cwd, EVALS_CONFIG_FILENAME); + + // Collect conflicts before writing anything. + const conflicts: string[] = []; + + const existingConfig = existsSync(configPath) + ? readFileSync(configPath, "utf8") + : null; + + if (existingConfig !== null && existingConfig !== EVALS_CONFIG_CONTENT) { + conflicts.push(EVALS_CONFIG_FILENAME); + } + + const existingScripts = + typeof pkg.scripts === "object" && pkg.scripts !== null + ? (pkg.scripts as Record<string, string>) + : {}; + + for (const [key, value] of Object.entries(EVALS_SCRIPTS)) { + const current = existingScripts[key]; + if (current !== undefined && current !== value) { + conflicts.push(`package.json scripts.${key}`); + } + } + + if (conflicts.length > 0 && !force) { + return { status: "conflict", conflicts }; + } + + // Write files. 
+ const wrote: string[] = []; + const skipped: string[] = []; + + if (existingConfig === EVALS_CONFIG_CONTENT) { + skipped.push(EVALS_CONFIG_FILENAME); + } else { + writeFileSync(configPath, EVALS_CONFIG_CONTENT); + wrote.push(EVALS_CONFIG_FILENAME); + } + + let scriptsChanged = false; + const scripts: Record<string, string> = { ...existingScripts }; + for (const [key, value] of Object.entries(EVALS_SCRIPTS)) { + if (scripts[key] === value) { + skipped.push(`package.json scripts.${key}`); + } else { + scripts[key] = value; + wrote.push(`package.json scripts.${key}`); + scriptsChanged = true; + } + } + + if (scriptsChanged) { + const updatedPkg = { ...pkg, scripts }; + writeFileSync(pkgPath, `${JSON.stringify(updatedPkg, null, 2)}\n`); + } + + return { status: "ok", wrote, skipped }; +} + +/** + * Run init and print results to stdout/stderr. + * Sets process.exitCode on failure; throws on fatal errors. + */ +export async function runInitCommand(options: InitOptions = {}) { + const cwd = resolve(options.cwd ?? process.cwd()); + const result = runInit(options); + + switch (result.status) { + case "no-package-json": + console.error( + `No package.json found in ${cwd}. Run from your project root or pass --cwd <dir>.`, + ); + process.exitCode = 1; + break; + + case "conflict": + console.error( + "Cannot initialize: the following already exist and differ:", + ); + for (const c of result.conflicts) { + console.error(` ${c}`); + } + console.error("\nRerun with --force to overwrite."); + process.exitCode = 1; + break; + + case "ok": + if (result.wrote.length === 0) { + console.log("vitest-evals is already configured."); + } else { + for (const f of result.wrote) { + console.log(` created ${f}`); + } + console.log("\nDone. 
Run `pnpm evals` to run your evals."); + } + break; + } +} From 890208cf3da3b44bd6e3e94089ba9b1eddc7d688 Mon Sep 17 00:00:00 2001 From: "sentry-junior[bot]" <264270552+sentry-junior[bot]@users.noreply.github.com> Date: Sat, 23 May 2026 02:37:16 +0000 Subject: [PATCH 2/2] test(cli): Remove redundant cwd test covered by all other cases Co-authored-by: David Cramer --- packages/vitest-evals/src/cli/init.test.ts | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packages/vitest-evals/src/cli/init.test.ts b/packages/vitest-evals/src/cli/init.test.ts index 6cd1281..c132791 100644 --- a/packages/vitest-evals/src/cli/init.test.ts +++ b/packages/vitest-evals/src/cli/init.test.ts @@ -179,13 +179,4 @@ describe("runInit", () => { writeFileSync(join(dir, "package.json"), "not valid json"); expect(() => runInit({ cwd: dir })).toThrow(/invalid JSON/); }); - - it("targets --cwd directory", () => { - writePkg(dir, { name: "my-app" }); - - const result = runInit({ cwd: dir }); - - expect(result.status).toBe("ok"); - expect(readConfig(dir)).toBe(EVALS_CONFIG_CONTENT); - }); });