Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion plugin/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"scripts": {
"build": "node ./scripts/build.mjs",
"test": "node --test test/*.test.mjs",
"test:ts": "bun test test/*.test.ts",
"test:ts": "bun test --isolate test/*.test.ts",
"prepack": "npm run build && npm test && npm run test:ts"
},
"dependencies": {
Expand Down
105 changes: 51 additions & 54 deletions plugin/test/run-loop.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import { afterEach, expect, mock, test } from "bun:test"
import { expect, mock, test } from "bun:test"

import type { EvalItem, EvalOutput, EvalResultItem } from "../lib/run-eval"

const calls: { evalResults: EvalOutput }[] = []

const result = (
query: string,
overrides: Partial<EvalResultItem> = {},
Expand All @@ -19,59 +17,56 @@ const result = (
...overrides,
})

Comment thread
antongulin marked this conversation as resolved.
mock.module("../lib/utils", () => ({
parseSkillMd: () => ({
name: "warning-skill",
description: "original description",
fullContent: "skill content",
}),
}))
test("runLoop derives train warnings from train results and prints unique split warnings", async () => {
const calls: { evalResults: EvalOutput }[] = []

mock.module("../lib/utils", () => ({
parseSkillMd: () => ({
name: "warning-skill",
description: "original description",
fullContent: "skill content",
}),
}))

mock.module("../lib/run-eval", () => ({
findProjectRoot: () => "/tmp/project",
buildEvalWarnings: (results: EvalResultItem[]) => {
const shouldTriggerResults = results.filter((r) => r.should_trigger)
if (shouldTriggerResults.length === 0) return []
return shouldTriggerResults.every((r) => r.triggers === 0 && r.errors === 0)
? ["all-zero warning"]
: []
},
runEval: () => ({
skill_name: "warning-skill",
description: "original description",
results: [
result("train trigger"),
result("train negative", { should_trigger: false }),
result("test trigger"),
],
warnings: [],
summary: {
passed: 1,
failed: 2,
total: 3,
run_errors: 0,
queries_with_errors: 0,
mock.module("../lib/run-eval", () => ({
findProjectRoot: () => "/tmp/project",
buildEvalWarnings: (results: EvalResultItem[]) => {
const shouldTriggerResults = results.filter((r) => r.should_trigger)
if (shouldTriggerResults.length === 0) return []
return shouldTriggerResults.every((r) => r.triggers === 0 && r.errors === 0)
? ["all-zero warning"]
: []
},
}),
}))
runEval: () => ({
skill_name: "warning-skill",
description: "original description",
results: [
result("train trigger"),
result("train negative", { should_trigger: false }),
result("test trigger"),
],
warnings: [],
summary: {
passed: 1,
failed: 2,
total: 3,
run_errors: 0,
queries_with_errors: 0,
},
}),
}))

mock.module("../lib/improve-description", () => ({
improveDescription: (opts: { evalResults: EvalOutput }) => {
calls.push({ evalResults: opts.evalResults })
return "improved description"
},
}))
mock.module("../lib/improve-description", () => ({
improveDescription: (opts: { evalResults: EvalOutput }) => {
calls.push({ evalResults: opts.evalResults })
return "improved description"
},
}))

mock.module("../lib/report", () => ({
generateHtml: () => "<html></html>",
}))
mock.module("../lib/report", () => ({
generateHtml: () => "<html></html>",
}))

afterEach(() => {
calls.length = 0
})

test("runLoop derives train warnings from train results and prints unique split warnings", async () => {
const { runLoop } = await import("../lib/run-loop")
const evalSet: EvalItem[] = [
{ query: "train trigger", should_trigger: true },
{ query: "train negative", should_trigger: false },
Expand All @@ -84,6 +79,7 @@ test("runLoop derives train warnings from train results and prints unique split
}

try {
const { runLoop } = await import("../lib/run-loop")
await runLoop({
evalSet,
skillPath: "/tmp/skill/SKILL.md",
Expand All @@ -97,10 +93,11 @@ test("runLoop derives train warnings from train results and prints unique split
agent: undefined,
verbose: true,
})

expect(calls[0]?.evalResults.warnings).toEqual(["all-zero warning"])
expect(errors.filter((line) => line === "Warning: all-zero warning")).toHaveLength(2)
} finally {
console.error = originalError
mock.restore()
}

expect(calls[0]?.evalResults.warnings).toEqual(["all-zero warning"])
expect(errors.filter((line) => line === "Warning: all-zero warning")).toHaveLength(2)
})