Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import {
resolveResultSourcePath,
} from './manifest.js';
import { patchTestIds } from './shared.js';
import { type StudioConfig, loadStudioConfig, saveStudioConfig } from './studio-config.js';

// ── Source resolution ────────────────────────────────────────────────────

Expand Down Expand Up @@ -142,8 +143,27 @@ export function createApp(
options?: { studioDir?: string },
): Hono {
const searchDir = cwd ?? resultDir;
const agentvDir = path.join(searchDir, '.agentv');
const app = new Hono();

// Studio configuration (re-read on each request so external edits are picked up)
app.get('/api/config', (c) => c.json(loadStudioConfig(agentvDir)));

// Update the Studio configuration from a JSON request body.
//
// Only `pass_threshold` is taken from the body, and only when it is a number;
// it is clamped to [0, 1] before saving. Any other keys (or a body that is not
// an object) are ignored, so untrusted input can never be written to
// config.yaml or echoed back as part of the config.
// Responds with the saved config, or a 500 error payload if reading the body,
// loading the current config, or saving fails.
app.post('/api/config', async (c) => {
  try {
    const body: unknown = await c.req.json();
    const requested =
      typeof body === 'object' && body !== null
        ? (body as Record<string, unknown>).pass_threshold
        : undefined;

    // Start from the current on-disk config so an omitted/invalid field keeps its value.
    const updated: StudioConfig = { ...loadStudioConfig(agentvDir) };
    if (typeof requested === 'number' && !Number.isNaN(requested)) {
      updated.pass_threshold = Math.min(1, Math.max(0, requested));
    }

    saveStudioConfig(agentvDir, updated);
    return c.json(updated);
  } catch {
    return c.json({ error: 'Failed to save config' }, 500);
  }
});

// Dashboard HTML — serve Studio SPA (React app).
const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
if (!studioDistPath || !existsSync(path.join(studioDistPath, 'index.html'))) {
Expand Down Expand Up @@ -273,12 +293,13 @@ export function createApp(
}
try {
const loaded = patchTestIds(loadManifestResults(meta.path));
const { pass_threshold } = loadStudioConfig(agentvDir);
const datasetMap = new Map<string, { total: number; passed: number; scoreSum: number }>();
for (const r of loaded) {
const ds = r.dataset ?? r.target ?? 'default';
const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
entry.total++;
if (r.score >= 1) entry.passed++;
if (r.score >= pass_threshold) entry.passed++;
entry.scoreSum += r.score;
datasetMap.set(ds, entry);
}
Expand All @@ -305,6 +326,7 @@ export function createApp(
}
try {
const loaded = patchTestIds(loadManifestResults(meta.path));
const { pass_threshold } = loadStudioConfig(agentvDir);
const categoryMap = new Map<
string,
{ total: number; passed: number; scoreSum: number; datasets: Set<string> }
Expand All @@ -318,7 +340,7 @@ export function createApp(
datasets: new Set<string>(),
};
entry.total++;
if (r.score >= 1) entry.passed++;
if (r.score >= pass_threshold) entry.passed++;
entry.scoreSum += r.score;
entry.datasets.add(r.dataset ?? r.target ?? 'default');
categoryMap.set(cat, entry);
Expand Down Expand Up @@ -348,13 +370,14 @@ export function createApp(
}
try {
const loaded = patchTestIds(loadManifestResults(meta.path));
const { pass_threshold } = loadStudioConfig(agentvDir);
const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
const datasetMap = new Map<string, { total: number; passed: number; scoreSum: number }>();
for (const r of filtered) {
const ds = r.dataset ?? r.target ?? 'default';
const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
entry.total++;
if (r.score >= 1) entry.passed++;
if (r.score >= pass_threshold) entry.passed++;
entry.scoreSum += r.score;
datasetMap.set(ds, entry);
}
Expand Down Expand Up @@ -575,6 +598,7 @@ export function createApp(
// Experiments aggregate (group all runs by experiment)
app.get('/api/experiments', (c) => {
const metas = listResultFiles(searchDir);
const { pass_threshold } = loadStudioConfig(agentvDir);
const experimentMap = new Map<
string,
{
Expand All @@ -601,7 +625,7 @@ export function createApp(
entry.runFilenames.add(m.filename);
if (r.target) entry.targets.add(r.target);
entry.evalCount++;
if (r.score >= 1) entry.passedCount++;
if (r.score >= pass_threshold) entry.passedCount++;
if (r.timestamp && r.timestamp > entry.lastTimestamp) {
entry.lastTimestamp = r.timestamp;
}
Expand All @@ -628,6 +652,7 @@ export function createApp(
// Targets aggregate (group all runs by target)
app.get('/api/targets', (c) => {
const metas = listResultFiles(searchDir);
const { pass_threshold } = loadStudioConfig(agentvDir);
const targetMap = new Map<
string,
{
Expand All @@ -652,7 +677,7 @@ export function createApp(
entry.runFilenames.add(m.filename);
if (r.experiment) entry.experiments.add(r.experiment);
entry.evalCount++;
if (r.score >= 1) entry.passedCount++;
if (r.score >= pass_threshold) entry.passedCount++;
targetMap.set(target, entry);
}
} catch {
Expand Down
67 changes: 67 additions & 0 deletions apps/cli/src/commands/results/studio-config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* Studio configuration loader.
*
* Reads an optional `config.yaml` from the `.agentv/` directory to configure
* AgentV Studio behavior (e.g., pass/fail threshold).
*
* Location: `.agentv/config.yaml`
*
* config.yaml format:
* pass_threshold: 0.8 # score >= this value is considered "pass"
*
* If no config.yaml exists, or it is empty or has no numeric pass_threshold, defaults are used.
*/

import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import path from 'node:path';

import { PASS_THRESHOLD } from '@agentv/core';
import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';

/**
 * Settings stored in `.agentv/config.yaml` that configure AgentV Studio.
 */
export interface StudioConfig {
/** Pass/fail cutoff: a result whose score is >= this value is counted as a pass. */
pass_threshold: number;
}

/**
 * Fallback configuration. `loadStudioConfig` returns a copy of this when
 * `config.yaml` is missing or empty, and uses its `pass_threshold` when the
 * configured value is not a number.
 */
const DEFAULTS: StudioConfig = {
// Default cutoff is the `PASS_THRESHOLD` constant imported from `@agentv/core`.
pass_threshold: PASS_THRESHOLD,
};

/**
 * Load studio config from `config.yaml` in the given `.agentv/` directory.
 *
 * Returns a copy of the defaults when the file does not exist, is empty, or
 * does not parse to an object. Falls back to the default `pass_threshold` when
 * the configured value is not a number or is NaN. Any other numeric value is
 * clamped to [0, 1].
 *
 * Errors raised while reading or parsing the file are not caught here and
 * propagate to the caller.
 *
 * @param agentvDir - Path of the `.agentv/` directory that may contain `config.yaml`.
 * @returns A freshly allocated config object (never the shared defaults instance).
 */
export function loadStudioConfig(agentvDir: string): StudioConfig {
  const configPath = path.join(agentvDir, 'config.yaml');

  if (!existsSync(configPath)) {
    return { ...DEFAULTS };
  }

  const raw = readFileSync(configPath, 'utf-8');
  const parsed: unknown = parseYaml(raw);

  // An empty document parses to a nullish value; scalars are not usable as config either.
  if (!parsed || typeof parsed !== 'object') {
    return { ...DEFAULTS };
  }

  const candidate = (parsed as Record<string, unknown>).pass_threshold;

  // `typeof NaN` is 'number', and NaN survives Math.min/Math.max, which would make
  // every `score >= pass_threshold` comparison false. Treat NaN like a missing value.
  const threshold =
    typeof candidate === 'number' && !Number.isNaN(candidate)
      ? candidate
      : DEFAULTS.pass_threshold;

  return {
    pass_threshold: Math.min(1, Math.max(0, threshold)),
  };
}

/**
 * Save studio config to `config.yaml` in the given `.agentv/` directory.
 *
 * The directory (including missing parents) is created if needed, then the
 * config object is serialized to YAML and written as UTF-8, replacing any
 * existing file contents.
 *
 * @param agentvDir - Path of the `.agentv/` directory to write into.
 * @param config - Configuration to serialize; it is written as given, without validation.
 */
export function saveStudioConfig(agentvDir: string, config: StudioConfig): void {
  // With `recursive: true`, mkdirSync does nothing when the directory already
  // exists, so a separate existence check (and its check-then-create race) is unnecessary.
  mkdirSync(agentvDir, { recursive: true });

  const configPath = path.join(agentvDir, 'config.yaml');
  writeFileSync(configPath, stringifyYaml(config), 'utf-8');
}
4 changes: 2 additions & 2 deletions apps/cli/src/commands/trace/utils.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { readFileSync, readdirSync, statSync } from 'node:fs';
import path from 'node:path';
import type { EvaluationResult, TraceSummary } from '@agentv/core';
import { toCamelCaseDeep } from '@agentv/core';
import { PASS_THRESHOLD, toCamelCaseDeep } from '@agentv/core';
import {
RESULT_INDEX_FILENAME,
RESULT_RUNS_DIRNAME,
Expand Down Expand Up @@ -596,7 +596,7 @@ export function listResultFiles(cwd: string, limit?: number): ResultFileMeta[] {
const results = loadResultFile(filePath);

const testCount = results.length;
const passCount = results.filter((r) => r.score >= 1.0).length;
const passCount = results.filter((r) => r.score >= PASS_THRESHOLD).length;
const passRate = testCount > 0 ? passCount / testCount : 0;
const avgScore = testCount > 0 ? results.reduce((sum, r) => sum + r.score, 0) / testCount : 0;

Expand Down
55 changes: 55 additions & 0 deletions apps/cli/test/commands/results/studio-config.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import path from 'node:path';

import { PASS_THRESHOLD } from '@agentv/core';

import { loadStudioConfig } from '../../../src/commands/results/studio-config.js';

// Exercises loadStudioConfig against a throwaway directory per test:
// missing file, valid value, out-of-range values, empty file, wrong type.
describe('loadStudioConfig', () => {
  let tempDir: string;

  // Writes `contents` as config.yaml in the temp dir and returns the threshold that gets loaded.
  const thresholdFor = (contents: string): number => {
    writeFileSync(path.join(tempDir, 'config.yaml'), contents);
    const { pass_threshold } = loadStudioConfig(tempDir);
    return pass_threshold;
  };

  beforeEach(() => {
    const prefix = path.join(tmpdir(), 'studio-config-');
    tempDir = mkdtempSync(prefix);
  });

  afterEach(() => {
    rmSync(tempDir, { recursive: true, force: true });
  });

  it('returns defaults when no config.yaml exists', () => {
    const { pass_threshold } = loadStudioConfig(tempDir);
    expect(pass_threshold).toBe(PASS_THRESHOLD);
  });

  it('reads pass_threshold from config.yaml', () => {
    expect(thresholdFor('pass_threshold: 0.6\n')).toBe(0.6);
  });

  it('clamps pass_threshold to 0 when negative', () => {
    expect(thresholdFor('pass_threshold: -0.5\n')).toBe(0);
  });

  it('clamps pass_threshold to 1 when above 1', () => {
    expect(thresholdFor('pass_threshold: 1.5\n')).toBe(1);
  });

  it('returns defaults for empty config.yaml', () => {
    expect(thresholdFor('')).toBe(PASS_THRESHOLD);
  });

  it('returns defaults when pass_threshold is not a number', () => {
    expect(thresholdFor('pass_threshold: "high"\n')).toBe(PASS_THRESHOLD);
  });
});
Loading
Loading