diff --git a/src/agent/core/interfaces/i-harness-store.ts b/src/agent/core/interfaces/i-harness-store.ts index 0e7eb395f..82190e6df 100644 --- a/src/agent/core/interfaces/i-harness-store.ts +++ b/src/agent/core/interfaces/i-harness-store.ts @@ -66,6 +66,13 @@ export interface IHarnessStore { */ deleteOutcomes(projectId: string, commandType: string): Promise + /** + * Remove the pin for a `(projectId, commandType)` pair. + * Idempotent — returns `true` if a pin existed and was removed, + * `false` if no pin was set. + */ + deletePin(projectId: string, commandType: string): Promise + /** * Delete a single scenario by its `(projectId, commandType, scenarioId)` key. * Returns `true` when the scenario existed and was deleted; `false` on miss. @@ -78,6 +85,23 @@ export interface IHarnessStore { scenarioId: string, ): Promise + /** + * Delete every scenario for a `(projectId, commandType)` pair. + * Returns the number of records deleted. Used by `brv harness reset`. + */ + deleteScenarios(projectId: string, commandType: string): Promise + + /** + * Delete a single version by its `(projectId, commandType, versionId)` key. + * Returns `true` when the version existed and was deleted; `false` on miss. + * Used by `brv harness reset` to clear all versions for a pair. + */ + deleteVersion( + projectId: string, + commandType: string, + versionId: string, + ): Promise + /** * Return the most-recently-written version for a `(projectId, commandType)` * pair — ranked by the stored `version` number, not by `heuristic`. This diff --git a/src/agent/infra/agent/cipher-agent.ts b/src/agent/infra/agent/cipher-agent.ts index 588bebc92..5cb6bd415 100644 --- a/src/agent/infra/agent/cipher-agent.ts +++ b/src/agent/infra/agent/cipher-agent.ts @@ -13,6 +13,7 @@ import type {AgentState, ExecutionContext, ICipherAgent} from '../../core/interf import type {IHistoryStorage} from '../../core/interfaces/i-history-storage.js' import type {ITokenizer} from '../../core/interfaces/i-tokenizer.js' import type {FileSystemService} from '../file-system/file-system-service.js' +import type {HarnessSynthesizer} from '../harness/harness-synthesizer.js' import type {MemoryManager} from '../memory/memory-manager.js' import type {ProcessService} from '../process/process-service.js' import type {SystemPromptManager} from '../system-prompt/system-prompt-manager.js' @@ -513,6 +514,15 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { } } + /** + * Public accessor for the harness synthesizer. Used by `brv harness refine` + * via the agent-process task dispatch. Returns `undefined` when harness is + * disabled (synthesizer was never instantiated). + */ + public getHarnessSynthesizer(): HarnessSynthesizer | undefined { + return this.services?.harnessSynthesizer + } + /** * Get an existing session or create a new one. */ @@ -572,7 +582,7 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { protected override async initializeServices(): Promise { // Pass pre-created event bus to service initializer - return createCipherAgentServices(this.config, this._agentEventBus) + return createCipherAgentServices(this.config, this._agentEventBus, () => this.buildHttpConfig()) } /** @@ -660,6 +670,8 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { this.rebindCurateTools(services, httpConfig, sessionLLMConfig) } + // === Protected Methods (implement abstract from BaseAgent) === + /** * Reset the agent to initial state. * Resets execution state only. To reset sessions, use resetSession(sessionId). @@ -671,8 +683,6 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { } } - // === Protected Methods (implement abstract from BaseAgent) === - /** * Reset a specific session's conversation history. * @param sessionId - The session ID to reset diff --git a/src/agent/infra/agent/service-initializer.ts b/src/agent/infra/agent/service-initializer.ts index 7d698345f..3a54bca1f 100644 --- a/src/agent/infra/agent/service-initializer.ts +++ b/src/agent/infra/agent/service-initializer.ts @@ -138,6 +138,7 @@ export type {CipherAgentServices, SessionManagerConfig, SessionServices} from '. export async function createCipherAgentServices( config: ValidatedAgentConfig, agentEventBus: AgentEventBus, + httpConfigProvider?: () => ByteRoverHttpConfig, ): Promise { // 1. Logger (uses provided event bus ) const logger = new EventBasedLogger(agentEventBus, 'CipherAgent') @@ -333,6 +334,9 @@ export async function createCipherAgentServices( : config.providerApiKey, baseUrl: config.providerBaseUrl, headers: config.providerHeaders, + // byterover provider needs httpConfig for API routing (sessionKey, + // projectId, spaceId, teamId). Resolved lazily from CipherAgent. + httpConfig: httpConfigProvider ? {...httpConfigProvider()} : undefined, httpReferer: config.httpReferer, maxTokens: 4096, model: refinementModel, diff --git a/src/agent/infra/harness/harness-store.ts b/src/agent/infra/harness/harness-store.ts index 470b6de92..2a73ceb44 100644 --- a/src/agent/infra/harness/harness-store.ts +++ b/src/agent/infra/harness/harness-store.ts @@ -119,6 +119,14 @@ export class HarnessStore implements IHarnessStore { // ── scenarios ───────────────────────────────────────────────────────────── + async deletePin(projectId: string, commandType: string): Promise { + const key = this.pinKey(projectId, commandType) + const exists = await this.keyStorage.get(key) + if (exists === undefined) return false + await this.keyStorage.delete(key) + return true + } + async deleteScenario( projectId: string, commandType: string, @@ -149,6 +157,51 @@ export class HarnessStore implements IHarnessStore { // ── versions ─────────────────────────────────────────────────────────────── + async deleteScenarios(projectId: string, commandType: string): Promise { + const keys: StorageKey[] = [] + for (const projectType of ProjectTypeSchema.options) { + // eslint-disable-next-line no-await-in-loop + const entries = await this.keyStorage.listWithValues([ + HARNESS_PREFIX, + SCENARIO_PREFIX, + projectType, + projectId, + commandType, + ]) + for (const entry of entries) keys.push(entry.key) + } + + if (keys.length === 0) return 0 + + const operations: BatchOperation[] = keys.map((key) => ({key, type: 'delete' as const})) + await this.keyStorage.batch(operations) + this.logger.debug('HarnessStore.deleteScenarios cleared partition', { + commandType, + deleted: keys.length, + projectId, + }) + + return keys.length + } + + async deleteVersion( + projectId: string, + commandType: string, + versionId: string, + ): Promise { + const key = this.versionKey(projectId, commandType, versionId) + const exists = await this.keyStorage.exists(key) + if (!exists) return false + + await this.keyStorage.delete(key) + this.logger.debug('HarnessStore.deleteVersion removed entry', { + commandType, + projectId, + versionId, + }) + return true + } + async getLatest(projectId: string, commandType: string): Promise { // Delegate to `listVersions` rather than re-deriving the "max version" // comparator — a future change to the sort key can't silently break diff --git a/src/agent/infra/llm/agent-llm-service.ts b/src/agent/infra/llm/agent-llm-service.ts index 43171036d..9a2f21e9d 100644 --- a/src/agent/infra/llm/agent-llm-service.ts +++ b/src/agent/infra/llm/agent-llm-service.ts @@ -29,6 +29,7 @@ import type {CompactionService} from './context/compaction/compaction-service.js import type {ICompressionStrategy} from './context/compression/types.js' import {getErrorMessage} from '../../../server/utils/error-helpers.js' +import {sanitizeProjectPath} from '../../../server/utils/path-utils.js' import {AgentStateMachine} from '../../core/domain/agent/agent-state-machine.js' import {AgentState, TerminationReason} from '../../core/domain/agent/agent-state.js' import {LlmGenerationError, LlmMaxIterationsError, LlmResponseParsingError} from '../../core/domain/errors/llm-error.js' @@ -901,14 +902,11 @@ export class AgentLLMService implements ILLMService { } try { - // Slug/path gap workaround (known issue — see - // outcome-collection.test.ts:32): the recorder derives projectId - // from `environmentContext.workingDirectory`, so we use the same - // source here. `bootstrapIfNeeded` takes both `projectId` (for - // store key partitioning) and `workingDirectory` (for filesystem - // detection); at present they're the same value, aliased for - // readability at the call site. - const projectId = this.workingDirectory + // projectId is the sanitized working directory — FileKeyStorage + // rejects path separators in key segments, so the raw absolute + // path cannot be used as a store partition key. `workingDirectory` + // stays unsanitized for filesystem detection (template language). + const projectId = sanitizeProjectPath(this.workingDirectory) const {workingDirectory} = this await harnessBootstrap.bootstrapIfNeeded(projectId, commandType, workingDirectory) diff --git a/src/agent/infra/sandbox/sandbox-service.ts b/src/agent/infra/sandbox/sandbox-service.ts index 79eda3f4e..47312e989 100644 --- a/src/agent/infra/sandbox/sandbox-service.ts +++ b/src/agent/infra/sandbox/sandbox-service.ts @@ -21,6 +21,7 @@ import type { HarnessOutcomeRecorder } from '../harness/harness-outcome-recorder import type { SessionManager } from '../session/session-manager.js' import type { ISearchKnowledgeService, ToolsSDK } from './tools-sdk.js' +import { sanitizeProjectPath } from '../../../server/utils/path-utils.js' import { ProjectTypeSchema } from '../../core/domain/harness/types.js' import {HarnessEvaluatorError} from '../harness/harness-evaluator-errors.js' import { OpsCounter } from '../harness/ops-counter.js' @@ -232,7 +233,7 @@ export class SandboxService implements ISandboxService { conversationTurn: config?.conversationTurn, executionTimeMs: result.executionTime, harnessVersionId: this.harnessVersionIdBySession.get(sessionId), - projectId: this.environmentContext.workingDirectory, + projectId: sanitizeProjectPath(this.environmentContext.workingDirectory), projectType: this.resolveProjectType(), result, sessionId, diff --git a/src/oclif/commands/harness/refine.ts b/src/oclif/commands/harness/refine.ts new file mode 100644 index 000000000..6e0d11e52 --- /dev/null +++ b/src/oclif/commands/harness/refine.ts @@ -0,0 +1,204 @@ +/** + * `brv harness refine` — force-trigger a refinement attempt. + * + * Submits a `harness-refine` task to the daemon, which calls + * `HarnessSynthesizer.refineIfNeeded` in the agent process. Waits + * for the result (up to 60s) and prints the outcome. + * + * Respects the synthesizer's single-flight queue — if another + * refinement is already in progress for the pair, the user sees + * a "refinement already running" message and exits 0. + */ + +import {Command, Flags} from '@oclif/core' +import {randomUUID} from 'node:crypto' + +import type {SynthesisResult} from '../../../agent/infra/harness/harness-synthesizer.js' + +import {resolveProject} from '../../../server/infra/project/resolve-project.js' +import {HARNESS_NOT_ENABLED_REASON} from '../../../shared/constants/harness.js' +import {TaskEvents} from '../../../shared/transport/events/index.js' +import {withDaemonRetry} from '../../lib/daemon-client.js' +import { + HARNESS_COMMAND_TYPES, + isHarnessCommandType, + openHarnessStoreForProject, +} from '../../lib/harness-cli.js' +import {waitForTaskCompletion} from '../../lib/task-client.js' + +// --------------------------------------------------------------------------- +// Public types — tested directly by unit tests +// --------------------------------------------------------------------------- + +export type RefineJsonPayload = { + readonly accepted: boolean + readonly fromVersion?: number + readonly reason?: string + readonly toVersion?: number +} + +// --------------------------------------------------------------------------- +// Pure logic — unit-testable without oclif or daemon +// --------------------------------------------------------------------------- + +/** Format the synthesis result for text output. */ +export function renderRefineText( + result?: SynthesisResult, + fromVersion?: number, + toVersion?: number, +): string { + if (result === undefined) { + return 'No refinement performed — nothing to refine or refinement already running.' + } + + if (result.accepted) { + const delta = result.deltaH === undefined ? '' : ` (ΔH: +${result.deltaH.toFixed(2)})` + return `Refinement accepted — v${fromVersion} → v${toVersion}${delta}` + } + + return `Refinement rejected — ${result.reason ?? 'unknown reason'}` +} + +/** Build the JSON payload matching the event shape. */ +export function formatRefineResult( + result?: SynthesisResult, + fromVersion?: number, + toVersion?: number, +): RefineJsonPayload { + if (result === undefined) { + return {accepted: false, reason: 'no refinement performed — skipped'} + } + + if (result.accepted) { + return { + accepted: true, + fromVersion, + toVersion, + } + } + + return { + accepted: false, + fromVersion, + reason: result.reason, + } +} + +// --------------------------------------------------------------------------- +// oclif command +// --------------------------------------------------------------------------- + +export default class HarnessRefine extends Command { + static override description = 'Force-trigger a refinement attempt for a pair' + static override flags = { + commandType: Flags.string({ + default: 'curate', + description: 'Harness pair command type', + options: [...HARNESS_COMMAND_TYPES], + }), + format: Flags.string({ + default: 'text', + description: 'Output format', + options: ['json', 'text'], + }), + } + + async run(): Promise { + const {flags} = await this.parse(HarnessRefine) + + if (!isHarnessCommandType(flags.commandType)) { + this.error(`invalid --commandType value '${flags.commandType}'`, {exit: 1}) + } + + const format = flags.format === 'json' ? 'json' : 'text' + const projectRoot = resolveProject()?.projectRoot ?? process.cwd() + + // Read the current version number before refinement for display + const opened = await openHarnessStoreForProject(projectRoot) + let fromVersion: number | undefined + if (opened) { + try { + const latest = await opened.store.getLatest(opened.projectId, flags.commandType) + fromVersion = latest?.version + } finally { + opened.close() + } + } + + // Submit harness-refine task to daemon + let taskResult: string | undefined + try { + taskResult = await withDaemonRetry(async (client) => { + const taskId = randomUUID() + const completionPromise = new Promise((resolve, reject) => { + waitForTaskCompletion( + { + client, + command: 'harness refine', + format, + onCompleted: (r) => resolve(r.result ?? ''), + onError: (r) => reject(new Error(r.error.message)), + taskId, + timeoutMs: 60_000, + }, + () => {}, + ).catch(reject) + }) + + await client.requestWithAck(TaskEvents.CREATE, { + clientCwd: process.cwd(), + content: flags.commandType, + projectPath: projectRoot, + taskId, + type: 'harness-refine', + }) + + return completionPromise + }) + } catch (error) { + const message = error instanceof Error ? error.message : String(error) + if (format === 'json') { + this.log(JSON.stringify({accepted: false, error: message}, null, 2)) + this.exit(2) + } else { + this.error(`Refinement failed: ${message}`, {exit: 2}) + } + + return + } + + // Parse the result from the agent process with minimal shape guard + let synthesisResult: SynthesisResult | undefined + try { + const parsed: unknown = taskResult ? JSON.parse(taskResult) : undefined + synthesisResult = + parsed !== null && typeof parsed === 'object' && 'accepted' in parsed + ? (parsed as SynthesisResult) + : undefined + } catch { + synthesisResult = undefined + } + + // Synthesizer unavailable — exit 2 per spec + if (synthesisResult?.reason === HARNESS_NOT_ENABLED_REASON) { + if (format === 'json') { + this.log(JSON.stringify({accepted: false, error: HARNESS_NOT_ENABLED_REASON}, null, 2)) + this.exit(2) + } else { + this.error('Harness is not enabled — configure harness.enabled in .brv/config.json', {exit: 2}) + } + + return + } + + // toVersion inferred — SynthesisResult carries toVersionId (UUID) but + // not the numeric version. Safe for strictly sequential refinement. + const toVersion = synthesisResult?.accepted ? (fromVersion === undefined ? undefined : fromVersion + 1) : undefined + + if (format === 'json') { + this.log(JSON.stringify(formatRefineResult(synthesisResult, fromVersion, toVersion), null, 2)) + } else { + this.log(renderRefineText(synthesisResult, fromVersion, toVersion)) + } + } +} diff --git a/src/oclif/commands/harness/reset.ts b/src/oclif/commands/harness/reset.ts new file mode 100644 index 000000000..a3d842a64 --- /dev/null +++ b/src/oclif/commands/harness/reset.ts @@ -0,0 +1,186 @@ +/** + * `brv harness reset` — wipe all harness state for a pair. + * + * Deletes versions, outcomes, scenarios, and pin for a + * `(projectId, commandType)` pair. Interactive confirmation prompt + * by default; `--force` skips it for scripts + CI. + */ + +import {confirm} from '@inquirer/prompts' +import {Command, Flags} from '@oclif/core' + +import type {IHarnessStore} from '../../../agent/core/interfaces/i-harness-store.js' + +import {resolveProject} from '../../../server/infra/project/resolve-project.js' +import { + HARNESS_COMMAND_TYPES, + isHarnessCommandType, + openHarnessStoreForProject, +} from '../../lib/harness-cli.js' + +// --------------------------------------------------------------------------- +// Public types — tested directly by unit tests +// --------------------------------------------------------------------------- + +export type ResetArtifactCounts = { + readonly outcomes: number + readonly scenarios: number + readonly versions: number +} + +// --------------------------------------------------------------------------- +// Pure logic — unit-testable without oclif +// --------------------------------------------------------------------------- + +/** + * Count artifacts that would be deleted. Shown in the confirmation + * prompt so the user knows what they're about to lose. + */ +export async function countArtifacts( + store: IHarnessStore, + projectId: string, + commandType: string, +): Promise { + const [versions, outcomes, scenarios] = await Promise.all([ + store.listVersions(projectId, commandType), + // Recorder caps at ~200 outcomes per pair, so this is effectively "all". + store.listOutcomes(projectId, commandType, 10_000), + store.listScenarios(projectId, commandType), + ]) + + return { + outcomes: outcomes.length, + scenarios: scenarios.length, + versions: versions.length, + } +} + +/** + * Delete every harness artifact for a `(projectId, commandType)` pair. + * Order: outcomes → scenarios → versions (each version individually). + */ +export async function executeReset( + store: IHarnessStore, + projectId: string, + commandType: string, +): Promise { + const outcomesDeleted = await store.deleteOutcomes(projectId, commandType) + const scenariosDeleted = await store.deleteScenarios(projectId, commandType) + + const versions = await store.listVersions(projectId, commandType) + for (const v of versions) { + // eslint-disable-next-line no-await-in-loop + await store.deleteVersion(projectId, commandType, v.id) + } + + await store.deletePin(projectId, commandType) + + return { + outcomes: outcomesDeleted, + scenarios: scenariosDeleted, + versions: versions.length, + } +} + +/** Format the reset result for text output. */ +export function renderResetText(counts: ResetArtifactCounts): string { + const total = counts.versions + counts.outcomes + counts.scenarios + if (total === 0) return 'Nothing to delete — pair has no stored state.' + + const parts: string[] = [] + if (counts.versions > 0) parts.push(`${counts.versions} version${counts.versions === 1 ? '' : 's'}`) + if (counts.outcomes > 0) parts.push(`${counts.outcomes} outcome${counts.outcomes === 1 ? '' : 's'}`) + if (counts.scenarios > 0) parts.push(`${counts.scenarios} scenario${counts.scenarios === 1 ? '' : 's'}`) + + return `Deleted ${parts.join(', ')}.` +} + +// --------------------------------------------------------------------------- +// oclif command +// --------------------------------------------------------------------------- + +export default class HarnessReset extends Command { + static override description = 'Delete all harness state for a (project, commandType) pair' + static override flags = { + commandType: Flags.string({ + default: 'curate', + description: 'Harness pair command type', + options: [...HARNESS_COMMAND_TYPES], + }), + force: Flags.boolean({ + default: false, + description: 'Skip confirmation prompt', + }), + format: Flags.string({ + default: 'text', + description: 'Output format', + options: ['json', 'text'], + }), + } + + async run(): Promise { + const {flags} = await this.parse(HarnessReset) + + if (!isHarnessCommandType(flags.commandType)) { + this.error(`invalid --commandType value '${flags.commandType}'`, {exit: 1}) + } + + const projectRoot = resolveProject()?.projectRoot ?? process.cwd() + const opened = await openHarnessStoreForProject(projectRoot) + + if (!opened) { + const msg = 'Nothing to delete — pair has no stored state.' + if (flags.format === 'json') { + this.log(JSON.stringify({outcomes: 0, scenarios: 0, versions: 0}, null, 2)) + } else { + this.log(msg) + } + + return + } + + try { + const {projectId, store} = opened + + if (!flags.force) { + const counts = await countArtifacts(store, projectId, flags.commandType) + const total = counts.versions + counts.outcomes + counts.scenarios + + if (total === 0) { + const msg = 'Nothing to delete — pair has no stored state.' + if (flags.format === 'json') { + this.log(JSON.stringify({outcomes: 0, scenarios: 0, versions: 0}, null, 2)) + } else { + this.log(msg) + } + + return + } + + const lines = [ + `This will delete for (${flags.commandType}):`, + ` ${counts.versions} version${counts.versions === 1 ? '' : 's'}`, + ` ${counts.outcomes} outcome${counts.outcomes === 1 ? '' : 's'}`, + ` ${counts.scenarios} scenario${counts.scenarios === 1 ? '' : 's'}`, + ] + this.log(lines.join('\n')) + + const proceed = await confirm({default: false, message: 'Proceed with reset?'}) + if (!proceed) { + this.log('Reset cancelled.') + return + } + } + + const result = await executeReset(store, projectId, flags.commandType) + + if (flags.format === 'json') { + this.log(JSON.stringify(result, null, 2)) + } else { + this.log(renderResetText(result)) + } + } finally { + opened.close() + } + } +} diff --git a/src/oclif/lib/harness-cli.ts b/src/oclif/lib/harness-cli.ts index 79d0e75ba..31769e477 100644 --- a/src/oclif/lib/harness-cli.ts +++ b/src/oclif/lib/harness-cli.ts @@ -94,7 +94,7 @@ export async function openHarnessStoreForProject( return { close: () => keyStorage.close(), - projectId: resolvedRoot, + projectId: sanitized, store, } } diff --git a/src/server/core/domain/transport/schemas.ts b/src/server/core/domain/transport/schemas.ts index 8ef1a8c3f..350c40456 100644 --- a/src/server/core/domain/transport/schemas.ts +++ b/src/server/core/domain/transport/schemas.ts @@ -399,7 +399,7 @@ export const TaskExecuteSchema = z.object({ /** Dream trigger source — how this dream was initiated */ trigger: z.enum(['agent-idle', 'cli', 'manual']).optional(), /** Task type */ - type: z.enum(['curate', 'curate-folder', 'dream', 'query', 'search']), + type: z.enum(['curate', 'curate-folder', 'dream', 'harness-refine', 'query', 'search']), /** Workspace root for scoped query/curate */ worktreeRoot: z.string().optional(), }) @@ -664,7 +664,7 @@ export type TaskQueryResultEvent = z.infer // Request/Response Schemas (for client → server commands) // ============================================================================ -export const TaskTypeSchema = z.enum(['curate', 'curate-folder', 'dream', 'query', 'search']) +export const TaskTypeSchema = z.enum(['curate', 'curate-folder', 'dream', 'harness-refine', 'query', 'search']) /** * Request to create a new task diff --git a/src/server/infra/daemon/agent-process.ts b/src/server/infra/daemon/agent-process.ts index e1c6675bc..acbee5b10 100644 --- a/src/server/infra/daemon/agent-process.ts +++ b/src/server/infra/daemon/agent-process.ts @@ -21,7 +21,7 @@ import {connectToTransport, type ITransportClient} from '@campfirein/brv-transport-client' import {randomUUID} from 'node:crypto' -import {appendFileSync} from 'node:fs' +import {appendFileSync, existsSync, readFileSync} from 'node:fs' import {join} from 'node:path' import type {ISearchKnowledgeService} from '../../../agent/infra/sandbox/tools-sdk.js' @@ -36,6 +36,7 @@ import {FolderPackService} from '../../../agent/infra/folder-pack/folder-pack-se import {SessionMetadataStore} from '../../../agent/infra/session/session-metadata-store.js' import {FileKeyStorage} from '../../../agent/infra/storage/file-key-storage.js' import {createSearchKnowledgeService} from '../../../agent/infra/tools/implementations/search-knowledge-service.js' +import {HARNESS_NOT_ENABLED_REASON} from '../../../shared/constants/harness.js' import {AuthEvents} from '../../../shared/transport/events/auth-events.js' import {decodeSearchContent} from '../../../shared/transport/search-content.js' import {getCurrentConfig} from '../../config/environment.js' @@ -48,6 +49,7 @@ import { TransportStateEventNames, TransportTaskEventNames, } from '../../core/domain/transport/schemas.js' +import {sanitizeProjectPath} from '../../utils/path-utils.js' import {FileContextTreeArchiveService} from '../context-tree/file-context-tree-archive-service.js' import {RuntimeSignalStore} from '../context-tree/runtime-signal-store.js' import {DreamLockService} from '../dream/dream-lock-service.js' @@ -260,9 +262,27 @@ async function start(): Promise { const sharedAllowedPaths = (sourcesData?.origins ?? []).map((o) => o.contextTreeRoot) const envConfig = getCurrentConfig() + + // Read harness config from the project's .brv/config.json. + // The AgentConfigSchema defaults to {enabled: false} when absent, + // so only override when the file explicitly sets harness fields. + let harnessConfig: Record | undefined + const projectConfigPath = join(projectPath, BRV_DIR, 'config.json') + if (existsSync(projectConfigPath)) { + try { + const raw = JSON.parse(readFileSync(projectConfigPath, 'utf8')) + if (typeof raw?.harness === 'object' && raw.harness !== null) { + harnessConfig = raw.harness + } + } catch { + // Malformed config — fall through to schema default (disabled) + } + } + const agentConfig = { apiBaseUrl: envConfig.llmBaseUrl, fileSystem: {allowedPaths: ['.', ...sharedAllowedPaths], workingDirectory: projectPath}, + ...(harnessConfig ? {harness: harnessConfig} : {}), llm: { maxIterations: 10, maxTokens: 4096, @@ -584,6 +604,20 @@ async function executeTask( break } + case 'harness-refine': { + const synthesizer = agent.getHarnessSynthesizer() + if (!synthesizer) { + result = JSON.stringify({accepted: false, reason: HARNESS_NOT_ENABLED_REASON}) + break + } + + const commandTypeForRefine = content as 'chat' | 'curate' | 'query' + const synthResult = await synthesizer.refineIfNeeded(sanitizeProjectPath(projectPath), commandTypeForRefine) + result = JSON.stringify(synthResult ?? {accepted: false, reason: 'refinement skipped'}) + + break + } + case 'query': { const queryResult = await queryExecutor.executeWithAgent(agent, {query: content, taskId, worktreeRoot}) result = queryResult.response diff --git a/src/shared/constants/harness.ts b/src/shared/constants/harness.ts new file mode 100644 index 000000000..6e5638482 --- /dev/null +++ b/src/shared/constants/harness.ts @@ -0,0 +1,7 @@ +/** + * Shared harness constants used across process boundaries + * (agent-process ↔ CLI commands). + */ + +/** Reason string returned by agent-process when harness is not enabled. */ +export const HARNESS_NOT_ENABLED_REASON = 'harness not enabled' as const diff --git a/test/helpers/in-memory-harness-store.ts b/test/helpers/in-memory-harness-store.ts index 061665d6d..108df96ed 100644 --- a/test/helpers/in-memory-harness-store.ts +++ b/test/helpers/in-memory-harness-store.ts @@ -78,6 +78,10 @@ export class InMemoryHarnessStore implements IHarnessStore { return deleted } + async deletePin(projectId: string, commandType: string): Promise { + return this.pins.delete(partitionKey(projectId, commandType)) + } + async deleteScenario( projectId: string, commandType: string, @@ -87,6 +91,28 @@ export class InMemoryHarnessStore implements IHarnessStore { return this.scenarios.delete(key) } + async deleteScenarios(projectId: string, commandType: string): Promise { + const partition = partitionKey(projectId, commandType) + let deleted = 0 + for (const [key, scenario] of this.scenarios) { + if (partitionKey(scenario.projectId, scenario.commandType) === partition) { + this.scenarios.delete(key) + deleted++ + } + } + + return deleted + } + + async deleteVersion( + projectId: string, + commandType: string, + versionId: string, + ): Promise { + const key = compositeKey(projectId, commandType, versionId) + return this.versions.delete(key) + } + async getLatest(projectId: string, commandType: string): Promise { const matches = this.versionsForPartition(projectId, commandType) if (matches.length === 0) return undefined diff --git a/test/unit/agent/harness/harness-baseline-runner.test.ts b/test/unit/agent/harness/harness-baseline-runner.test.ts index 0cca7a87f..c61a5bf47 100644 --- a/test/unit/agent/harness/harness-baseline-runner.test.ts +++ b/test/unit/agent/harness/harness-baseline-runner.test.ts @@ -74,7 +74,10 @@ function makeStoreStub(sb: SinonSandbox): { const store = { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest, getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/agent/harness/harness-bootstrap.test.ts b/test/unit/agent/harness/harness-bootstrap.test.ts index b6b6f7e7a..8ece78f89 100644 --- a/test/unit/agent/harness/harness-bootstrap.test.ts +++ b/test/unit/agent/harness/harness-bootstrap.test.ts @@ -39,7 +39,10 @@ function makeStoreStub(sb: SinonSandbox): { const store = { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest, getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/agent/harness/harness-evaluator.test.ts b/test/unit/agent/harness/harness-evaluator.test.ts index 698c74053..dc8f17da2 100644 --- a/test/unit/agent/harness/harness-evaluator.test.ts +++ b/test/unit/agent/harness/harness-evaluator.test.ts @@ -188,7 +188,10 @@ function makeStoreStub(sb: SinonSandbox): { const store = { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts b/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts index 1f73935eb..e40a26580 100644 --- a/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts +++ b/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts @@ -18,6 +18,7 @@ import {AgentLLMService} from '../../../../src/agent/infra/llm/agent-llm-service import {ByteRoverContentGenerator} from '../../../../src/agent/infra/llm/generators/byterover-content-generator.js' import {SystemPromptManager} from '../../../../src/agent/infra/system-prompt/system-prompt-manager.js' import {ToolManager} from '../../../../src/agent/infra/tools/tool-manager.js' +import {sanitizeProjectPath} from '../../../../src/server/utils/path-utils.js' // `AgentLLMService` derives `projectId` from `process.cwd()` at // construction time. Each test captures the same value inside @@ -172,10 +173,10 @@ describe('AgentLLMService.ensureHarnessReady (Phase 5 Task 5.4 wiring)', () => { beforeEach(() => { sb = createSandbox() - // Capture cwd once per test to match the service's own `process.cwd()` - // read at construction. Per-test capture keeps these tests immune to - // any other test file that calls `process.chdir()`. - projectId = process.cwd() + // The service sanitizes process.cwd() via sanitizeProjectPath before + // passing it as a store partition key (FileKeyStorage rejects path + // separators). Tests must use the same sanitized form. + projectId = sanitizeProjectPath(process.cwd()) sessionEventBus = new SessionEventBus() modeSelectedEvents = [] sessionEventBus.on('harness:mode-selected', (payload) => { diff --git a/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts b/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts index c8f51c9fb..a5392354e 100644 --- a/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts +++ b/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts @@ -137,7 +137,9 @@ describe('SandboxService — harness outcome recording', () => { expect(params.result).to.have.property('stderr') expect(params.executionTimeMs).to.be.a('number').and.to.be.at.least(0) expect(params.projectType).to.equal('typescript') - expect(params.projectId).to.equal('/my/project') + // projectId is sanitized via sanitizeProjectPath (FileKeyStorage + // rejects path separators in key segments) + expect(params.projectId).to.equal('my--project') expect(params.conversationTurn).to.equal(2) expect(params.taskDescription).to.equal('find the auth module') }) diff --git a/test/unit/oclif/commands/harness/refine.test.ts b/test/unit/oclif/commands/harness/refine.test.ts new file mode 100644 index 000000000..2e888d6f0 --- /dev/null +++ b/test/unit/oclif/commands/harness/refine.test.ts @@ -0,0 +1,94 @@ +/** + * Unit tests for `brv harness refine`. + * + * Tests the pure formatting functions and result interpretation. + * The daemon transport interaction is verified via the integration test. + */ + +import {expect} from 'chai' + +import type {SynthesisResult} from '../../../../../src/agent/infra/harness/harness-synthesizer.js' + +import { + formatRefineResult, + renderRefineText, +} from '../../../../../src/oclif/commands/harness/refine.js' + +describe('HarnessRefine command — renderRefineText + formatRefineResult', () => { + // Test 1: accepted result text output + it('renders accepted refinement with version transition and delta H', () => { + const result: SynthesisResult = { + accepted: true, + deltaH: 0.06, + fromVersionId: 'v-abc', + toVersionId: 'v-def', + } + + const text = renderRefineText(result, 1, 2) + + expect(text).to.match(/accepted/i) + expect(text).to.include('v1') + expect(text).to.include('v2') + expect(text).to.include('0.06') + }) + + // Test 2: rejected result text output with reason + it('renders rejected refinement with reason', () => { + const result: SynthesisResult = { + accepted: false, + deltaH: 0.03, + fromVersionId: 'v-abc', + reason: 'delta H was 0.03, below acceptance threshold', + } + + const text = renderRefineText(result, 1) + + expect(text).to.match(/rejected/i) + expect(text).to.include('delta H was 0.03') + }) + + // Test 3: undefined result (skipped — no parent, weak model, in-flight) + it('renders skipped refinement when result is undefined', () => { + const text = renderRefineText() + + expect(text).to.match(/no refinement|skipped|nothing to refine/i) + }) + + // Test 4: formatRefineResult produces JSON matching event payload shape + it('formatRefineResult returns JSON with accepted payload', () => { + const result: SynthesisResult = { + accepted: true, + deltaH: 0.06, + fromVersionId: 'v-abc', + toVersionId: 'v-def', + } + + const json = formatRefineResult(result, 1, 2) + + expect(json.accepted).to.equal(true) + expect(json.fromVersion).to.equal(1) + expect(json.toVersion).to.equal(2) + }) + + // Test 5: formatRefineResult with rejected result + it('formatRefineResult returns JSON with rejected payload', () => { + const result: SynthesisResult = { + accepted: false, + fromVersionId: 'v-abc', + reason: 'delta H was 0.03, below acceptance threshold', + } + + const json = formatRefineResult(result, 1) + + expect(json.accepted).to.equal(false) + expect(json.reason).to.include('delta H') + }) + + // Test 6: formatRefineResult with undefined result (skipped) + it('formatRefineResult returns JSON with skipped payload', () => { + const json = formatRefineResult() + + expect(json.accepted).to.equal(false) + expect(json.reason).to.match(/skipped|no refinement/i) + }) +}) diff --git a/test/unit/oclif/commands/harness/reset.test.ts b/test/unit/oclif/commands/harness/reset.test.ts new file mode 100644 index 000000000..7926f5fde --- /dev/null +++ b/test/unit/oclif/commands/harness/reset.test.ts @@ -0,0 +1,183 @@ +/** + * Unit tests for `brv harness reset`. + * + * Tests the pure logic functions (countArtifacts, executeReset, + * renderResetText) extracted from the command. The oclif `run()` method + * and interactive prompt are tested via the integration test. + */ + +import {expect} from 'chai' +import {createSandbox, type SinonSandbox, type SinonStub} from 'sinon' + +import type { + EvaluationScenario, + HarnessVersion, +} from '../../../../../src/agent/core/domain/harness/types.js' +import type {IHarnessStore} from '../../../../../src/agent/core/interfaces/i-harness-store.js' + +import { + countArtifacts, + executeReset, + renderResetText, +} from '../../../../../src/oclif/commands/harness/reset.js' + +const PROJECT_ID = '/fixture/proj' + +function makeVersion(overrides: Partial = {}): HarnessVersion { + return { + code: 'exports.curate = async () => {}', + commandType: 'curate', + createdAt: 1_700_000_000_000, + heuristic: 0.5, + id: 'v-abc', + metadata: { + capabilities: ['curate'], + commandType: 'curate', + projectPatterns: ['**/*'], + version: 1, + }, + projectId: PROJECT_ID, + projectType: 'typescript', + version: 1, + ...overrides, + } +} + +function makeScenario(overrides: Partial = {}): EvaluationScenario { + return { + code: 'ctx.tools.curate([])', + commandType: 'curate', + createdAt: 1_700_000_000_000, + expectedBehavior: 'Succeeds without errors', + id: 's-abc', + projectId: PROJECT_ID, + projectType: 'typescript', + taskDescription: 'Test scenario', + ...overrides, + } +} + +function makeStoreStub(sb: SinonSandbox): IHarnessStore { + return { + deleteOutcome: sb.stub(), + deleteOutcomes: sb.stub().resolves(0), + deletePin: sb.stub().resolves(false), + deleteScenario: sb.stub(), + deleteScenarios: sb.stub().resolves(0), + deleteVersion: sb.stub().resolves(true), + getLatest: sb.stub(), + getPin: sb.stub().resolves(), + getVersion: sb.stub(), + listOutcomes: sb.stub().resolves([]), + listScenarios: sb.stub().resolves([]), + listVersions: sb.stub().resolves([]), + pruneOldVersions: sb.stub(), + recordFeedback: sb.stub(), + saveOutcome: sb.stub(), + saveScenario: sb.stub(), + saveVersion: sb.stub(), + setPin: sb.stub().resolves(), + } satisfies IHarnessStore +} + +describe('HarnessReset command — countArtifacts + executeReset + renderResetText', () => { + let sb: SinonSandbox + + beforeEach(() => { + sb = createSandbox() + }) + + afterEach(() => { + sb.restore() + }) + + // Test 1: reset when nothing exists → counts are all zero + it('countArtifacts returns zero counts when no artifacts exist', async () => { + const store = makeStoreStub(sb) + + const counts = await countArtifacts(store, PROJECT_ID, 'curate') + + expect(counts.outcomes).to.equal(0) + expect(counts.scenarios).to.equal(0) + expect(counts.versions).to.equal(0) + }) + + // Test 2: countArtifacts returns correct counts from store + it('countArtifacts returns correct counts from store queries', async () => { + const store = makeStoreStub(sb) + ;(store.listVersions as SinonStub).resolves([ + makeVersion({id: 'v-1', version: 1}), + makeVersion({id: 'v-2', version: 2}), + ]) + ;(store.listOutcomes as SinonStub).resolves([{id: 'o-1'}, {id: 'o-2'}, {id: 'o-3'}]) + ;(store.listScenarios as SinonStub).resolves([makeScenario()]) + + const counts = await countArtifacts(store, PROJECT_ID, 'curate') + + expect(counts.versions).to.equal(2) + expect(counts.outcomes).to.equal(3) + expect(counts.scenarios).to.equal(1) + }) + + // Test 3: executeReset deletes versions + outcomes + scenarios in order + it('executeReset deletes outcomes, scenarios, and versions', async () => { + const store = makeStoreStub(sb) + const v1 = makeVersion({id: 'v-1', version: 1}) + const v2 = makeVersion({id: 'v-2', version: 2}) + ;(store.listVersions as SinonStub).resolves([v2, v1]) + ;(store.deleteOutcomes as SinonStub).resolves(5) + ;(store.deleteScenarios as SinonStub).resolves(3) + ;(store.deleteVersion as SinonStub).resolves(true) + + const result = await executeReset(store, PROJECT_ID, 'curate') + + expect(result.outcomes).to.equal(5) + expect(result.scenarios).to.equal(3) + expect(result.versions).to.equal(2) + + // deleteOutcomes called + expect((store.deleteOutcomes as SinonStub).calledOnce).to.equal(true) + expect((store.deleteOutcomes as SinonStub).firstCall.args).to.deep.equal([PROJECT_ID, 'curate']) + + // deleteScenarios called + expect((store.deleteScenarios as SinonStub).calledOnce).to.equal(true) + + // deleteVersion called for each version + expect((store.deleteVersion as SinonStub).callCount).to.equal(2) + expect((store.deleteVersion as SinonStub).firstCall.args).to.deep.equal([PROJECT_ID, 'curate', 'v-2']) + expect((store.deleteVersion as SinonStub).secondCall.args).to.deep.equal([PROJECT_ID, 'curate', 'v-1']) + + // Deletion order: outcomes before versions + expect((store.deleteOutcomes as SinonStub).calledBefore(store.deleteVersion as SinonStub)).to.equal(true) + + // Pin cleared + expect((store.deletePin as SinonStub).calledOnce).to.equal(true) + }) + + // Test 4: executeReset with nothing to delete returns zero counts + it('executeReset with empty store returns zero counts', async () => { + const store = makeStoreStub(sb) + + const result = await executeReset(store, PROJECT_ID, 'curate') + + expect(result.outcomes).to.equal(0) + expect(result.scenarios).to.equal(0) + expect(result.versions).to.equal(0) + }) + + // Test 5: renderResetText with deletions shows counts + it('renderResetText shows deletion counts', () => { + const text = renderResetText({outcomes: 47, scenarios: 12, versions: 3}) + + expect(text).to.include('3 version') + expect(text).to.include('47 outcome') + expect(text).to.include('12 scenario') + }) + + // Test 6: renderResetText with nothing deleted shows appropriate message + it('renderResetText with zero counts shows nothing-to-delete message', () => { + const text = renderResetText({outcomes: 0, scenarios: 0, versions: 0}) + + expect(text).to.match(/nothing to delete/i) + }) +}) diff --git a/test/unit/oclif/commands/harness/status.test.ts b/test/unit/oclif/commands/harness/status.test.ts index 08f58f27b..70c3908c3 100644 --- a/test/unit/oclif/commands/harness/status.test.ts +++ b/test/unit/oclif/commands/harness/status.test.ts @@ -55,7 +55,10 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { return { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/oclif/commands/harness/use.test.ts b/test/unit/oclif/commands/harness/use.test.ts index cb95e6050..651211a3a 100644 --- a/test/unit/oclif/commands/harness/use.test.ts +++ b/test/unit/oclif/commands/harness/use.test.ts @@ -36,7 +36,10 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { return { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), + deleteScenarios: sb.stub().resolves(0), + deleteVersion: sb.stub().resolves(true), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/oclif/lib/resolve-version-ref.test.ts b/test/unit/oclif/lib/resolve-version-ref.test.ts index 19001f613..068278170 100644 --- a/test/unit/oclif/lib/resolve-version-ref.test.ts +++ b/test/unit/oclif/lib/resolve-version-ref.test.ts @@ -36,7 +36,10 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { return { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(),