From da92297f4456b56aa5a44232801827b7e7b267f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Thu, 23 Apr 2026 10:34:48 +0700 Subject: [PATCH 1/4] feat: [ENG-2323] brv harness reset + refine commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two admin CLI commands for AutoHarness V2: - `brv harness reset` — wipes all harness state (versions, outcomes, scenarios) for a (projectId, commandType) pair. Interactive confirmation prompt by default, `--force` to skip. Supports `--format json` for scripts. - `brv harness refine` — force-triggers the Critic→Refiner→Evaluator synthesis pipeline via the daemon task system. Waits up to 60s for the result. Exits 2 when harness is not enabled. Also fixes four pre-existing bugs that prevented AutoHarness from working on real projects: 1. Daemon config plumbing: agent-process.ts now reads harness field from .brv/config.json into agentConfig. 2. projectId sanitization: agent-llm-service.ts, sandbox-service.ts, and harness-cli.ts now apply sanitizeProjectPath() before store operations (FileKeyStorage rejects path separators in keys). 3. Refiner httpConfig: service-initializer.ts receives httpConfigProvider from cipher-agent.ts so the byterover LLM provider gets apiBaseUrl. 4. Refine projectId: agent-process.ts sanitizes projectPath before passing to synthesizer.refineIfNeeded. Adds deleteScenarios and deleteVersion to IHarnessStore interface. Adds getPin/setPin stubs to upstream test files for cross-merge compat. --- src/agent/core/interfaces/i-harness-store.ts | 17 ++ src/agent/infra/agent/cipher-agent.ts | 16 +- src/agent/infra/agent/service-initializer.ts | 4 + src/agent/infra/harness/harness-store.ts | 45 ++++ src/agent/infra/llm/agent-llm-service.ts | 14 +- src/agent/infra/sandbox/sandbox-service.ts | 3 +- src/oclif/commands/harness/refine.ts | 195 ++++++++++++++++++ src/oclif/commands/harness/reset.ts | 183 ++++++++++++++++ src/oclif/lib/harness-cli.ts | 2 +- src/server/core/domain/transport/schemas.ts | 4 +- src/server/infra/daemon/agent-process.ts | 35 +++- test/helpers/in-memory-harness-store.ts | 22 ++ .../harness/harness-baseline-runner.test.ts | 2 + .../agent/harness/harness-bootstrap.test.ts | 2 + .../agent/harness/harness-evaluator.test.ts | 2 + .../agent-llm-service-harness-wiring.test.ts | 9 +- .../sandbox-service-harness-wiring.test.ts | 4 +- .../oclif/commands/harness/refine.test.ts | 94 +++++++++ .../unit/oclif/commands/harness/reset.test.ts | 176 ++++++++++++++++ .../oclif/commands/harness/status.test.ts | 2 + test/unit/oclif/commands/harness/use.test.ts | 2 + .../oclif/lib/resolve-version-ref.test.ts | 2 + 22 files changed, 814 insertions(+), 21 deletions(-) create mode 100644 src/oclif/commands/harness/refine.ts create mode 100644 src/oclif/commands/harness/reset.ts create mode 100644 test/unit/oclif/commands/harness/refine.test.ts create mode 100644 test/unit/oclif/commands/harness/reset.test.ts diff --git a/src/agent/core/interfaces/i-harness-store.ts b/src/agent/core/interfaces/i-harness-store.ts index 0e7eb395f..199dec2fc 100644 --- a/src/agent/core/interfaces/i-harness-store.ts +++ b/src/agent/core/interfaces/i-harness-store.ts @@ -78,6 +78,23 @@ export interface IHarnessStore { scenarioId: string, ): Promise + /** + * Delete every scenario for a `(projectId, commandType)` pair. + * Returns the number of records deleted. Used by `brv harness reset`. + */ + deleteScenarios(projectId: string, commandType: string): Promise + + /** + * Delete a single version by its `(projectId, commandType, versionId)` key. + * Returns `true` when the version existed and was deleted; `false` on miss. + * Used by `brv harness reset` to clear all versions for a pair. + */ + deleteVersion( + projectId: string, + commandType: string, + versionId: string, + ): Promise + /** * Return the most-recently-written version for a `(projectId, commandType)` * pair — ranked by the stored `version` number, not by `heuristic`. This diff --git a/src/agent/infra/agent/cipher-agent.ts b/src/agent/infra/agent/cipher-agent.ts index 588bebc92..5cb6bd415 100644 --- a/src/agent/infra/agent/cipher-agent.ts +++ b/src/agent/infra/agent/cipher-agent.ts @@ -13,6 +13,7 @@ import type {AgentState, ExecutionContext, ICipherAgent} from '../../core/interf import type {IHistoryStorage} from '../../core/interfaces/i-history-storage.js' import type {ITokenizer} from '../../core/interfaces/i-tokenizer.js' import type {FileSystemService} from '../file-system/file-system-service.js' +import type {HarnessSynthesizer} from '../harness/harness-synthesizer.js' import type {MemoryManager} from '../memory/memory-manager.js' import type {ProcessService} from '../process/process-service.js' import type {SystemPromptManager} from '../system-prompt/system-prompt-manager.js' @@ -513,6 +514,15 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { } } + /** + * Public accessor for the harness synthesizer. Used by `brv harness refine` + * via the agent-process task dispatch. Returns `undefined` when harness is + * disabled (synthesizer was never instantiated). + */ + public getHarnessSynthesizer(): HarnessSynthesizer | undefined { + return this.services?.harnessSynthesizer + } + /** * Get an existing session or create a new one. */ @@ -572,7 +582,7 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { protected override async initializeServices(): Promise { // Pass pre-created event bus to service initializer - return createCipherAgentServices(this.config, this._agentEventBus) + return createCipherAgentServices(this.config, this._agentEventBus, () => this.buildHttpConfig()) } /** @@ -660,6 +670,8 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { this.rebindCurateTools(services, httpConfig, sessionLLMConfig) } + // === Protected Methods (implement abstract from BaseAgent) === + /** * Reset the agent to initial state. * Resets execution state only. To reset sessions, use resetSession(sessionId). @@ -671,8 +683,6 @@ export class CipherAgent extends BaseAgent implements ICipherAgent { } } - // === Protected Methods (implement abstract from BaseAgent) === - /** * Reset a specific session's conversation history. * @param sessionId - The session ID to reset diff --git a/src/agent/infra/agent/service-initializer.ts b/src/agent/infra/agent/service-initializer.ts index 7d698345f..3a54bca1f 100644 --- a/src/agent/infra/agent/service-initializer.ts +++ b/src/agent/infra/agent/service-initializer.ts @@ -138,6 +138,7 @@ export type {CipherAgentServices, SessionManagerConfig, SessionServices} from '. export async function createCipherAgentServices( config: ValidatedAgentConfig, agentEventBus: AgentEventBus, + httpConfigProvider?: () => ByteRoverHttpConfig, ): Promise { // 1. Logger (uses provided event bus ) const logger = new EventBasedLogger(agentEventBus, 'CipherAgent') @@ -333,6 +334,9 @@ export async function createCipherAgentServices( : config.providerApiKey, baseUrl: config.providerBaseUrl, headers: config.providerHeaders, + // byterover provider needs httpConfig for API routing (sessionKey, + // projectId, spaceId, teamId). Resolved lazily from CipherAgent. + httpConfig: httpConfigProvider ? {...httpConfigProvider()} : undefined, httpReferer: config.httpReferer, maxTokens: 4096, model: refinementModel, diff --git a/src/agent/infra/harness/harness-store.ts b/src/agent/infra/harness/harness-store.ts index 470b6de92..fc4661b9c 100644 --- a/src/agent/infra/harness/harness-store.ts +++ b/src/agent/infra/harness/harness-store.ts @@ -147,8 +147,53 @@ export class HarnessStore implements IHarnessStore { return false } + async deleteScenarios(projectId: string, commandType: string): Promise { + const keys: StorageKey[] = [] + for (const projectType of ProjectTypeSchema.options) { + // eslint-disable-next-line no-await-in-loop + const entries = await this.keyStorage.listWithValues([ + HARNESS_PREFIX, + SCENARIO_PREFIX, + projectType, + projectId, + commandType, + ]) + for (const entry of entries) keys.push(entry.key) + } + + if (keys.length === 0) return 0 + + const operations: BatchOperation[] = keys.map((key) => ({key, type: 'delete' as const})) + await this.keyStorage.batch(operations) + this.logger.debug('HarnessStore.deleteScenarios cleared partition', { + commandType, + deleted: keys.length, + projectId, + }) + + return keys.length + } + // ── versions ─────────────────────────────────────────────────────────────── + async deleteVersion( + projectId: string, + commandType: string, + versionId: string, + ): Promise { + const key = this.versionKey(projectId, commandType, versionId) + const exists = await this.keyStorage.exists(key) + if (!exists) return false + + await this.keyStorage.delete(key) + this.logger.debug('HarnessStore.deleteVersion removed entry', { + commandType, + projectId, + versionId, + }) + return true + } + async getLatest(projectId: string, commandType: string): Promise { // Delegate to `listVersions` rather than re-deriving the "max version" // comparator — a future change to the sort key can't silently break diff --git a/src/agent/infra/llm/agent-llm-service.ts b/src/agent/infra/llm/agent-llm-service.ts index 43171036d..9a2f21e9d 100644 --- a/src/agent/infra/llm/agent-llm-service.ts +++ b/src/agent/infra/llm/agent-llm-service.ts @@ -29,6 +29,7 @@ import type {CompactionService} from './context/compaction/compaction-service.js import type {ICompressionStrategy} from './context/compression/types.js' import {getErrorMessage} from '../../../server/utils/error-helpers.js' +import {sanitizeProjectPath} from '../../../server/utils/path-utils.js' import {AgentStateMachine} from '../../core/domain/agent/agent-state-machine.js' import {AgentState, TerminationReason} from '../../core/domain/agent/agent-state.js' import {LlmGenerationError, LlmMaxIterationsError, LlmResponseParsingError} from '../../core/domain/errors/llm-error.js' @@ -901,14 +902,11 @@ export class AgentLLMService implements ILLMService { } try { - // Slug/path gap workaround (known issue — see - // outcome-collection.test.ts:32): the recorder derives projectId - // from `environmentContext.workingDirectory`, so we use the same - // source here. `bootstrapIfNeeded` takes both `projectId` (for - // store key partitioning) and `workingDirectory` (for filesystem - // detection); at present they're the same value, aliased for - // readability at the call site. - const projectId = this.workingDirectory + // projectId is the sanitized working directory — FileKeyStorage + // rejects path separators in key segments, so the raw absolute + // path cannot be used as a store partition key. `workingDirectory` + // stays unsanitized for filesystem detection (template language). + const projectId = sanitizeProjectPath(this.workingDirectory) const {workingDirectory} = this await harnessBootstrap.bootstrapIfNeeded(projectId, commandType, workingDirectory) diff --git a/src/agent/infra/sandbox/sandbox-service.ts b/src/agent/infra/sandbox/sandbox-service.ts index 79eda3f4e..47312e989 100644 --- a/src/agent/infra/sandbox/sandbox-service.ts +++ b/src/agent/infra/sandbox/sandbox-service.ts @@ -21,6 +21,7 @@ import type { HarnessOutcomeRecorder } from '../harness/harness-outcome-recorder import type { SessionManager } from '../session/session-manager.js' import type { ISearchKnowledgeService, ToolsSDK } from './tools-sdk.js' +import { sanitizeProjectPath } from '../../../server/utils/path-utils.js' import { ProjectTypeSchema } from '../../core/domain/harness/types.js' import {HarnessEvaluatorError} from '../harness/harness-evaluator-errors.js' import { OpsCounter } from '../harness/ops-counter.js' @@ -232,7 +233,7 @@ export class SandboxService implements ISandboxService { conversationTurn: config?.conversationTurn, executionTimeMs: result.executionTime, harnessVersionId: this.harnessVersionIdBySession.get(sessionId), - projectId: this.environmentContext.workingDirectory, + projectId: sanitizeProjectPath(this.environmentContext.workingDirectory), projectType: this.resolveProjectType(), result, sessionId, diff --git a/src/oclif/commands/harness/refine.ts b/src/oclif/commands/harness/refine.ts new file mode 100644 index 000000000..4109f3ff8 --- /dev/null +++ b/src/oclif/commands/harness/refine.ts @@ -0,0 +1,195 @@ +/** + * `brv harness refine` — force-trigger a refinement attempt. + * + * Submits a `harness-refine` task to the daemon, which calls + * `HarnessSynthesizer.refineIfNeeded` in the agent process. Waits + * for the result (up to 60s) and prints the outcome. + * + * Respects the synthesizer's single-flight queue — if another + * refinement is already in progress for the pair, the user sees + * a "refinement already running" message and exits 0. + */ + +import {Command, Flags} from '@oclif/core' +import {randomUUID} from 'node:crypto' + +import type {SynthesisResult} from '../../../agent/infra/harness/harness-synthesizer.js' + +import {resolveProject} from '../../../server/infra/project/resolve-project.js' +import {TaskEvents} from '../../../shared/transport/events/index.js' +import {withDaemonRetry} from '../../lib/daemon-client.js' +import { + HARNESS_COMMAND_TYPES, + isHarnessCommandType, + openHarnessStoreForProject, +} from '../../lib/harness-cli.js' +import {waitForTaskCompletion} from '../../lib/task-client.js' + +// --------------------------------------------------------------------------- +// Public types — tested directly by unit tests +// --------------------------------------------------------------------------- + +export type RefineJsonPayload = { + readonly accepted: boolean + readonly fromVersion?: number + readonly reason?: string + readonly toVersion?: number +} + +// --------------------------------------------------------------------------- +// Pure logic — unit-testable without oclif or daemon +// --------------------------------------------------------------------------- + +/** Format the synthesis result for text output. */ +export function renderRefineText( + result?: SynthesisResult, + fromVersion?: number, + toVersion?: number, +): string { + if (result === undefined) { + return 'No refinement performed — nothing to refine or refinement already running.' + } + + if (result.accepted) { + const delta = result.deltaH === undefined ? '' : ` (ΔH: +${result.deltaH.toFixed(2)})` + return `Refinement accepted — v${fromVersion} → v${toVersion}${delta}` + } + + return `Refinement rejected — ${result.reason ?? 'unknown reason'}` +} + +/** Build the JSON payload matching the event shape. */ +export function formatRefineResult( + result?: SynthesisResult, + fromVersion?: number, + toVersion?: number, +): RefineJsonPayload { + if (result === undefined) { + return {accepted: false, reason: 'no refinement performed — skipped'} + } + + if (result.accepted) { + return { + accepted: true, + fromVersion, + toVersion, + } + } + + return { + accepted: false, + fromVersion, + reason: result.reason, + } +} + +// --------------------------------------------------------------------------- +// oclif command +// --------------------------------------------------------------------------- + +export default class HarnessRefine extends Command { + static override description = 'Force-trigger a refinement attempt for a pair' +static override flags = { + commandType: Flags.string({ + default: 'curate', + description: 'Harness pair command type', + options: [...HARNESS_COMMAND_TYPES], + }), + format: Flags.string({ + default: 'text', + description: 'Output format', + options: ['json', 'text'], + }), + } + + async run(): Promise { + const {flags} = await this.parse(HarnessRefine) + + if (!isHarnessCommandType(flags.commandType)) { + this.error(`invalid --commandType value '${flags.commandType}'`, {exit: 1}) + } + + const format = flags.format === 'json' ? 'json' : 'text' + const projectRoot = resolveProject()?.projectRoot ?? process.cwd() + + // Read the current version number before refinement for display + const opened = await openHarnessStoreForProject(projectRoot) + let fromVersion: number | undefined + if (opened) { + try { + const latest = await opened.store.getLatest(opened.projectId, flags.commandType) + fromVersion = latest?.version + } finally { + opened.close() + } + } + + // Submit harness-refine task to daemon + let taskResult: string | undefined + try { + taskResult = await withDaemonRetry(async (client) => { + const taskId = randomUUID() + const completionPromise = new Promise((resolve, reject) => { + waitForTaskCompletion( + { + client, + command: 'harness refine', + format, + onCompleted: (r) => resolve(r.result ?? ''), + onError: (r) => reject(new Error(r.error.message)), + taskId, + timeoutMs: 60_000, + }, + () => {}, + ) + }) + + await client.requestWithAck(TaskEvents.CREATE, { + clientCwd: process.cwd(), + content: flags.commandType, + projectPath: projectRoot, + taskId, + type: 'harness-refine', + }) + + return completionPromise + }) + } catch (error) { + const message = error instanceof Error ? error.message : String(error) + if (format === 'json') { + this.log(JSON.stringify({accepted: false, error: message}, null, 2)) + } else { + this.error(`Refinement failed: ${message}`, {exit: 2}) + } + + return + } + + // Parse the result from the agent process + let synthesisResult: SynthesisResult | undefined + try { + synthesisResult = taskResult ? JSON.parse(taskResult) as SynthesisResult : undefined + } catch { + synthesisResult = undefined + } + + // Synthesizer unavailable — exit 2 per spec + if (synthesisResult?.reason === 'harness not enabled') { + if (format === 'json') { + this.log(JSON.stringify({accepted: false, error: 'harness not enabled'}, null, 2)) + } else { + this.error('Harness is not enabled — configure harness.enabled in .brv/config.json', {exit: 2}) + } + + return + } + + const toVersion = synthesisResult?.accepted ? (fromVersion === undefined ? undefined : fromVersion + 1) : undefined + + if (format === 'json') { + this.log(JSON.stringify(formatRefineResult(synthesisResult, fromVersion, toVersion), null, 2)) + } else { + this.log(renderRefineText(synthesisResult, fromVersion, toVersion)) + } + } +} diff --git a/src/oclif/commands/harness/reset.ts b/src/oclif/commands/harness/reset.ts new file mode 100644 index 000000000..0a17eab5f --- /dev/null +++ b/src/oclif/commands/harness/reset.ts @@ -0,0 +1,183 @@ +/** + * `brv harness reset` — wipe all harness state for a pair. + * + * Deletes versions, outcomes, scenarios, and pin for a + * `(projectId, commandType)` pair. Interactive confirmation prompt + * by default; `--force` skips it for scripts + CI. + */ + +import {confirm} from '@inquirer/prompts' +import {Command, Flags} from '@oclif/core' + +import type {IHarnessStore} from '../../../agent/core/interfaces/i-harness-store.js' + +import {resolveProject} from '../../../server/infra/project/resolve-project.js' +import { + HARNESS_COMMAND_TYPES, + isHarnessCommandType, + openHarnessStoreForProject, +} from '../../lib/harness-cli.js' + +// --------------------------------------------------------------------------- +// Public types — tested directly by unit tests +// --------------------------------------------------------------------------- + +export type ResetArtifactCounts = { + readonly outcomes: number + readonly scenarios: number + readonly versions: number +} + +// --------------------------------------------------------------------------- +// Pure logic — unit-testable without oclif +// --------------------------------------------------------------------------- + +/** + * Count artifacts that would be deleted. Shown in the confirmation + * prompt so the user knows what they're about to lose. + */ +export async function countArtifacts( + store: IHarnessStore, + projectId: string, + commandType: string, +): Promise { + const [versions, outcomes, scenarios] = await Promise.all([ + store.listVersions(projectId, commandType), + store.listOutcomes(projectId, commandType, Number.MAX_SAFE_INTEGER), + store.listScenarios(projectId, commandType), + ]) + + return { + outcomes: outcomes.length, + scenarios: scenarios.length, + versions: versions.length, + } +} + +/** + * Delete every harness artifact for a `(projectId, commandType)` pair. + * Order: outcomes → scenarios → versions (each version individually). + */ +export async function executeReset( + store: IHarnessStore, + projectId: string, + commandType: string, +): Promise { + const outcomesDeleted = await store.deleteOutcomes(projectId, commandType) + const scenariosDeleted = await store.deleteScenarios(projectId, commandType) + + const versions = await store.listVersions(projectId, commandType) + for (const v of versions) { + // eslint-disable-next-line no-await-in-loop + await store.deleteVersion(projectId, commandType, v.id) + } + + return { + outcomes: outcomesDeleted, + scenarios: scenariosDeleted, + versions: versions.length, + } +} + +/** Format the reset result for text output. */ +export function renderResetText(counts: ResetArtifactCounts): string { + const total = counts.versions + counts.outcomes + counts.scenarios + if (total === 0) return 'Nothing to delete — pair has no stored state.' + + const parts: string[] = [] + if (counts.versions > 0) parts.push(`${counts.versions} version${counts.versions === 1 ? '' : 's'}`) + if (counts.outcomes > 0) parts.push(`${counts.outcomes} outcome${counts.outcomes === 1 ? '' : 's'}`) + if (counts.scenarios > 0) parts.push(`${counts.scenarios} scenario${counts.scenarios === 1 ? '' : 's'}`) + + return `Deleted ${parts.join(', ')}.` +} + +// --------------------------------------------------------------------------- +// oclif command +// --------------------------------------------------------------------------- + +export default class HarnessReset extends Command { + static override description = 'Delete all harness state for a (project, commandType) pair' +static override flags = { + commandType: Flags.string({ + default: 'curate', + description: 'Harness pair command type', + options: [...HARNESS_COMMAND_TYPES], + }), + force: Flags.boolean({ + default: false, + description: 'Skip confirmation prompt', + }), + format: Flags.string({ + default: 'text', + description: 'Output format', + options: ['json', 'text'], + }), + } + + async run(): Promise { + const {flags} = await this.parse(HarnessReset) + + if (!isHarnessCommandType(flags.commandType)) { + this.error(`invalid --commandType value '${flags.commandType}'`, {exit: 1}) + } + + const projectRoot = resolveProject()?.projectRoot ?? process.cwd() + const opened = await openHarnessStoreForProject(projectRoot) + + if (!opened) { + const msg = 'Nothing to delete — pair has no stored state.' + if (flags.format === 'json') { + this.log(JSON.stringify({outcomes: 0, scenarios: 0, versions: 0}, null, 2)) + } else { + this.log(msg) + } + + return + } + + try { + const {projectId, store} = opened + + if (!flags.force) { + const counts = await countArtifacts(store, projectId, flags.commandType) + const total = counts.versions + counts.outcomes + counts.scenarios + + if (total === 0) { + const msg = 'Nothing to delete — pair has no stored state.' + if (flags.format === 'json') { + this.log(JSON.stringify({outcomes: 0, scenarios: 0, versions: 0}, null, 2)) + } else { + this.log(msg) + } + + return + } + + const lines = [ + `This will delete for (${flags.commandType}):`, + ` ${counts.versions} version${counts.versions === 1 ? '' : 's'}`, + ` ${counts.outcomes} outcome${counts.outcomes === 1 ? '' : 's'}`, + ` ${counts.scenarios} scenario${counts.scenarios === 1 ? '' : 's'}`, + ] + this.log(lines.join('\n')) + + const proceed = await confirm({default: false, message: 'Proceed with reset?'}) + if (!proceed) { + this.log('Reset cancelled.') + return + } + } + + const result = await executeReset(store, projectId, flags.commandType) + + if (flags.format === 'json') { + this.log(JSON.stringify(result, null, 2)) + } else { + this.log(renderResetText(result)) + } + } finally { + opened.close() + } + } +} diff --git a/src/oclif/lib/harness-cli.ts b/src/oclif/lib/harness-cli.ts index 79d0e75ba..31769e477 100644 --- a/src/oclif/lib/harness-cli.ts +++ b/src/oclif/lib/harness-cli.ts @@ -94,7 +94,7 @@ export async function openHarnessStoreForProject( return { close: () => keyStorage.close(), - projectId: resolvedRoot, + projectId: sanitized, store, } } diff --git a/src/server/core/domain/transport/schemas.ts b/src/server/core/domain/transport/schemas.ts index 8ef1a8c3f..350c40456 100644 --- a/src/server/core/domain/transport/schemas.ts +++ b/src/server/core/domain/transport/schemas.ts @@ -399,7 +399,7 @@ export const TaskExecuteSchema = z.object({ /** Dream trigger source — how this dream was initiated */ trigger: z.enum(['agent-idle', 'cli', 'manual']).optional(), /** Task type */ - type: z.enum(['curate', 'curate-folder', 'dream', 'query', 'search']), + type: z.enum(['curate', 'curate-folder', 'dream', 'harness-refine', 'query', 'search']), /** Workspace root for scoped query/curate */ worktreeRoot: z.string().optional(), }) @@ -664,7 +664,7 @@ export type TaskQueryResultEvent = z.infer // Request/Response Schemas (for client → server commands) // ============================================================================ -export const TaskTypeSchema = z.enum(['curate', 'curate-folder', 'dream', 'query', 'search']) +export const TaskTypeSchema = z.enum(['curate', 'curate-folder', 'dream', 'harness-refine', 'query', 'search']) /** * Request to create a new task diff --git a/src/server/infra/daemon/agent-process.ts b/src/server/infra/daemon/agent-process.ts index e1c6675bc..f79aedf8b 100644 --- a/src/server/infra/daemon/agent-process.ts +++ b/src/server/infra/daemon/agent-process.ts @@ -21,7 +21,7 @@ import {connectToTransport, type ITransportClient} from '@campfirein/brv-transport-client' import {randomUUID} from 'node:crypto' -import {appendFileSync} from 'node:fs' +import {appendFileSync, existsSync, readFileSync} from 'node:fs' import {join} from 'node:path' import type {ISearchKnowledgeService} from '../../../agent/infra/sandbox/tools-sdk.js' @@ -48,6 +48,7 @@ import { TransportStateEventNames, TransportTaskEventNames, } from '../../core/domain/transport/schemas.js' +import {sanitizeProjectPath} from '../../utils/path-utils.js' import {FileContextTreeArchiveService} from '../context-tree/file-context-tree-archive-service.js' import {RuntimeSignalStore} from '../context-tree/runtime-signal-store.js' import {DreamLockService} from '../dream/dream-lock-service.js' @@ -260,9 +261,27 @@ async function start(): Promise { const sharedAllowedPaths = (sourcesData?.origins ?? []).map((o) => o.contextTreeRoot) const envConfig = getCurrentConfig() + + // Read harness config from the project's .brv/config.json. + // The AgentConfigSchema defaults to {enabled: false} when absent, + // so only override when the file explicitly sets harness fields. + let harnessConfig: Record | undefined + const projectConfigPath = join(projectPath, BRV_DIR, 'config.json') + if (existsSync(projectConfigPath)) { + try { + const raw = JSON.parse(readFileSync(projectConfigPath, 'utf8')) + if (typeof raw?.harness === 'object' && raw.harness !== null) { + harnessConfig = raw.harness + } + } catch { + // Malformed config — fall through to schema default (disabled) + } + } + const agentConfig = { apiBaseUrl: envConfig.llmBaseUrl, fileSystem: {allowedPaths: ['.', ...sharedAllowedPaths], workingDirectory: projectPath}, + ...(harnessConfig ? {harness: harnessConfig} : {}), llm: { maxIterations: 10, maxTokens: 4096, @@ -584,6 +603,20 @@ async function executeTask( break } + case 'harness-refine': { + const synthesizer = agent.getHarnessSynthesizer() + if (!synthesizer) { + result = JSON.stringify({accepted: false, reason: 'harness not enabled'}) + break + } + + const commandTypeForRefine = content as 'chat' | 'curate' | 'query' + const synthResult = await synthesizer.refineIfNeeded(sanitizeProjectPath(projectPath), commandTypeForRefine) + result = JSON.stringify(synthResult ?? {accepted: false, reason: 'refinement skipped'}) + + break + } + case 'query': { const queryResult = await queryExecutor.executeWithAgent(agent, {query: content, taskId, worktreeRoot}) result = queryResult.response diff --git a/test/helpers/in-memory-harness-store.ts b/test/helpers/in-memory-harness-store.ts index 061665d6d..884f0970a 100644 --- a/test/helpers/in-memory-harness-store.ts +++ b/test/helpers/in-memory-harness-store.ts @@ -87,6 +87,28 @@ export class InMemoryHarnessStore implements IHarnessStore { return this.scenarios.delete(key) } + async deleteScenarios(projectId: string, commandType: string): Promise { + const partition = partitionKey(projectId, commandType) + let deleted = 0 + for (const [key, scenario] of this.scenarios) { + if (partitionKey(scenario.projectId, scenario.commandType) === partition) { + this.scenarios.delete(key) + deleted++ + } + } + + return deleted + } + + async deleteVersion( + projectId: string, + commandType: string, + versionId: string, + ): Promise { + const key = compositeKey(projectId, commandType, versionId) + return this.versions.delete(key) + } + async getLatest(projectId: string, commandType: string): Promise { const matches = this.versionsForPartition(projectId, commandType) if (matches.length === 0) return undefined diff --git a/test/unit/agent/harness/harness-baseline-runner.test.ts b/test/unit/agent/harness/harness-baseline-runner.test.ts index 0cca7a87f..84b54cc2a 100644 --- a/test/unit/agent/harness/harness-baseline-runner.test.ts +++ b/test/unit/agent/harness/harness-baseline-runner.test.ts @@ -75,6 +75,8 @@ function makeStoreStub(sb: SinonSandbox): { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest, getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/agent/harness/harness-bootstrap.test.ts b/test/unit/agent/harness/harness-bootstrap.test.ts index b6b6f7e7a..d6d7e8c22 100644 --- a/test/unit/agent/harness/harness-bootstrap.test.ts +++ b/test/unit/agent/harness/harness-bootstrap.test.ts @@ -40,6 +40,8 @@ function makeStoreStub(sb: SinonSandbox): { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest, getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/agent/harness/harness-evaluator.test.ts b/test/unit/agent/harness/harness-evaluator.test.ts index 698c74053..98836239e 100644 --- a/test/unit/agent/harness/harness-evaluator.test.ts +++ b/test/unit/agent/harness/harness-evaluator.test.ts @@ -189,6 +189,8 @@ function makeStoreStub(sb: SinonSandbox): { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts b/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts index 1f73935eb..e40a26580 100644 --- a/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts +++ b/test/unit/agent/llm/agent-llm-service-harness-wiring.test.ts @@ -18,6 +18,7 @@ import {AgentLLMService} from '../../../../src/agent/infra/llm/agent-llm-service import {ByteRoverContentGenerator} from '../../../../src/agent/infra/llm/generators/byterover-content-generator.js' import {SystemPromptManager} from '../../../../src/agent/infra/system-prompt/system-prompt-manager.js' import {ToolManager} from '../../../../src/agent/infra/tools/tool-manager.js' +import {sanitizeProjectPath} from '../../../../src/server/utils/path-utils.js' // `AgentLLMService` derives `projectId` from `process.cwd()` at // construction time. Each test captures the same value inside @@ -172,10 +173,10 @@ describe('AgentLLMService.ensureHarnessReady (Phase 5 Task 5.4 wiring)', () => { beforeEach(() => { sb = createSandbox() - // Capture cwd once per test to match the service's own `process.cwd()` - // read at construction. Per-test capture keeps these tests immune to - // any other test file that calls `process.chdir()`. - projectId = process.cwd() + // The service sanitizes process.cwd() via sanitizeProjectPath before + // passing it as a store partition key (FileKeyStorage rejects path + // separators). Tests must use the same sanitized form. + projectId = sanitizeProjectPath(process.cwd()) sessionEventBus = new SessionEventBus() modeSelectedEvents = [] sessionEventBus.on('harness:mode-selected', (payload) => { diff --git a/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts b/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts index c8f51c9fb..a5392354e 100644 --- a/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts +++ b/test/unit/infra/sandbox/sandbox-service-harness-wiring.test.ts @@ -137,7 +137,9 @@ describe('SandboxService — harness outcome recording', () => { expect(params.result).to.have.property('stderr') expect(params.executionTimeMs).to.be.a('number').and.to.be.at.least(0) expect(params.projectType).to.equal('typescript') - expect(params.projectId).to.equal('/my/project') + // projectId is sanitized via sanitizeProjectPath (FileKeyStorage + // rejects path separators in key segments) + expect(params.projectId).to.equal('my--project') expect(params.conversationTurn).to.equal(2) expect(params.taskDescription).to.equal('find the auth module') }) diff --git a/test/unit/oclif/commands/harness/refine.test.ts b/test/unit/oclif/commands/harness/refine.test.ts new file mode 100644 index 000000000..2e888d6f0 --- /dev/null +++ b/test/unit/oclif/commands/harness/refine.test.ts @@ -0,0 +1,94 @@ +/** + * Unit tests for `brv harness refine`. + * + * Tests the pure formatting functions and result interpretation. + * The daemon transport interaction is verified via the integration test. + */ + +import {expect} from 'chai' + +import type {SynthesisResult} from '../../../../../src/agent/infra/harness/harness-synthesizer.js' + +import { + formatRefineResult, + renderRefineText, +} from '../../../../../src/oclif/commands/harness/refine.js' + +describe('HarnessRefine command — renderRefineText + formatRefineResult', () => { + // Test 1: accepted result text output + it('renders accepted refinement with version transition and delta H', () => { + const result: SynthesisResult = { + accepted: true, + deltaH: 0.06, + fromVersionId: 'v-abc', + toVersionId: 'v-def', + } + + const text = renderRefineText(result, 1, 2) + + expect(text).to.match(/accepted/i) + expect(text).to.include('v1') + expect(text).to.include('v2') + expect(text).to.include('0.06') + }) + + // Test 2: rejected result text output with reason + it('renders rejected refinement with reason', () => { + const result: SynthesisResult = { + accepted: false, + deltaH: 0.03, + fromVersionId: 'v-abc', + reason: 'delta H was 0.03, below acceptance threshold', + } + + const text = renderRefineText(result, 1) + + expect(text).to.match(/rejected/i) + expect(text).to.include('delta H was 0.03') + }) + + // Test 3: undefined result (skipped — no parent, weak model, in-flight) + it('renders skipped refinement when result is undefined', () => { + const text = renderRefineText() + + expect(text).to.match(/no refinement|skipped|nothing to refine/i) + }) + + // Test 4: formatRefineResult produces JSON matching event payload shape + it('formatRefineResult returns JSON with accepted payload', () => { + const result: SynthesisResult = { + accepted: true, + deltaH: 0.06, + fromVersionId: 'v-abc', + toVersionId: 'v-def', + } + + const json = formatRefineResult(result, 1, 2) + + expect(json.accepted).to.equal(true) + expect(json.fromVersion).to.equal(1) + expect(json.toVersion).to.equal(2) + }) + + // Test 5: formatRefineResult with rejected result + it('formatRefineResult returns JSON with rejected payload', () => { + const result: SynthesisResult = { + accepted: false, + fromVersionId: 'v-abc', + reason: 'delta H was 0.03, below acceptance threshold', + } + + const json = formatRefineResult(result, 1) + + expect(json.accepted).to.equal(false) + expect(json.reason).to.include('delta H') + }) + + // Test 6: formatRefineResult with undefined result (skipped) + it('formatRefineResult returns JSON with skipped payload', () => { + const json = formatRefineResult() + + expect(json.accepted).to.equal(false) + expect(json.reason).to.match(/skipped|no refinement/i) + }) +}) diff --git a/test/unit/oclif/commands/harness/reset.test.ts b/test/unit/oclif/commands/harness/reset.test.ts new file mode 100644 index 000000000..b4c1bb9b6 --- /dev/null +++ b/test/unit/oclif/commands/harness/reset.test.ts @@ -0,0 +1,176 @@ +/** + * Unit tests for `brv harness reset`. + * + * Tests the pure logic functions (countArtifacts, executeReset, + * renderResetText) extracted from the command. The oclif `run()` method + * and interactive prompt are tested via the integration test. + */ + +import {expect} from 'chai' +import {createSandbox, type SinonSandbox, type SinonStub} from 'sinon' + +import type { + EvaluationScenario, + HarnessVersion, +} from '../../../../../src/agent/core/domain/harness/types.js' +import type {IHarnessStore} from '../../../../../src/agent/core/interfaces/i-harness-store.js' + +import { + countArtifacts, + executeReset, + renderResetText, +} from '../../../../../src/oclif/commands/harness/reset.js' + +const PROJECT_ID = '/fixture/proj' + +function makeVersion(overrides: Partial = {}): HarnessVersion { + return { + code: 'exports.curate = async () => {}', + commandType: 'curate', + createdAt: 1_700_000_000_000, + heuristic: 0.5, + id: 'v-abc', + metadata: { + capabilities: ['curate'], + commandType: 'curate', + projectPatterns: ['**/*'], + version: 1, + }, + projectId: PROJECT_ID, + projectType: 'typescript', + version: 1, + ...overrides, + } +} + +function makeScenario(overrides: Partial = {}): EvaluationScenario { + return { + code: 'ctx.tools.curate([])', + commandType: 'curate', + createdAt: 1_700_000_000_000, + expectedBehavior: 'Succeeds without errors', + id: 's-abc', + projectId: PROJECT_ID, + projectType: 'typescript', + taskDescription: 'Test scenario', + ...overrides, + } +} + +function makeStoreStub(sb: SinonSandbox): IHarnessStore { + return { + deleteOutcome: sb.stub(), + deleteOutcomes: sb.stub().resolves(0), + deleteScenario: sb.stub(), + deleteScenarios: sb.stub().resolves(0), + deleteVersion: sb.stub().resolves(true), + getLatest: sb.stub(), + getPin: sb.stub().resolves(), + getVersion: sb.stub(), + listOutcomes: sb.stub().resolves([]), + listScenarios: sb.stub().resolves([]), + listVersions: sb.stub().resolves([]), + pruneOldVersions: sb.stub(), + recordFeedback: sb.stub(), + saveOutcome: sb.stub(), + saveScenario: sb.stub(), + saveVersion: sb.stub(), + setPin: sb.stub().resolves(), + } satisfies IHarnessStore +} + +describe('HarnessReset command — countArtifacts + executeReset + renderResetText', () => { + let sb: SinonSandbox + + beforeEach(() => { + sb = createSandbox() + }) + + afterEach(() => { + sb.restore() + }) + + // Test 1: reset when nothing exists → counts are all zero + it('countArtifacts returns zero counts when no artifacts exist', async () => { + const store = makeStoreStub(sb) + + const counts = await countArtifacts(store, PROJECT_ID, 'curate') + + expect(counts.outcomes).to.equal(0) + expect(counts.scenarios).to.equal(0) + expect(counts.versions).to.equal(0) + }) + + // Test 2: countArtifacts returns correct counts from store + it('countArtifacts returns correct counts from store queries', async () => { + const store = makeStoreStub(sb) + ;(store.listVersions as SinonStub).resolves([ + makeVersion({id: 'v-1', version: 1}), + makeVersion({id: 'v-2', version: 2}), + ]) + ;(store.listOutcomes as SinonStub).resolves([{id: 'o-1'}, {id: 'o-2'}, {id: 'o-3'}]) + ;(store.listScenarios as SinonStub).resolves([makeScenario()]) + + const counts = await countArtifacts(store, PROJECT_ID, 'curate') + + expect(counts.versions).to.equal(2) + expect(counts.outcomes).to.equal(3) + expect(counts.scenarios).to.equal(1) + }) + + // Test 3: executeReset deletes versions + outcomes + scenarios in order + it('executeReset deletes outcomes, scenarios, and versions', async () => { + const store = makeStoreStub(sb) + const v1 = makeVersion({id: 'v-1', version: 1}) + const v2 = makeVersion({id: 'v-2', version: 2}) + ;(store.listVersions as SinonStub).resolves([v2, v1]) + ;(store.deleteOutcomes as SinonStub).resolves(5) + ;(store.deleteScenarios as SinonStub).resolves(3) + ;(store.deleteVersion as SinonStub).resolves(true) + + const result = await executeReset(store, PROJECT_ID, 'curate') + + expect(result.outcomes).to.equal(5) + expect(result.scenarios).to.equal(3) + expect(result.versions).to.equal(2) + + // deleteOutcomes called + expect((store.deleteOutcomes as SinonStub).calledOnce).to.equal(true) + expect((store.deleteOutcomes as SinonStub).firstCall.args).to.deep.equal([PROJECT_ID, 'curate']) + + // deleteScenarios called + expect((store.deleteScenarios as SinonStub).calledOnce).to.equal(true) + + // deleteVersion called for each version + expect((store.deleteVersion as SinonStub).callCount).to.equal(2) + expect((store.deleteVersion as SinonStub).firstCall.args).to.deep.equal([PROJECT_ID, 'curate', 'v-2']) + expect((store.deleteVersion as SinonStub).secondCall.args).to.deep.equal([PROJECT_ID, 'curate', 'v-1']) + }) + + // Test 4: executeReset with nothing to delete returns zero counts + it('executeReset with empty store returns zero counts', async () => { + const store = makeStoreStub(sb) + + const result = await executeReset(store, PROJECT_ID, 'curate') + + expect(result.outcomes).to.equal(0) + expect(result.scenarios).to.equal(0) + expect(result.versions).to.equal(0) + }) + + // Test 5: renderResetText with deletions shows counts + it('renderResetText shows deletion counts', () => { + const text = renderResetText({outcomes: 47, scenarios: 12, versions: 3}) + + expect(text).to.include('3') + expect(text).to.include('47') + expect(text).to.include('12') + }) + + // Test 6: renderResetText with nothing deleted shows appropriate message + it('renderResetText with zero counts shows nothing-to-delete message', () => { + const text = renderResetText({outcomes: 0, scenarios: 0, versions: 0}) + + expect(text).to.match(/nothing to delete/i) + }) +}) diff --git a/test/unit/oclif/commands/harness/status.test.ts b/test/unit/oclif/commands/harness/status.test.ts index 08f58f27b..94d40772b 100644 --- a/test/unit/oclif/commands/harness/status.test.ts +++ b/test/unit/oclif/commands/harness/status.test.ts @@ -56,6 +56,8 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/oclif/commands/harness/use.test.ts b/test/unit/oclif/commands/harness/use.test.ts index cb95e6050..408f90d5a 100644 --- a/test/unit/oclif/commands/harness/use.test.ts +++ b/test/unit/oclif/commands/harness/use.test.ts @@ -37,6 +37,8 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), deleteScenario: sb.stub(), + deleteScenarios: sb.stub().resolves(0), + deleteVersion: sb.stub().resolves(true), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(), diff --git a/test/unit/oclif/lib/resolve-version-ref.test.ts b/test/unit/oclif/lib/resolve-version-ref.test.ts index 19001f613..65f9cde7a 100644 --- a/test/unit/oclif/lib/resolve-version-ref.test.ts +++ b/test/unit/oclif/lib/resolve-version-ref.test.ts @@ -37,6 +37,8 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), deleteScenario: sb.stub(), + deleteScenarios: sb.stub(), + deleteVersion: sb.stub(), getLatest: sb.stub(), getPin: sb.stub(), getVersion: sb.stub(), From d31174858e52698d8b40ce54f074b39dce2e31a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Thu, 23 Apr 2026 10:57:43 +0700 Subject: [PATCH 2/4] refactor: [ENG-2323] address review-agent feedback on reset + refine - Fix indentation on static override flags in reset.ts and refine.ts - Add deletePin to IHarnessStore + HarnessStore + InMemoryHarnessStore - Wire deletePin into executeReset (spec: "Clear pin if present") - Add deletion order assertion (deleteOutcomes before deleteVersion) - Tighten renderResetText assertions to check labels not just numbers - Exit 2 in JSON mode when harness not enabled (consistent exit codes) - Add deletePin stubs to all test files with satisfies IHarnessStore --- src/agent/core/interfaces/i-harness-store.ts | 7 +++++++ src/agent/infra/harness/harness-store.ts | 12 ++++++++++-- src/oclif/commands/harness/refine.ts | 3 ++- src/oclif/commands/harness/reset.ts | 4 +++- test/helpers/in-memory-harness-store.ts | 4 ++++ .../agent/harness/harness-baseline-runner.test.ts | 1 + test/unit/agent/harness/harness-bootstrap.test.ts | 1 + test/unit/agent/harness/harness-evaluator.test.ts | 1 + test/unit/oclif/commands/harness/reset.test.ts | 13 ++++++++++--- test/unit/oclif/commands/harness/status.test.ts | 1 + test/unit/oclif/commands/harness/use.test.ts | 1 + test/unit/oclif/lib/resolve-version-ref.test.ts | 1 + 12 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/agent/core/interfaces/i-harness-store.ts b/src/agent/core/interfaces/i-harness-store.ts index 199dec2fc..82190e6df 100644 --- a/src/agent/core/interfaces/i-harness-store.ts +++ b/src/agent/core/interfaces/i-harness-store.ts @@ -66,6 +66,13 @@ export interface IHarnessStore { */ deleteOutcomes(projectId: string, commandType: string): Promise + /** + * Remove the pin for a `(projectId, commandType)` pair. + * Idempotent — returns `true` if a pin existed and was removed, + * `false` if no pin was set. + */ + deletePin(projectId: string, commandType: string): Promise + /** * Delete a single scenario by its `(projectId, commandType, scenarioId)` key. * Returns `true` when the scenario existed and was deleted; `false` on miss. diff --git a/src/agent/infra/harness/harness-store.ts b/src/agent/infra/harness/harness-store.ts index fc4661b9c..2a73ceb44 100644 --- a/src/agent/infra/harness/harness-store.ts +++ b/src/agent/infra/harness/harness-store.ts @@ -119,6 +119,14 @@ export class HarnessStore implements IHarnessStore { // ── scenarios ───────────────────────────────────────────────────────────── + async deletePin(projectId: string, commandType: string): Promise { + const key = this.pinKey(projectId, commandType) + const exists = await this.keyStorage.get(key) + if (exists === undefined) return false + await this.keyStorage.delete(key) + return true + } + async deleteScenario( projectId: string, commandType: string, @@ -147,6 +155,8 @@ export class HarnessStore implements IHarnessStore { return false } + // ── versions ─────────────────────────────────────────────────────────────── + async deleteScenarios(projectId: string, commandType: string): Promise { const keys: StorageKey[] = [] for (const projectType of ProjectTypeSchema.options) { @@ -174,8 +184,6 @@ export class HarnessStore implements IHarnessStore { return keys.length } - // ── versions ─────────────────────────────────────────────────────────────── - async deleteVersion( projectId: string, commandType: string, diff --git a/src/oclif/commands/harness/refine.ts b/src/oclif/commands/harness/refine.ts index 4109f3ff8..251240646 100644 --- a/src/oclif/commands/harness/refine.ts +++ b/src/oclif/commands/harness/refine.ts @@ -89,7 +89,7 @@ export function formatRefineResult( export default class HarnessRefine extends Command { static override description = 'Force-trigger a refinement attempt for a pair' -static override flags = { + static override flags = { commandType: Flags.string({ default: 'curate', description: 'Harness pair command type', @@ -177,6 +177,7 @@ static override flags = { if (synthesisResult?.reason === 'harness not enabled') { if (format === 'json') { this.log(JSON.stringify({accepted: false, error: 'harness not enabled'}, null, 2)) + this.exit(2) } else { this.error('Harness is not enabled — configure harness.enabled in .brv/config.json', {exit: 2}) } diff --git a/src/oclif/commands/harness/reset.ts b/src/oclif/commands/harness/reset.ts index 0a17eab5f..305f6d536 100644 --- a/src/oclif/commands/harness/reset.ts +++ b/src/oclif/commands/harness/reset.ts @@ -72,6 +72,8 @@ export async function executeReset( await store.deleteVersion(projectId, commandType, v.id) } + await store.deletePin(projectId, commandType) + return { outcomes: outcomesDeleted, scenarios: scenariosDeleted, @@ -98,7 +100,7 @@ export function renderResetText(counts: ResetArtifactCounts): string { export default class HarnessReset extends Command { static override description = 'Delete all harness state for a (project, commandType) pair' -static override flags = { + static override flags = { commandType: Flags.string({ default: 'curate', description: 'Harness pair command type', diff --git a/test/helpers/in-memory-harness-store.ts b/test/helpers/in-memory-harness-store.ts index 884f0970a..108df96ed 100644 --- a/test/helpers/in-memory-harness-store.ts +++ b/test/helpers/in-memory-harness-store.ts @@ -78,6 +78,10 @@ export class InMemoryHarnessStore implements IHarnessStore { return deleted } + async deletePin(projectId: string, commandType: string): Promise { + return this.pins.delete(partitionKey(projectId, commandType)) + } + async deleteScenario( projectId: string, commandType: string, diff --git a/test/unit/agent/harness/harness-baseline-runner.test.ts b/test/unit/agent/harness/harness-baseline-runner.test.ts index 84b54cc2a..c61a5bf47 100644 --- a/test/unit/agent/harness/harness-baseline-runner.test.ts +++ b/test/unit/agent/harness/harness-baseline-runner.test.ts @@ -74,6 +74,7 @@ function makeStoreStub(sb: SinonSandbox): { const store = { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), deleteScenarios: sb.stub(), deleteVersion: sb.stub(), diff --git a/test/unit/agent/harness/harness-bootstrap.test.ts b/test/unit/agent/harness/harness-bootstrap.test.ts index d6d7e8c22..8ece78f89 100644 --- a/test/unit/agent/harness/harness-bootstrap.test.ts +++ b/test/unit/agent/harness/harness-bootstrap.test.ts @@ -39,6 +39,7 @@ function makeStoreStub(sb: SinonSandbox): { const store = { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), deleteScenarios: sb.stub(), deleteVersion: sb.stub(), diff --git a/test/unit/agent/harness/harness-evaluator.test.ts b/test/unit/agent/harness/harness-evaluator.test.ts index 98836239e..dc8f17da2 100644 --- a/test/unit/agent/harness/harness-evaluator.test.ts +++ b/test/unit/agent/harness/harness-evaluator.test.ts @@ -188,6 +188,7 @@ function makeStoreStub(sb: SinonSandbox): { const store = { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), deleteScenarios: sb.stub(), deleteVersion: sb.stub(), diff --git a/test/unit/oclif/commands/harness/reset.test.ts b/test/unit/oclif/commands/harness/reset.test.ts index b4c1bb9b6..7926f5fde 100644 --- a/test/unit/oclif/commands/harness/reset.test.ts +++ b/test/unit/oclif/commands/harness/reset.test.ts @@ -61,6 +61,7 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { return { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub().resolves(0), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), deleteScenarios: sb.stub().resolves(0), deleteVersion: sb.stub().resolves(true), @@ -145,6 +146,12 @@ describe('HarnessReset command — countArtifacts + executeReset + renderResetTe expect((store.deleteVersion as SinonStub).callCount).to.equal(2) expect((store.deleteVersion as SinonStub).firstCall.args).to.deep.equal([PROJECT_ID, 'curate', 'v-2']) expect((store.deleteVersion as SinonStub).secondCall.args).to.deep.equal([PROJECT_ID, 'curate', 'v-1']) + + // Deletion order: outcomes before versions + expect((store.deleteOutcomes as SinonStub).calledBefore(store.deleteVersion as SinonStub)).to.equal(true) + + // Pin cleared + expect((store.deletePin as SinonStub).calledOnce).to.equal(true) }) // Test 4: executeReset with nothing to delete returns zero counts @@ -162,9 +169,9 @@ describe('HarnessReset command — countArtifacts + executeReset + renderResetTe it('renderResetText shows deletion counts', () => { const text = renderResetText({outcomes: 47, scenarios: 12, versions: 3}) - expect(text).to.include('3') - expect(text).to.include('47') - expect(text).to.include('12') + expect(text).to.include('3 version') + expect(text).to.include('47 outcome') + expect(text).to.include('12 scenario') }) // Test 6: renderResetText with nothing deleted shows appropriate message diff --git a/test/unit/oclif/commands/harness/status.test.ts b/test/unit/oclif/commands/harness/status.test.ts index 94d40772b..70c3908c3 100644 --- a/test/unit/oclif/commands/harness/status.test.ts +++ b/test/unit/oclif/commands/harness/status.test.ts @@ -55,6 +55,7 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { return { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), deleteScenarios: sb.stub(), deleteVersion: sb.stub(), diff --git a/test/unit/oclif/commands/harness/use.test.ts b/test/unit/oclif/commands/harness/use.test.ts index 408f90d5a..651211a3a 100644 --- a/test/unit/oclif/commands/harness/use.test.ts +++ b/test/unit/oclif/commands/harness/use.test.ts @@ -36,6 +36,7 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { return { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), deleteScenarios: sb.stub().resolves(0), deleteVersion: sb.stub().resolves(true), diff --git a/test/unit/oclif/lib/resolve-version-ref.test.ts b/test/unit/oclif/lib/resolve-version-ref.test.ts index 65f9cde7a..068278170 100644 --- a/test/unit/oclif/lib/resolve-version-ref.test.ts +++ b/test/unit/oclif/lib/resolve-version-ref.test.ts @@ -36,6 +36,7 @@ function makeStoreStub(sb: SinonSandbox): IHarnessStore { return { deleteOutcome: sb.stub(), deleteOutcomes: sb.stub(), + deletePin: sb.stub().resolves(false), deleteScenario: sb.stub(), deleteScenarios: sb.stub(), deleteVersion: sb.stub(), From fd82cebec23ce31a1e63a51a49b02ad0855e1229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Thu, 23 Apr 2026 11:05:47 +0700 Subject: [PATCH 3/4] refactor: [ENG-2323] address second review round on refine command - Propagate waitForTaskCompletion rejection via .catch(reject) to prevent completionPromise from hanging on timeout/disconnect - Exit code 2 in JSON error catch block (was exiting 0) --- src/oclif/commands/harness/refine.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/oclif/commands/harness/refine.ts b/src/oclif/commands/harness/refine.ts index 251240646..b4c6dfa9b 100644 --- a/src/oclif/commands/harness/refine.ts +++ b/src/oclif/commands/harness/refine.ts @@ -141,7 +141,7 @@ export default class HarnessRefine extends Command { timeoutMs: 60_000, }, () => {}, - ) + ).catch(reject) }) await client.requestWithAck(TaskEvents.CREATE, { @@ -158,6 +158,7 @@ export default class HarnessRefine extends Command { const message = error instanceof Error ? error.message : String(error) if (format === 'json') { this.log(JSON.stringify({accepted: false, error: message}, null, 2)) + this.exit(2) } else { this.error(`Refinement failed: ${message}`, {exit: 2}) } From d4a148db96a49dd5f9d5fd9e90789f4a7884f6ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Thu, 23 Apr 2026 11:14:45 +0700 Subject: [PATCH 4/4] refactor: [ENG-2323] address remaining review suggestions - Extract HARNESS_NOT_ENABLED_REASON shared constant to eliminate stringly-typed cross-process protocol between agent-process and refine command - Add runtime shape guard on JSON.parse result (check 'accepted' key) before casting to SynthesisResult - Replace Number.MAX_SAFE_INTEGER with bounded 10_000 limit in countArtifacts (recorder caps at ~200 per pair) - Add toVersion inference comment documenting the assumption --- src/oclif/commands/harness/refine.ts | 15 +++++++++++---- src/oclif/commands/harness/reset.ts | 3 ++- src/server/infra/daemon/agent-process.ts | 3 ++- src/shared/constants/harness.ts | 7 +++++++ 4 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 src/shared/constants/harness.ts diff --git a/src/oclif/commands/harness/refine.ts b/src/oclif/commands/harness/refine.ts index b4c6dfa9b..6e0d11e52 100644 --- a/src/oclif/commands/harness/refine.ts +++ b/src/oclif/commands/harness/refine.ts @@ -16,6 +16,7 @@ import {randomUUID} from 'node:crypto' import type {SynthesisResult} from '../../../agent/infra/harness/harness-synthesizer.js' import {resolveProject} from '../../../server/infra/project/resolve-project.js' +import {HARNESS_NOT_ENABLED_REASON} from '../../../shared/constants/harness.js' import {TaskEvents} from '../../../shared/transport/events/index.js' import {withDaemonRetry} from '../../lib/daemon-client.js' import { @@ -166,18 +167,22 @@ export default class HarnessRefine extends Command { return } - // Parse the result from the agent process + // Parse the result from the agent process with minimal shape guard let synthesisResult: SynthesisResult | undefined try { - synthesisResult = taskResult ? JSON.parse(taskResult) as SynthesisResult : undefined + const parsed: unknown = taskResult ? JSON.parse(taskResult) : undefined + synthesisResult = + parsed !== null && typeof parsed === 'object' && 'accepted' in parsed + ? (parsed as SynthesisResult) + : undefined } catch { synthesisResult = undefined } // Synthesizer unavailable — exit 2 per spec - if (synthesisResult?.reason === 'harness not enabled') { + if (synthesisResult?.reason === HARNESS_NOT_ENABLED_REASON) { if (format === 'json') { - this.log(JSON.stringify({accepted: false, error: 'harness not enabled'}, null, 2)) + this.log(JSON.stringify({accepted: false, error: HARNESS_NOT_ENABLED_REASON}, null, 2)) this.exit(2) } else { this.error('Harness is not enabled — configure harness.enabled in .brv/config.json', {exit: 2}) @@ -186,6 +191,8 @@ export default class HarnessRefine extends Command { return } + // toVersion inferred — SynthesisResult carries toVersionId (UUID) but + // not the numeric version. Safe for strictly sequential refinement. const toVersion = synthesisResult?.accepted ? (fromVersion === undefined ? undefined : fromVersion + 1) : undefined if (format === 'json') { diff --git a/src/oclif/commands/harness/reset.ts b/src/oclif/commands/harness/reset.ts index 305f6d536..a3d842a64 100644 --- a/src/oclif/commands/harness/reset.ts +++ b/src/oclif/commands/harness/reset.ts @@ -43,7 +43,8 @@ export async function countArtifacts( ): Promise { const [versions, outcomes, scenarios] = await Promise.all([ store.listVersions(projectId, commandType), - store.listOutcomes(projectId, commandType, Number.MAX_SAFE_INTEGER), + // Recorder caps at ~200 outcomes per pair, so this is effectively "all". + store.listOutcomes(projectId, commandType, 10_000), store.listScenarios(projectId, commandType), ]) diff --git a/src/server/infra/daemon/agent-process.ts b/src/server/infra/daemon/agent-process.ts index f79aedf8b..acbee5b10 100644 --- a/src/server/infra/daemon/agent-process.ts +++ b/src/server/infra/daemon/agent-process.ts @@ -36,6 +36,7 @@ import {FolderPackService} from '../../../agent/infra/folder-pack/folder-pack-se import {SessionMetadataStore} from '../../../agent/infra/session/session-metadata-store.js' import {FileKeyStorage} from '../../../agent/infra/storage/file-key-storage.js' import {createSearchKnowledgeService} from '../../../agent/infra/tools/implementations/search-knowledge-service.js' +import {HARNESS_NOT_ENABLED_REASON} from '../../../shared/constants/harness.js' import {AuthEvents} from '../../../shared/transport/events/auth-events.js' import {decodeSearchContent} from '../../../shared/transport/search-content.js' import {getCurrentConfig} from '../../config/environment.js' @@ -606,7 +607,7 @@ async function executeTask( case 'harness-refine': { const synthesizer = agent.getHarnessSynthesizer() if (!synthesizer) { - result = JSON.stringify({accepted: false, reason: 'harness not enabled'}) + result = JSON.stringify({accepted: false, reason: HARNESS_NOT_ENABLED_REASON}) break } diff --git a/src/shared/constants/harness.ts b/src/shared/constants/harness.ts new file mode 100644 index 000000000..6e5638482 --- /dev/null +++ b/src/shared/constants/harness.ts @@ -0,0 +1,7 @@ +/** + * Shared harness constants used across process boundaries + * (agent-process ↔ CLI commands). + */ + +/** Reason string returned by agent-process when harness is not enabled. */ +export const HARNESS_NOT_ENABLED_REASON = 'harness not enabled' as const