Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/agent/infra/harness/harness-outcome-recorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class Semaphore {
// ---------------------------------------------------------------------------

/** Synthetic outcome count per verdict (§C2 weighting policy). */
const BAD_SYNTHETIC_COUNT = 3
export const BAD_SYNTHETIC_COUNT = 3

/**
* Maximum feedback-sourced synthetic outcomes in the H window.
Expand All @@ -105,7 +105,7 @@ const FEEDBACK_SYNTHETIC_CAP = 10
const FEEDBACK_LIST_LIMIT = 100

/** Synthetic outcome count for 'good' verdict — asymmetric with BAD (3:1) per §C2. */
const GOOD_SYNTHETIC_COUNT = 1
export const GOOD_SYNTHETIC_COUNT = 1

/** H window size — matches the synthesizer's OUTCOMES_WINDOW. */
const H_WINDOW_SIZE = 50
Expand All @@ -114,7 +114,7 @@ const MAX_OUTCOMES_PER_SESSION = 50
const SEMAPHORE_PERMITS = 5

/** Delimiter between the original outcome ID and the synthetic suffix. */
const SYNTHETIC_DELIMITER = '__synthetic_'
export const SYNTHETIC_DELIMITER = '__synthetic_'

// ---------------------------------------------------------------------------
// Recorder
Expand Down
102 changes: 102 additions & 0 deletions src/oclif/commands/curate/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,18 @@ import {
providerMissingMessage,
withDaemonRetry,
} from '../../lib/daemon-client.js'
import {
attachFeedbackFromCli,
FeedbackError,
type FeedbackVerdict,
} from '../../lib/harness-feedback.js'
import {writeJsonResponse} from '../../lib/json-response.js'
import {DEFAULT_TIMEOUT_SECONDS, MAX_TIMEOUT_SECONDS, MIN_TIMEOUT_SECONDS, type ToolCallRecord, waitForTaskCompletion} from '../../lib/task-client.js'

/** Parsed flags type */
type CurateFlags = {
detach?: boolean
feedback?: FeedbackVerdict
files?: string[]
folder?: string[]
format?: 'json' | 'text'
Expand Down Expand Up @@ -72,6 +78,11 @@ Bad examples:
default: false,
description: 'Queue task and exit without waiting for completion',
}),
feedback: Flags.string({
description:
'After the curate completes, flag the most-recent outcome for AutoHarness learning. "bad" inserts 3 synthetic failures (weighted heavier); "good" inserts 1 synthetic success.',
options: ['good', 'bad'],
}),
files: Flags.string({
char: 'f',
description: 'Include specific file paths for critical context (max 5 files)',
Expand Down Expand Up @@ -101,8 +112,14 @@ Bad examples:

public async run(): Promise<void> {
const {args, flags: rawFlags} = await this.parse(Curate)
// oclif's `options: ['good', 'bad']` validator rejects anything
// else before we reach here, so the equality checks below only
// narrow the string type (no `as` cast); they are not input validation.
const feedbackVerdict: FeedbackVerdict | undefined =
rawFlags.feedback === 'good' || rawFlags.feedback === 'bad' ? rawFlags.feedback : undefined
const flags: CurateFlags = {
detach: rawFlags.detach,
feedback: feedbackVerdict,
files: rawFlags.files,
folder: rawFlags.folder,
format: rawFlags.format === 'json' ? 'json' : rawFlags.format === 'text' ? 'text' : undefined,
Expand All @@ -121,9 +138,19 @@ Bad examples:

let providerContext: ProviderErrorContext | undefined

// Capture projectRoot out of the daemon callback so feedback can
// run AFTER withDaemonRetry resolves. If feedback ran inside the
// callback, `this.error(..., {exit: 1})` would be caught by the
// outer try/catch below and routed to `reportError`, which
// swallows the exit code — the CLI would exit 0 on a
// NO_RECENT_OUTCOME path.
let capturedProjectRoot: string | undefined
let daemonSucceeded = false
try {
await withDaemonRetry(
async (client, projectRoot, worktreeRoot) => {
capturedProjectRoot = projectRoot

const active = await client.requestWithAck<ProviderConfigResponse>(
TransportStateEventNames.GET_PROVIDER_CONFIG,
)
Expand All @@ -140,6 +167,7 @@ Bad examples:
}

await this.submitTask({client, content: resolvedContent, flags, format, projectRoot, taskType, worktreeRoot})
daemonSucceeded = true
},
{
...this.getDaemonClientOptions(),
Expand All @@ -152,6 +180,16 @@ Bad examples:
)
} catch (error) {
this.reportError(error, format, providerContext)
return
}

// Feedback attaches only on a successful primary run.
if (
daemonSucceeded &&
flags.feedback !== undefined &&
capturedProjectRoot !== undefined
) {
await this.handleFeedback(capturedProjectRoot, flags, format)
}
}

Expand Down Expand Up @@ -247,6 +285,70 @@ Bad examples:
return filePath.slice(idx + marker.length)
}

/**
 * Record the user's `--feedback` verdict against the latest curate outcome.
 *
 * Behaviour (handoff §C1):
 * - No verdict supplied → returns immediately.
 * - Detach mode → nothing is attached; a warning is printed in text format
 *   only (JSON format stays silent).
 * - Success → emits a `curate:feedback` JSON response or a one-line text log.
 * - `FeedbackError` with code `HARNESS_DISABLED` → reported as skipped
 *   (JSON response with `skipped: true`, or a warning) and returns normally.
 * - Any other `FeedbackError` (NO_RECENT_OUTCOME / NO_STORAGE) → `this.error`
 *   with exit code 1.
 * - Any non-`FeedbackError` is rethrown unchanged.
 *
 * @param projectRoot - Project root handed to `attachFeedbackFromCli`.
 * @param flags - Parsed curate flags; `feedback` and `detach` are read.
 * @param format - Output format selecting JSON responses vs. text logging.
 */
private async handleFeedback(
  projectRoot: string,
  flags: CurateFlags,
  format: 'json' | 'text',
): Promise<void> {
  const requestedVerdict = flags.feedback
  if (requestedVerdict === undefined) return

  const wantsJson = format === 'json'

  // A detached task has not produced an outcome yet, so there is nothing to flag.
  if (flags.detach === true) {
    if (format === 'text') {
      this.warn('--feedback skipped: detach mode — no completed outcome to flag yet.')
    }

    return
  }

  try {
    const {outcomeId, syntheticCount, verdict} = await attachFeedbackFromCli(projectRoot, 'curate', requestedVerdict)

    if (wantsJson) {
      writeJsonResponse({
        command: 'curate:feedback',
        data: {outcomeId, syntheticCount, verdict},
        success: true,
      })
      return
    }

    const pluralSuffix = syntheticCount === 1 ? '' : 's'
    this.log(
      `feedback attached: ${verdict} → outcome ${outcomeId} (${syntheticCount} synthetic row${pluralSuffix} inserted for heuristic weighting)`,
    )
  } catch (error) {
    // Only FeedbackError carries a code we can map; everything else propagates.
    if (!(error instanceof FeedbackError)) throw error

    if (error.code !== 'HARNESS_DISABLED') {
      // NO_RECENT_OUTCOME / NO_STORAGE — user-input error per §C1.
      this.error(error.message, {exit: 1})
    }

    // Harness disabled: the primary curate already succeeded, so report and move on.
    if (wantsJson) {
      writeJsonResponse({
        command: 'curate:feedback',
        data: {reason: error.message, skipped: true},
        success: true,
      })
    } else {
      this.warn(`--feedback ignored: ${error.message}`)
    }
  }
}

/**
* Print a human-readable pending review summary to stdout.
* Called after successful curate completion when review is required.
Expand Down
95 changes: 93 additions & 2 deletions src/oclif/commands/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@ import {
providerMissingMessage,
withDaemonRetry,
} from '../lib/daemon-client.js'
import {
attachFeedbackFromCli,
FeedbackError,
type FeedbackVerdict,
} from '../lib/harness-feedback.js'
import {writeJsonResponse} from '../lib/json-response.js'
import {DEFAULT_TIMEOUT_SECONDS, MAX_TIMEOUT_SECONDS, MIN_TIMEOUT_SECONDS, waitForTaskCompletion} from '../lib/task-client.js'

/** Parsed flags type */
type QueryFlags = {
feedback?: FeedbackVerdict
format?: 'json' | 'text'
timeout?: number
}
Expand Down Expand Up @@ -46,6 +52,11 @@ Bad:
'<%= config.bin %> <%= command.id %> "How does auth work?" --format json',
]
public static flags = {
feedback: Flags.string({
description:
'After the query completes, flag the most-recent outcome for AutoHarness learning. "bad" inserts 3 synthetic failures (weighted heavier); "good" inserts 1 synthetic success.',
options: ['good', 'bad'],
}),
format: Flags.string({
default: 'text',
description: 'Output format (text or json)',
Expand All @@ -66,16 +77,33 @@ Bad:

public async run(): Promise<void> {
const {args, flags: rawFlags} = await this.parse(Query)
const flags = rawFlags as QueryFlags
const format = (flags.format ?? 'text') as 'json' | 'text'
// oclif's `options:` validators reject unknown values before we
// reach here; the branches below narrow the string types without
// `as` casts (matching the curate command's pattern).
const feedbackVerdict: FeedbackVerdict | undefined =
rawFlags.feedback === 'good' || rawFlags.feedback === 'bad' ? rawFlags.feedback : undefined
const flags: QueryFlags = {
feedback: feedbackVerdict,
format: rawFlags.format === 'json' ? 'json' : rawFlags.format === 'text' ? 'text' : undefined,
timeout: rawFlags.timeout,
}
const format: 'json' | 'text' = flags.format ?? 'text'

if (!this.validateInput(args.query, format)) return

let providerContext: ProviderErrorContext | undefined
// Captured from the daemon callback so feedback runs AFTER
// withDaemonRetry resolves. Running it inside the callback would
// let `this.error({exit: 1})` get caught by the outer try/catch
// and routed to `reportError`, which swallows the exit code.
let capturedProjectRoot: string | undefined
let daemonSucceeded = false

try {
await withDaemonRetry(
async (client, projectRoot, worktreeRoot) => {
capturedProjectRoot = projectRoot

const active = await client.requestWithAck<ProviderConfigResponse>(
TransportStateEventNames.GET_PROVIDER_CONFIG,
)
Expand All @@ -99,6 +127,7 @@ Bad:
timeoutMs: (flags.timeout ?? DEFAULT_TIMEOUT_SECONDS) * 1000,
worktreeRoot,
})
daemonSucceeded = true
},
{
...this.getDaemonClientOptions(),
Expand All @@ -111,6 +140,68 @@ Bad:
)
} catch (error) {
this.reportError(error, format, providerContext)
return
}

if (
daemonSucceeded &&
feedbackVerdict !== undefined &&
capturedProjectRoot !== undefined
) {
await this.handleFeedback(capturedProjectRoot, feedbackVerdict, format)
}
}

/**
 * Record the user's `--feedback` verdict against the latest query outcome.
 *
 * Behaviour (handoff §C1):
 * - Success → emits a `query:feedback` JSON response or a one-line text log.
 * - `FeedbackError` with code `HARNESS_DISABLED` → reported as skipped
 *   (JSON response with `skipped: true`, or a warning) and returns normally.
 * - Any other `FeedbackError` (NO_RECENT_OUTCOME / NO_STORAGE) → `this.error`
 *   with exit code 1.
 * - Any non-`FeedbackError` is rethrown unchanged.
 *
 * @param projectRoot - Project root handed to `attachFeedbackFromCli`.
 * @param verdict - The already-narrowed feedback verdict to attach.
 * @param format - Output format selecting JSON responses vs. text logging.
 */
private async handleFeedback(
  projectRoot: string,
  verdict: FeedbackVerdict,
  format: 'json' | 'text',
): Promise<void> {
  const asJson = format === 'json'

  try {
    const attached = await attachFeedbackFromCli(projectRoot, 'query', verdict)

    if (!asJson) {
      const rowSuffix = attached.syntheticCount === 1 ? '' : 's'
      this.log(
        `feedback attached: ${attached.verdict} → outcome ${attached.outcomeId} (${attached.syntheticCount} synthetic row${rowSuffix} inserted for heuristic weighting)`,
      )
      return
    }

    writeJsonResponse({
      command: 'query:feedback',
      data: {
        outcomeId: attached.outcomeId,
        syntheticCount: attached.syntheticCount,
        verdict: attached.verdict,
      },
      success: true,
    })
  } catch (error) {
    // Anything that is not a FeedbackError is unexpected here; let it propagate.
    if (!(error instanceof FeedbackError)) throw error

    const harnessDisabled = error.code === 'HARNESS_DISABLED'
    if (!harnessDisabled) {
      // NO_RECENT_OUTCOME / NO_STORAGE — user-input error per §C1.
      this.error(error.message, {exit: 1})
    }

    // Harness disabled: the primary query already succeeded, so report and move on.
    if (asJson) {
      writeJsonResponse({
        command: 'query:feedback',
        data: {reason: error.message, skipped: true},
        success: true,
      })
    } else {
      this.warn(`--feedback ignored: ${error.message}`)
    }
  }
}

Expand Down
Loading
Loading