diff --git a/.gitignore b/.gitignore index 183f403..b471cb7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .overnight/ .relay/ node_modules -dist/ +**/dist/ +**/types.d.ts diff --git a/packages/connectivity/src/types.d.ts b/packages/connectivity/src/types.d.ts deleted file mode 100644 index 5279555..0000000 --- a/packages/connectivity/src/types.d.ts +++ /dev/null @@ -1,88 +0,0 @@ -export type SignalAudience = 'self' | 'coordinator' | 'selected' | 'all'; -export type MessageClass = 'attention' | 'confidence' | 'conflict' | 'handoff' | 'escalation'; -export type SignalClass = 'attention.raise' | 'confidence.high' | 'confidence.medium' | 'confidence.low' | 'confidence.blocker' | 'conflict.active' | 'conflict.resolved' | 'handoff.ready' | 'handoff.partial' | 'escalation.interrupt' | 'escalation.uncertainty'; -export type SignalPriority = 'low' | 'normal' | 'high' | 'critical'; -export type SignalState = 'emitted' | 'active' | 'superseded' | 'expired' | 'resolved'; -export type SignalEvent = 'emitted' | 'superseded' | 'resolved' | 'expired'; -export type RequestedRoutingMode = 'cheap' | 'fast' | 'deep'; -export interface ConnectivitySignal { - id: string; - threadId: string; - source: string; - audience: SignalAudience; - messageClass: MessageClass; - signalClass: SignalClass; - priority: SignalPriority; - confidence?: number; - summary: string; - details?: string; - replaces?: string; - expiresAtStep?: number; - emittedAt: string; - state: SignalState; -} -export interface EmitSignalInput { - threadId: string; - source: string; - audience: SignalAudience; - messageClass: MessageClass; - signalClass: SignalClass; - priority: SignalPriority; - summary: string; - confidence?: number; - details?: string; - replaces?: string; - expiresAtStep?: number; -} -export interface SignalQuery { - threadId: string; - source?: string; - messageClass?: MessageClass | MessageClass[]; - signalClass?: SignalClass | SignalClass[]; - state?: SignalState | SignalState[]; - priority?: SignalPriority | SignalPriority[]; - since?: string; - before?: string; - limit?: number; - order?: 'newest' | 'oldest'; -} -export interface SuppressionConfig { - basis: 'step' | 'time'; - windowMs?: number; -} -export interface RoutingEscalationHook { - onEscalation(signal: ConnectivitySignal): RequestedRoutingMode | void; -} -export type SelectedAudienceResolver = (signal: ConnectivitySignal) => string[]; -export type SignalCallback = (signal: ConnectivitySignal, event: SignalEvent) => void; -export interface ConnectivityLayerConfig { - suppressionConfig?: SuppressionConfig; - routingEscalationHook?: RoutingEscalationHook; -} -export interface ConnectivityLayer { - emit(input: EmitSignalInput): ConnectivitySignal; - resolve(signalId: string): ConnectivitySignal; - get(signalId: string): ConnectivitySignal | null; - query(query: SignalQuery): ConnectivitySignal[]; - advanceStep(threadId: string): void; - registerSelectedResolver(resolver: SelectedAudienceResolver): void; - onSignal(callback: SignalCallback): void; - offSignal(callback: SignalCallback): void; -} -export declare const SIGNAL_AUDIENCES: readonly ["self", "coordinator", "selected", "all"]; -export declare const MESSAGE_CLASSES: readonly ["attention", "confidence", "conflict", "handoff", "escalation"]; -export declare const SIGNAL_CLASSES: readonly ["attention.raise", "confidence.high", "confidence.medium", "confidence.low", "confidence.blocker", "conflict.active", "conflict.resolved", "handoff.ready", "handoff.partial", "escalation.interrupt", "escalation.uncertainty"]; -export declare const SIGNAL_PRIORITIES: readonly ["low", "normal", "high", "critical"]; -export declare const SIGNAL_STATES: readonly ["emitted", "active", "superseded", "expired", "resolved"]; -export declare const SIGNAL_EVENTS: readonly ["emitted", "superseded", "resolved", "expired"]; -export declare const MESSAGE_CLASS_TO_SIGNAL_PREFIX: Record; -export declare const TERMINAL_STATES: readonly ["superseded", "expired", "resolved"]; -export declare class ConnectivityError extends Error { - constructor(message: string); -} -export declare class SignalValidationError extends ConnectivityError { - constructor(message: string); -} -export declare class SignalNotFoundError extends ConnectivityError { - constructor(signalId: string); -} diff --git a/packages/harness/src/harness.test.ts b/packages/harness/src/harness.test.ts index ea84955..3c935bf 100644 --- a/packages/harness/src/harness.test.ts +++ b/packages/harness/src/harness.test.ts @@ -1,6 +1,12 @@ import { describe, expect, it, vi } from 'vitest'; -import { createHarness, HarnessConfigError, type HarnessModelOutput, type HarnessTraceEvent } from './index.js'; +import { + createHarness, + createToolEvidenceClarificationHook, + HarnessConfigError, + type HarnessModelOutput, + type HarnessTraceEvent, +} from './index.js'; function createInput() { return { @@ -284,6 +290,221 @@ describe('harness runtime', () => { expect(result.stopReason).toBe('tool_error_unrecoverable'); }); + it('asks for clarification when tool evidence shows empty results', async () => { + const trace: HarnessTraceEvent[] = []; + const steps: HarnessModelOutput[] = [ + { type: 'tool_request', calls: [{ id: 'call-1', name: 'search', input: { query: 'repo' } }] }, + { type: 'final_answer', text: 'should not be reached' }, + ]; + + const harness = createHarness({ + model: { nextStep: vi.fn(async () => steps.shift() as HarnessModelOutput) }, + tools: { + listAvailable: async () => [{ name: 'search', description: 'Search' }], + execute: async () => ({ + callId: 'call-1', + toolName: 'search', + status: 'success', + structuredOutput: { results: [] }, + }), + }, + hooks: { clarifyOnToolResult: createToolEvidenceClarificationHook() }, + trace: { emit: (event) => trace.push(event) }, + clock: createClock([0, 1, 2, 3, 4, 5]), + }); + + const result = await harness.runTurn(createInput()); + + expect(result.outcome).toBe('needs_clarification'); + expect(result.stopReason).toBe('clarification_required'); + expect(result.assistantMessage?.text).toContain('current search query'); + expect(result.continuation?.type).toBe('clarification'); + expect(result.continuation?.state.toolEvidence).toMatchObject({ + callId: 'call-1', + toolName: 'search', + reason: 'empty_results', + }); + expect(result.usage.modelCalls).toBe(1); + expect(trace.map((event) => event.type)).toContain('clarification_requested'); + }); + + it('asks for clarification when tool evidence shows ambiguous identifiers', async () => { + const harness = createHarness({ + model: { + nextStep: async () => ({ + type: 'tool_request', + calls: [{ id: 'call-1', name: 'issueLookup', input: { id: '123' } }], + }), + }, + tools: { + listAvailable: async () => [{ name: 'issueLookup', description: 'Issue lookup' }], + execute: async () => ({ + callId: 'call-1', + toolName: 'issueLookup', + status: 'success', + structuredOutput: { ambiguous: true, candidates: [{ id: 'A-123' }, { id: 'B-123' }] }, + }), + }, + hooks: { clarifyOnToolResult: createToolEvidenceClarificationHook() }, + clock: createClock([0, 1, 2, 3, 4]), + }); + + const result = await harness.runTurn(createInput()); + + expect(result.outcome).toBe('needs_clarification'); + expect(result.assistantMessage?.text).toContain('Which exact identifier'); + expect(result.metadata?.clarification).toMatchObject({ + toolName: 'issueLookup', + reason: 'ambiguous_identifier', + }); + }); + + it('asks for clarification instead of failing on transient provider tool errors', async () => { + const onToolError = vi.fn(); + const harness = createHarness({ + model: { + nextStep: async () => ({ + type: 'tool_request', + calls: [{ id: 'call-1', name: 'providerSearch', input: { query: 'roadmap' } }], + }), + }, + tools: { + listAvailable: async () => [{ name: 'providerSearch', description: 'Provider search' }], + execute: async () => ({ + callId: 'call-1', + toolName: 'providerSearch', + status: 'error', + error: { code: 'provider_timeout', message: 'provider timed out' }, + }), + }, + hooks: { + clarifyOnToolResult: createToolEvidenceClarificationHook(), + onToolError, + }, + clock: createClock([0, 1, 2, 3, 4]), + }); + + const result = await harness.runTurn(createInput()); + + expect(onToolError).toHaveBeenCalledOnce(); + expect(result.outcome).toBe('needs_clarification'); + expect(result.stopReason).toBe('clarification_required'); + expect(result.assistantMessage?.text).toContain('transient provider error'); + }); + + it('does not crash when a tool error omits error.code', async () => { + const harness = createHarness({ + model: { + nextStep: async () => ({ + type: 'tool_request', + calls: [{ id: 'call-1', name: 'providerSearch', input: { query: 'roadmap' } }], + }), + }, + tools: { + listAvailable: async () => [{ name: 'providerSearch', description: 'Provider search' }], + execute: async () => ({ + callId: 'call-1', + toolName: 'providerSearch', + status: 'error', + error: { code: undefined as unknown as string, message: 'provider timed out' }, + }), + }, + hooks: { clarifyOnToolResult: createToolEvidenceClarificationHook() }, + clock: createClock([0, 1, 2, 3, 4]), + }); + + const result = await harness.runTurn(createInput()); + + expect(result.outcome).toBe('failed'); + expect(result.stopReason).toBe('tool_error_unrecoverable'); + }); + + it('does not ask for clarification on successful tools with blank output and no empty-result evidence', async () => { + const steps: HarnessModelOutput[] = [ + { type: 'tool_request', calls: [{ id: 'call-1', name: 'lookup', input: {} }] }, + { type: 'final_answer', text: 'Completed without clarification' }, + ]; + + const harness = createHarness({ + model: { nextStep: async () => steps.shift() as HarnessModelOutput }, + tools: { + listAvailable: async () => [{ name: 'lookup', description: 'Lookup' }], + execute: async () => ({ + callId: 'call-1', + toolName: 'lookup', + status: 'success', + output: ' ', + }), + }, + hooks: { clarifyOnToolResult: createToolEvidenceClarificationHook() }, + clock: createClock([0, 1, 2, 3, 4, 5]), + }); + + const result = await harness.runTurn(createInput()); + + expect(result.outcome).toBe('completed'); + expect(result.stopReason).toBe('answer_finalized'); + expect(result.assistantMessage?.text).toBe('Completed without clarification'); + }); + + it('prefers limit enforcement over clarification when the tool call budget is exhausted', async () => { + const harness = createHarness({ + model: { + nextStep: async () => ({ + type: 'tool_request', + calls: [{ id: 'call-1', name: 'search', input: { query: 'repo' } }], + }), + }, + tools: { + listAvailable: async () => [{ name: 'search', description: 'Search' }], + execute: async () => ({ + callId: 'call-1', + toolName: 'search', + status: 'success', + structuredOutput: { results: [] }, + }), + }, + hooks: { clarifyOnToolResult: createToolEvidenceClarificationHook() }, + limits: { maxToolCalls: 1 }, + clock: createClock([0, 1, 2, 3, 4]), + }); + + const result = await harness.runTurn(createInput()); + + expect(result.outcome).toBe('deferred'); + expect(result.stopReason).toBe('max_tool_calls_reached'); + }); + + it('ignores malformed clarification hook payloads', async () => { + const steps: HarnessModelOutput[] = [ + { type: 'tool_request', calls: [{ id: 'call-1', name: 'lookup', input: {} }] }, + { type: 'final_answer', text: 'Completed without clarification' }, + ]; + + const harness = createHarness({ + model: { nextStep: async () => steps.shift() as HarnessModelOutput }, + tools: { + listAvailable: async () => [{ name: 'lookup', description: 'Lookup' }], + execute: async () => ({ + callId: 'call-1', + toolName: 'lookup', + status: 'success', + structuredOutput: { results: [] }, + }), + }, + hooks: { + clarifyOnToolResult: async () => ({ question: 42 as unknown as string, reason: 'custom' }), + }, + clock: createClock([0, 1, 2, 3, 4, 5]), + }); + + const result = await harness.runTurn(createInput()); + + expect(result.outcome).toBe('completed'); + expect(result.stopReason).toBe('answer_finalized'); + expect(result.assistantMessage?.text).toBe('Completed without clarification'); + }); + it('retries once after invalid model output then succeeds', async () => { const onInvalidModelOutput = vi.fn(); const steps: HarnessModelOutput[] = [ diff --git a/packages/harness/src/harness.ts b/packages/harness/src/harness.ts index c7cb941..e79a758 100644 --- a/packages/harness/src/harness.ts +++ b/packages/harness/src/harness.ts @@ -14,6 +14,7 @@ import type { HarnessResult, HarnessStopReason, HarnessToolCall, + HarnessToolEvidenceClarification, HarnessToolResult, HarnessTraceEvent, HarnessTraceSummary, @@ -331,6 +332,10 @@ async function runTurn(config: NormalizedConfig, input: HarnessTurnInput): Promi state.recentToolResultHashes = []; await emit(config, input, state, { type: 'tool_failed', result }); await config.hooks?.onToolError?.(result, executionState(input, state, startedAt, config)); + finalResult = await maybeClarifyFromToolResult(config, input, state, startedAt, result); + if (finalResult) { + return finalResult; + } if (result.error?.retryable !== true) { finalResult = buildResult(input, state, { outcome: 'failed', @@ -341,6 +346,10 @@ async function runTurn(config: NormalizedConfig, input: HarnessTurnInput): Promi } } else { await emit(config, input, state, { type: 'tool_finished', result }); + finalResult = await maybeClarifyFromToolResult(config, input, state, startedAt, result); + if (finalResult) { + return finalResult; + } // Compute a hash signature of the tool result so we can detect // the model dead-looping on the same tool call. Two safety // guards (codex P1 + P2 review on PR #63): @@ -483,6 +492,73 @@ async function checkLimits( return null; } +async function maybeClarifyFromToolResult( + config: NormalizedConfig, + input: HarnessTurnInput, + state: MutableState, + startedAt: number, + result: HarnessToolResult, +): Promise { + if (!config.hooks?.clarifyOnToolResult) { + return null; + } + + const clarification = await config.hooks.clarifyOnToolResult( + result, + executionState(input, state, startedAt, config), + ); + const question = normalizeClarificationQuestion(clarification); + if (!question) { + return null; + } + + const limitResult = await checkLimits(config, input, state, startedAt); + if (limitResult) { + return limitResult; + } + + state.transcript.push({ + type: 'clarification_request', + iteration: state.iteration, + question, + }); + await emit(config, input, state, { + type: 'clarification_requested', + question, + }); + + const toolEvidence = { + callId: result.callId, + toolName: result.toolName, + status: result.status, + reason: clarification?.reason, + metadata: clarification?.metadata, + }; + return buildResult(input, state, { + outcome: 'needs_clarification', + stopReason: 'clarification_required', + assistantMessage: { text: question }, + continuation: createContinuation(config, input, 'clarification', { + stopReason: 'clarification_required', + question, + toolEvidence, + transcript: summarizeTranscript(state.transcript), + }), + metadata: { clarification: toolEvidence }, + }); +} + +function normalizeClarificationQuestion( + clarification: HarnessToolEvidenceClarification | null | undefined, +): string | null { + if (!clarification || typeof clarification.question !== 'string') { + return null; + } + + const question = clarification.question.trim(); + return question.length > 0 ? question : null; +} + async function buildLimitResult( config: NormalizedConfig, input: HarnessTurnInput, diff --git a/packages/harness/src/index.ts b/packages/harness/src/index.ts index 55e242c..69123fe 100644 --- a/packages/harness/src/index.ts +++ b/packages/harness/src/index.ts @@ -24,6 +24,13 @@ export { WORKSPACE_TOOL_NAMES, } from './tools/workspace-tool-registry.js'; export type { WorkspaceToolRegistryOptions } from './tools/workspace-tool-registry.js'; +export { + createToolEvidenceClarificationHook, + detectToolEvidenceClarification, +} from './tool-evidence-clarification.js'; +export type { + ToolEvidenceClarificationOptions, +} from './tool-evidence-clarification.js'; export { OpenRouterSingleShotAdapter, createOpenRouterSingleShotAdapter } from './router/openrouter-singleshot-adapter.js'; export type { OpenRouterSingleShotAdapterConfig } from './router/openrouter-singleshot-adapter.js'; @@ -79,6 +86,9 @@ export type { HarnessStopReason, HarnessToolAvailabilityInput, HarnessToolCall, + HarnessToolEvidenceClarification, + HarnessToolEvidenceClarificationHook, + HarnessToolEvidenceClarificationReason, HarnessToolDefinition, HarnessToolError, HarnessToolExecutionContext, diff --git a/packages/harness/src/tool-evidence-clarification.ts b/packages/harness/src/tool-evidence-clarification.ts new file mode 100644 index 0000000..bde9864 --- /dev/null +++ b/packages/harness/src/tool-evidence-clarification.ts @@ -0,0 +1,218 @@ +import type { + HarnessToolEvidenceClarification, + HarnessToolEvidenceClarificationHook, + HarnessToolEvidenceClarificationReason, + HarnessToolResult, +} from './types.js'; + +export interface ToolEvidenceClarificationOptions { + emptyResultKeys?: readonly string[]; + questionForReason?: Partial>; +} + +const DEFAULT_EMPTY_RESULT_KEYS = [ + 'results', + 'items', + 'matches', + 'entries', + 'records', + 'data', +]; + +const AMBIGUOUS_RESULT_KEYS = ['candidates', 'possibleMatches', 'ambiguousMatches']; + +const TRANSIENT_ERROR_CODES = new Set([ + 'provider_timeout', + 'timeout', + 'rate_limited', + 'rate_limit', + 'temporarily_unavailable', + 'provider_unavailable', + 'network_error', +]); + +export function createToolEvidenceClarificationHook( + options: ToolEvidenceClarificationOptions = {}, +): HarnessToolEvidenceClarificationHook { + return (result) => detectToolEvidenceClarification(result, options); +} + +export function detectToolEvidenceClarification( + result: HarnessToolResult, + options: ToolEvidenceClarificationOptions = {}, +): HarnessToolEvidenceClarification | null { + const explicit = readExplicitClarification(result); + if (explicit) { + return explicit; + } + + const reason = classifyToolEvidence(result, options); + if (!reason) { + return null; + } + + return { + reason, + question: questionFor(result, reason, options), + metadata: { toolName: result.toolName, callId: result.callId }, + }; +} + +function classifyToolEvidence( + result: HarnessToolResult, + options: ToolEvidenceClarificationOptions, +): HarnessToolEvidenceClarificationReason | null { + if (hasAmbiguousEvidence(result)) { + return 'ambiguous_identifier'; + } + + if (hasEmptyResultEvidence(result, options)) { + return 'empty_results'; + } + + if (hasTransientProviderEvidence(result)) { + return 'transient_provider_error'; + } + + return null; +} + +function hasEmptyResultEvidence( + result: HarnessToolResult, + options: ToolEvidenceClarificationOptions, +): boolean { + if (result.status !== 'success') { + return false; + } + + const structured = result.structuredOutput; + if (structured) { + if (structured.empty === true || structured.resultCount === 0 || structured.total === 0) { + return true; + } + + const keys = options.emptyResultKeys ?? DEFAULT_EMPTY_RESULT_KEYS; + if (keys.some((key) => Array.isArray(structured[key]) && structured[key].length === 0)) { + return true; + } + } + + const output = result.output?.trim(); + if (!output) { + return false; + } + + return /\b(no|zero)\s+(results?|matches?|items?|records?)\b/i.test(output); +} + +function hasAmbiguousEvidence(result: HarnessToolResult): boolean { + const structured = result.structuredOutput; + if (structured) { + if (structured.ambiguous === true) { + return true; + } + + if ( + AMBIGUOUS_RESULT_KEYS.some( + (key) => Array.isArray(structured[key]) && structured[key].length > 1, + ) + ) { + return true; + } + } + + return /\b(ambiguous|multiple\s+(matches?|results?|candidates?)|more than one)\b/i.test( + result.output ?? '', + ); +} + +function hasTransientProviderEvidence(result: HarnessToolResult): boolean { + if (result.status !== 'error') { + return false; + } + + if (result.error?.retryable === true) { + return true; + } + + const code = result.error?.code?.toLowerCase(); + return code ? TRANSIENT_ERROR_CODES.has(code) : false; +} + +function readExplicitClarification( + result: HarnessToolResult, +): HarnessToolEvidenceClarification | null { + return ( + readClarificationValue(result.metadata?.clarification, result) ?? + readClarificationValue(result.metadata?.clarificationHint, result) ?? + readClarificationValue(result.structuredOutput?.clarification, result) ?? + readClarificationValue(result.structuredOutput?.clarificationHint, result) + ); +} + +function readClarificationValue( + value: unknown, + result: HarnessToolResult, +): HarnessToolEvidenceClarification | null { + if (typeof value === 'string') { + const question = value.trim(); + return question ? { question, reason: 'custom', metadata: { toolName: result.toolName } } : null; + } + + if (!isRecord(value)) { + return null; + } + + const question = readString(value.question); + if (!question) { + return null; + } + + return { + question, + reason: readReason(value.reason), + metadata: isRecord(value.metadata) ? value.metadata : { toolName: result.toolName }, + }; +} + +function readString(value: unknown): string | null { + return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; +} + +function readReason(value: unknown): HarnessToolEvidenceClarificationReason { + switch (value) { + case 'empty_results': + case 'ambiguous_identifier': + case 'transient_provider_error': + case 'custom': + return value; + default: + return 'custom'; + } +} + +function questionFor( + result: HarnessToolResult, + reason: HarnessToolEvidenceClarificationReason, + options: ToolEvidenceClarificationOptions, +): string { + const configured = options.questionForReason?.[reason]; + if (configured && configured.trim().length > 0) { + return configured.trim(); + } + + switch (reason) { + case 'ambiguous_identifier': + return `I found multiple possible matches in ${result.toolName}. Which exact identifier should I use?`; + case 'transient_provider_error': + return `The ${result.toolName} lookup hit a transient provider error. What exact identifier or narrower filter should I retry with?`; + case 'empty_results': + return `I could not find a match with the current ${result.toolName} query. What exact identifier, name, or narrower filter should I use?`; + case 'custom': + return `What exact identifier or narrower filter should I use for ${result.toolName}?`; + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} diff --git a/packages/harness/src/types.ts b/packages/harness/src/types.ts index a1a10ab..81fa30e 100644 --- a/packages/harness/src/types.ts +++ b/packages/harness/src/types.ts @@ -446,6 +446,7 @@ export interface HarnessLimitReachedEvent extends HarnessBaseTraceEvent { } export interface HarnessHooks { + clarifyOnToolResult?: HarnessToolEvidenceClarificationHook; onInvalidModelOutput?: ( output: HarnessInvalidOutput, state: HarnessExecutionState, @@ -460,6 +461,27 @@ export interface HarnessHooks { ) => Promise | void; } +export type HarnessToolEvidenceClarificationReason = + | 'empty_results' + | 'ambiguous_identifier' + | 'transient_provider_error' + | 'custom'; + +export interface HarnessToolEvidenceClarification { + question: string; + reason: HarnessToolEvidenceClarificationReason; + metadata?: Record; +} + +export type HarnessToolEvidenceClarificationHook = ( + result: HarnessToolResult, + state: HarnessExecutionState, +) => + | Promise + | HarnessToolEvidenceClarification + | null + | undefined; + export interface HarnessExecutionState { assistantId: string; turnId: string;