diff --git a/.changeset/tool-call-error-terminal.md b/.changeset/tool-call-error-terminal.md new file mode 100644 index 000000000..fa07a07f8 --- /dev/null +++ b/.changeset/tool-call-error-terminal.md @@ -0,0 +1,17 @@ +--- +'@tanstack/ai-event-client': minor +'@tanstack/ai-client': minor +'@tanstack/ai': minor +--- + +Add an `'error'` terminal state to `ToolCallState`. When a tool execution produces an output error, the StreamProcessor now transitions the `tool-call` part to `state: 'error'` instead of parking it at `'input-complete'`. + +Previously an errored tool call left the tool-call part at `'input-complete'` forever, so UIs that render lifecycle from the part's `state` could not distinguish "still executing" from "failed" without reverse-engineering the error-shaped `output` or the sibling `tool-result` part. The new terminal state makes the tool-call state machine self-describing and symmetric with `ToolResultState` (which already has `'error'`): + +```ts +if (part.type === 'tool-call' && part.state === 'error') { + // render failure — no more inferring from output shape +} +``` + +The completion safety net (`RUN_FINISHED` / stream finalization) no longer downgrades a failed tool call back to `'input-complete'`, including when an `output-error` result arrives before `TOOL_CALL_END`. diff --git a/packages/ai-client/src/chat-client.ts b/packages/ai-client/src/chat-client.ts index 6ec7ce563..468dc7618 100644 --- a/packages/ai-client/src/chat-client.ts +++ b/packages/ai-client/src/chat-client.ts @@ -1218,11 +1218,19 @@ export class ChatClient< context, ) - // Add result via processor + // Add result via processor. `result.state` is the authoritative error + // signal; `addToolResult` infers error-ness from the error message being + // truthy. 
Pass an error message ONLY for output-error results (falling back + // to a default so an empty message like `throw new Error()` still reaches + // the terminal 'error' state), and `undefined` otherwise — so error + // signalling derives solely from `result.state`, never from a stray + // `result.errorText` on a successful result. this.processor.addToolResult( result.toolCallId, result.output, - result.errorText, + result.state === 'output-error' + ? result.errorText || 'Tool execution failed' + : undefined, ) // If stream is in progress, queue continuation check for after it ends diff --git a/packages/ai-client/src/types.ts b/packages/ai-client/src/types.ts index 5127a33de..fa00811d7 100644 --- a/packages/ai-client/src/types.ts +++ b/packages/ai-client/src/types.ts @@ -90,6 +90,7 @@ export type ToolCallState = | 'approval-requested' // Waiting for user approval | 'approval-responded' // User has approved/denied | 'complete' // Result is complete + | 'error' // Tool execution failed (terminal) /** * Tool result states - track the lifecycle of a tool result diff --git a/packages/ai-client/tests/chat-client-context.test.ts b/packages/ai-client/tests/chat-client-context.test.ts index d48dba51f..95dfe5a7b 100644 --- a/packages/ai-client/tests/chat-client-context.test.ts +++ b/packages/ai-client/tests/chat-client-context.test.ts @@ -309,7 +309,7 @@ describe('ChatClient runtime context', () => { expect( findToolCallPart(client, 'tc-invalid-executable-output'), ).toMatchObject({ - state: 'input-complete', + state: 'error', output: { error: expect.stringContaining('expected object'), }, @@ -321,6 +321,54 @@ describe('ChatClient runtime context', () => { }) }) + it('renders a client tool that throws an empty-message error as terminal "error" (issue #718)', async () => { + const firstChunks = createToolCallChunks([ + { + id: 'tc-empty-error', + name: 'throwing_tool', + arguments: '{}', + }, + ]) + const secondChunks = createTextChunks('done', 'msg-empty-error') + let 
callIndex = 0 + + const adapter: ConnectConnectionAdapter = { + async *connect(_messages, _data, abortSignal) { + const chunks = callIndex === 0 ? firstChunks : secondChunks + callIndex++ + for (const chunk of chunks) { + if (abortSignal?.aborted) { + return + } + yield chunk + } + }, + } + + const tool = toolDefinition({ + name: 'throwing_tool', + description: 'Throws an error with no message', + }).client(() => { + // Empty message — error-ness must come from the output-error state, not + // from the truthiness of the message string. + throw new Error() + }) + + const client = new ChatClient({ + connection: adapter, + tools: [tool], + }) + + await client.sendMessage('call throwing tool') + + expect(findToolCallPart(client, 'tc-empty-error')).toMatchObject({ + state: 'error', + }) + expect(findToolResultPart(client, 'tc-empty-error')).toMatchObject({ + state: 'error', + }) + }) + it('validates manual client tool results against outputSchema', async () => { const tool = toolDefinition({ name: 'manual_invalid_output_tool', @@ -359,7 +407,7 @@ describe('ChatClient runtime context', () => { }) expect(findToolCallPart(client, 'tc-manual-invalid-output')).toMatchObject({ - state: 'input-complete', + state: 'error', output: { error: expect.stringContaining('expected number'), }, diff --git a/packages/ai-event-client/src/index.ts b/packages/ai-event-client/src/index.ts index 1af14ca87..ede5544ee 100644 --- a/packages/ai-event-client/src/index.ts +++ b/packages/ai-event-client/src/index.ts @@ -254,6 +254,7 @@ export type ToolCallState = | 'approval-requested' // Waiting for user approval | 'approval-responded' // User has approved/denied | 'complete' // Result is complete + | 'error' // Tool execution failed (terminal) /** * Tool result states - track the lifecycle of a tool result diff --git a/packages/ai/docs/chat-architecture.md b/packages/ai/docs/chat-architecture.md index 687a0b50b..e23552fb3 100644 --- a/packages/ai/docs/chat-architecture.md +++ 
b/packages/ai/docs/chat-architecture.md @@ -354,11 +354,12 @@ Signals that the tool call's **input arguments** are finalized. ### With `result` (from TextEngine after execution) Signals that the tool has been **executed** and the result is available. -- Still transitions state to `input-complete` (if not already). +- Transitions the tool call's state to `input-complete` (if not already), marking its input arguments as finalized. - Creates/updates two things: 1. `updateToolCallWithOutput()` -- Sets `output` on the tool-call part (for UI rendering consistency). 2. `updateToolResultPart()` -- Creates a `tool-result` part (for LLM conversation history). - The `result` field is a JSON string. +- **On an `output-error` result**, the tool-call part transitions to the terminal `error` state (symmetric with the `tool-result` part's `error` state), so UIs can render failure from `part.state` without inspecting the error-shaped `output`. The completion safety net never downgrades an `error` part back to `input-complete`. ### This distinction is critical diff --git a/packages/ai/src/activities/chat/messages.ts b/packages/ai/src/activities/chat/messages.ts index cd0108fbb..8330b24fa 100644 --- a/packages/ai/src/activities/chat/messages.ts +++ b/packages/ai/src/activities/chat/messages.ts @@ -211,6 +211,7 @@ function isToolCallIncluded(part: ToolCallPart): boolean { part.state === 'input-complete' || part.state === 'complete' || part.state === 'approval-responded' || + part.state === 'error' || part.output !== undefined ) } diff --git a/packages/ai/src/activities/chat/stream/message-updaters.ts b/packages/ai/src/activities/chat/stream/message-updaters.ts index 9cde9ffb5..ccb180d84 100644 --- a/packages/ai/src/activities/chat/stream/message-updaters.ts +++ b/packages/ai/src/activities/chat/stream/message-updaters.ts @@ -226,7 +226,7 @@ export function updateToolCallWithOutput( parts[index] = { ...toolCallPart, output: errorText ? { error: errorText } : output, - state: state ?? (errorText ? 
'input-complete' : 'complete'), + state: state ?? (errorText ? 'error' : 'complete'), } } diff --git a/packages/ai/src/activities/chat/stream/processor.ts b/packages/ai/src/activities/chat/stream/processor.ts index 78c93fdb0..46535b88b 100644 --- a/packages/ai/src/activities/chat/stream/processor.ts +++ b/packages/ai/src/activities/chat/stream/processor.ts @@ -317,7 +317,7 @@ export class StreamProcessor { this.messages, toolCallId, output, - error ? 'input-complete' : undefined, + error ? 'error' : undefined, error, ) @@ -1184,7 +1184,7 @@ export class StreamProcessor { this.messages, chunk.toolCallId, output, - chunk.state === 'output-error' ? 'input-complete' : undefined, + chunk.state === 'output-error' ? 'error' : undefined, ) // Step 2: Create/update the tool-result part (for LLM conversation history) @@ -1240,7 +1240,7 @@ export class StreamProcessor { this.messages, chunk.toolCallId, output, - chunk.state === 'output-error' ? 'input-complete' : undefined, + chunk.state === 'output-error' ? 'error' : undefined, ) // Step 2: Create/update the tool-result part @@ -1690,11 +1690,22 @@ export class StreamProcessor { _index: number, toolCall: InternalToolCallState, ): void { + // Finalize the internal bookkeeping: the call's input arguments ARE + // complete regardless of whether execution later failed, so the call still + // counts as a completed tool call in getCompletedToolCalls()/getState(). toolCall.state = 'input-complete' // Try final parse toolCall.parsedArguments = this.jsonParser.parse(toolCall.arguments) + // Don't downgrade the rendered part of a call that already reached the + // terminal 'error' state (e.g. an output-error TOOL_CALL_RESULT arrived + // without a preceding TOOL_CALL_END). The RUN_FINISHED / finalizeStream + // safety net must not clobber a failed call back to 'input-complete'. 
+ if (this.isToolCallPartErrored(toolCall.id)) { + return + } + // Update UIMessage this.messages = updateToolCallPart(this.messages, messageId, { id: toolCall.id, @@ -1714,6 +1725,22 @@ export class StreamProcessor { ) } + /** + * Whether the rendered tool-call part for the given id has reached the + * terminal 'error' state. Used to prevent the completion safety net from + * downgrading a failed call back to 'input-complete'. + */ + private isToolCallPartErrored(toolCallId: string): boolean { + return this.messages.some((msg) => + msg.parts.some( + (part) => + part.type === 'tool-call' && + part.id === toolCallId && + part.state === 'error', + ), + ) + } + /** * Emit pending text update for a specific message. * diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 6034e83ca..798b381b4 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -51,6 +51,7 @@ export type ToolCallState = | 'approval-requested' // Waiting for user approval | 'approval-responded' // User has approved/denied | 'complete' // Result is complete + | 'error' // Tool execution failed (terminal) /** * Tool result states - track the lifecycle of a tool result diff --git a/packages/ai/tests/message-updaters.test.ts b/packages/ai/tests/message-updaters.test.ts index 8f85fabae..070eefa2d 100644 --- a/packages/ai/tests/message-updaters.test.ts +++ b/packages/ai/tests/message-updaters.test.ts @@ -629,7 +629,9 @@ describe('message-updaters', () => { const part = result[0]?.parts[0] as ToolCallPart | undefined expect(part?.output).toEqual({ error: 'Tool execution failed' }) - expect(part?.state).toBe('input-complete') + // An error output drives the tool-call part to the terminal 'error' + // state (issue #718). 
+ expect(part?.state).toBe('error') }) it('should search across all messages', () => { diff --git a/packages/ai/tests/stream-processor.test.ts b/packages/ai/tests/stream-processor.test.ts index bc6580520..c690fc604 100644 --- a/packages/ai/tests/stream-processor.test.ts +++ b/packages/ai/tests/stream-processor.test.ts @@ -1022,7 +1022,7 @@ describe('StreamProcessor', () => { .getMessages()[0]! .parts.find((p) => p.type === 'tool-call') as ToolCallPart expect((toolCallPart as any).output).toEqual({ error: 'Network error' }) - expect(toolCallPart.state).toBe('input-complete') + expect(toolCallPart.state).toBe('error') const toolResultPart = processor .getMessages()[0]! @@ -3392,7 +3392,9 @@ describe('StreamProcessor', () => { (p) => p.type === 'tool-call', ) as ToolCallPart expect(toolCallPart.output).toEqual({ error: 'boom' }) - expect(toolCallPart.state).toBe('input-complete') + // The tool-call part reaches the terminal 'error' state, symmetric with + // its sibling tool-result part (see issue #718). 
+ expect(toolCallPart.state).toBe('error') const toolResultPart = messages[0]?.parts.find( (p) => p.type === 'tool-result', @@ -3400,6 +3402,41 @@ describe('StreamProcessor', () => { expect(toolResultPart.state).toBe('error') expect(toolResultPart.error).toBe('boom') }) + + it('keeps the tool-call part terminal at "error" through RUN_FINISHED even when output-error arrives before TOOL_CALL_END', () => { + const processor = new StreamProcessor() + + processor.processChunk(ev.runStarted()) + processor.processChunk(ev.textStart()) + processor.processChunk(ev.toolStart('tc-1', 'get_weather')) + // output-error result arrives WITHOUT a preceding TOOL_CALL_END + processor.processChunk( + chunk(EventType.TOOL_CALL_RESULT, { + messageId: 'tool-result-1', + toolCallId: 'tc-1', + content: '{"error":"boom"}', + role: 'tool', + state: 'output-error', + }), + ) + // RUN_FINISHED runs the completeAllToolCalls safety net + processor.processChunk(ev.runFinished()) + + const messages = processor.getMessages() + const toolCallPart = messages[0]?.parts.find( + (p) => p.type === 'tool-call', + ) as ToolCallPart + // Safety net must NOT downgrade the rendered failed call back to + // 'input-complete' + expect(toolCallPart.state).toBe('error') + + // ...but the internal bookkeeping must still finalize the call so it is + // surfaced by getCompletedToolCalls()/getState() — consistent with the + // END-first ordering and unchanged from before the #718 fix. 
+ expect(processor.getState().toolCalls.get('tc-1')?.state).toBe( + 'input-complete', + ) + }) }) describe('Structured output parts', () => { diff --git a/testing/e2e/tests/tool-error.spec.ts b/testing/e2e/tests/tool-error.spec.ts index 8fb1a3065..f7ce11136 100644 --- a/testing/e2e/tests/tool-error.spec.ts +++ b/testing/e2e/tests/tool-error.spec.ts @@ -29,5 +29,9 @@ test.describe('Tool Error Handling', () => { (tc: { name: string }) => tc.name === 'failing_tool', ) expect(failingCall).toBeDefined() + // The failed tool-call part reaches the terminal 'error' state, so UIs can + // distinguish "failed" from "still executing" without reverse-engineering + // the output shape (issue #718). + expect(failingCall?.state).toBe('error') }) })