From f7f69a46e728548d8caed0f6ab5b8c462befeb7d Mon Sep 17 00:00:00 2001 From: ktwu01 Date: Sun, 14 Jun 2026 16:09:45 -0500 Subject: [PATCH] fix: recover incomplete resumed tool calls --- .changeset/recover-incomplete-tool-results.md | 6 ++ .../agent-core/src/agent/context/index.ts | 24 +++++ .../agent-core/src/agent/records/index.ts | 3 + packages/agent-core/test/agent/resume.test.ts | 92 +++++++++++++++++++ 4 files changed, 125 insertions(+) create mode 100644 .changeset/recover-incomplete-tool-results.md diff --git a/.changeset/recover-incomplete-tool-results.md b/.changeset/recover-incomplete-tool-results.md new file mode 100644 index 000000000..beaeda180 --- /dev/null +++ b/.changeset/recover-incomplete-tool-results.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/agent-core": patch +"@moonshot-ai/kimi-code": patch +--- + +Recover resumed sessions whose last tool call was missing its recorded result after a crash. diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts index 4746eeb36..02d8777d9 100644 --- a/packages/agent-core/src/agent/context/index.ts +++ b/packages/agent-core/src/agent/context/index.ts @@ -20,6 +20,9 @@ const TOOL_EMPTY_STATUS = 'Tool output is empty.'; const TOOL_EMPTY_ERROR_STATUS = 'ERROR: Tool execution failed. Tool output is empty.'; const TOOL_OUTPUT_EMPTY_TEXT = 'Tool output is empty.'; +const TOOL_RESULT_MISSING_AFTER_RESUME = + 'Tool result missing because the previous process exited before it was recorded. ' + + 'Treat this tool call as interrupted and continue from the next user instruction.'; export class ContextMemory { private _history: ContextMessage[] = []; @@ -205,6 +208,27 @@ export class ContextMemory { return this.project(this.history); } + recoverIncompleteToolResultsAfterRestore(): boolean { + const missingToolCallIds = [...this.pendingToolResultIds]; + this.openSteps.clear(); + if (missingToolCallIds.length === 0) return false; + + // Hard crashes can persist tool.call records before their matching + // tool.result records. Repair the transcript before the next prompt. + for (const toolCallId of missingToolCallIds) { + this.appendLoopEvent({ + type: 'tool.result', + parentUuid: toolCallId, + toolCallId, + result: { + isError: true, + output: TOOL_RESULT_MISSING_AFTER_RESUME, + }, + }); + } + return true; + } + useProjectedHistoryFrom(source: ContextMemory): void { this.clear(); this.pushHistory(...trimTrailingOpenToolExchange(source.project(source.history))); diff --git a/packages/agent-core/src/agent/records/index.ts b/packages/agent-core/src/agent/records/index.ts index 8bf050398..27af6da88 100644 --- a/packages/agent-core/src/agent/records/index.ts +++ b/packages/agent-core/src/agent/records/index.ts @@ -212,6 +212,9 @@ export class AgentRecords { this.persistence.rewrite(replayedRecords); await this.persistence.flush(); } + if (this.agent.context.recoverIncompleteToolResultsAfterRestore()) { + await this.persistence.flush(); + } if (this.agent.blobStore !== undefined) { for (const msg of this.agent.context.history) { await this.agent.blobStore.rehydrateParts(msg.content); diff --git a/packages/agent-core/test/agent/resume.test.ts b/packages/agent-core/test/agent/resume.test.ts index fbe340f80..5667a7491 100644 --- a/packages/agent-core/test/agent/resume.test.ts +++ b/packages/agent-core/test/agent/resume.test.ts @@ -408,6 +408,98 @@ describe('Agent resume', () => { ); }); + it('repairs restored tool calls that were missing results after a crash', async () => { + const persistence = new RecordingAgentPersistence([ + { + type: 'config.update', + cwd: process.cwd(), + modelAlias: MOCK_PROVIDER.model, + systemPrompt: DEFAULT_TEST_SYSTEM_PROMPT, + thinkingLevel: 'off', + }, + { + type: 'context.append_message', + message: { + role: 'user', + content: [{ type: 'text', text: 'Historical prompt before crash' }], + toolCalls: [], + origin: { kind: 'user' }, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'step.begin', + uuid: 'crashed-step', + turnId: '0', + step: 1, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'content.part', + uuid: 'crashed-text', + turnId: '0', + step: 1, + stepUuid: 'crashed-step', + part: { type: 'text', text: 'I will inspect the workspace.' }, + }, + }, + { + type: 'context.append_loop_event', + event: { + type: 'tool.call', + uuid: 'crashed-call', + turnId: '0', + step: 1, + stepUuid: 'crashed-step', + toolCallId: 'call_crashed_bash', + name: 'Bash', + args: { command: 'pwd' }, + }, + }, + ]); + const ctx = testAgent({ persistence }); + + await ctx.agent.resume(); + + expect(persistence.appended).toContainEqual( + expect.objectContaining({ + type: 'context.append_loop_event', + event: expect.objectContaining({ + type: 'tool.result', + toolCallId: 'call_crashed_bash', + result: expect.objectContaining({ + isError: true, + output: expect.stringContaining('previous process exited before it was recorded'), + }), + }), + }), + ); + expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([ + 'user', + 'assistant', + 'tool', + ]); + + ctx.mockNextResponse({ type: 'text', text: 'Recovered after crash.' }); + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Continue after crash' }] }); + await ctx.untilTurnEnd(); + + expect(ctx.llmInputs()).toMatchInlineSnapshot(` + call 1: + system: + tools: [] + messages: + user: text "Historical prompt before crash" + assistant: text "I will inspect the workspace." calls call_crashed_bash:Bash { "command": "pwd" } + tool[call_crashed_bash]: text "ERROR: Tool execution failed.\\nTool result missing because the previous process exited before it was recorded. Treat this tool call as interrupted and continue from the next user instruction." + user: text "Continue after crash" + `); + await ctx.expectResumeMatches(); + }); + it('rebuilds goal completion replay cards without adding model-visible context', async () => { const persistence = new RecordingAgentPersistence([ {