From 208ef5242360e8158a2611291bfe90eb771e49c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Sj=C3=B8lander=20Ernstsen?= Date: Fri, 12 Jun 2026 12:05:55 +0200 Subject: [PATCH] fix(mcp-chat): loop tool calls until a final answer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit processQuery ran a single round of tool calls, then made a follow-up LLM request WITHOUT tools. When a question needed more than one tool call, the model attempted another call in that tool-less follow-up; OpenAI-compatible servers don't parse tool calls when `tools` is absent, so the model's raw tool-call tokens leaked into the response content (and the follow-up content discarded the real answer). Replace the single round with a capped agentic loop (max 8 iterations) that always passes the tools on every sendMessage call and executes returned tool calls until the model responds with a final text answer, with a graceful fallback if the cap is reached. The per-call execute/ record logic is extracted into a private helper. Adds tests covering the multi-round loop and the iteration cap. Signed-off-by: Johannes Sjølander Ernstsen --- .../.changeset/fix-multi-round-tool-loop.md | 5 + workspaces/mcp-chat/app-config.yaml | 1 + .../mcp-chat/docs/SERVER_CONFIGURATION.md | 9 + .../plugins/mcp-chat-backend/config.d.ts | 8 + .../plugins/mcp-chat-backend/report.api.md | 3 + .../plugins/mcp-chat-backend/src/index.ts | 1 + .../src/services/MCPClientServiceImpl.test.ts | 261 ++++++++++++++++++ .../src/services/MCPClientServiceImpl.ts | 186 ++++++++----- .../plugins/mcp-chat-backend/src/utils.ts | 7 + 9 files changed, 413 insertions(+), 68 deletions(-) create mode 100644 workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md diff --git a/workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md b/workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md new file mode 100644 index 00000000000..062992f1050 --- /dev/null +++ b/workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md @@ -0,0 +1,5 @@ +--- +'@backstage-community/plugin-mcp-chat-backend': patch +--- + +Fix multi-step tool calls dropping the second tool call and leaking raw tool-call tokens; processQuery now loops with tools until the model returns a final answer. The loop's iteration cap is configurable via `mcpChat.maxToolIterations` (default 8). diff --git a/workspaces/mcp-chat/app-config.yaml b/workspaces/mcp-chat/app-config.yaml index 130813e2643..bfff039e559 100644 --- a/workspaces/mcp-chat/app-config.yaml +++ b/workspaces/mcp-chat/app-config.yaml @@ -27,6 +27,7 @@ integrations: # --------- MCP Chat Configuration --------- mcpChat: toolCallTimeout: 60000 + maxToolIterations: 8 providers: - id: openai baseUrl: 'https://any-openai-compatible-url/v1' diff --git a/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md b/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md index bcc2c29d33d..05448c55430 100644 --- a/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md +++ b/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md @@ -177,6 +177,15 @@ mcpChat: toolCallTimeout: 300000 # 5 minutes, in milliseconds ``` +### Max Tool Iterations + +When answering a question, the assistant runs an agentic loop that executes tool calls and feeds the results back to the model until it produces a final answer. This loop is capped at **8 iterations** by default to prevent runaway tool-calling. For questions that legitimately require many sequential tool calls, raise the cap with `maxToolIterations`: + +```yaml +mcpChat: + maxToolIterations: 16 +``` + --- For additional setup instructions and troubleshooting, refer to the [main README](../README.md). diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts index 38439e01e32..2722f19b3d0 100644 --- a/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts +++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts @@ -134,6 +134,14 @@ export interface Config { * @default 60000 */ toolCallTimeout?: number; + /** + * Maximum number of tool-call iterations the agentic loop will run before + * giving up on producing a final answer. Increase this for questions that + * legitimately require many sequential tool calls. + * @visibility backend + * @default 8 + */ + maxToolIterations?: number; /** * Custom system prompt for the AI assistant * @visibility backend diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md b/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md index 2fc028b7994..1bc8fbe651f 100644 --- a/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md +++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md @@ -122,6 +122,9 @@ export interface ConversationRecord { // @public export function createRouter(options: RouterOptions): Promise; +// @public +export const DEFAULT_MCP_MAX_TOOL_ITERATIONS = 8; + // @public export const DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS = 60000; diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts index d28fc6fef3d..22f79761ca7 100644 --- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts +++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts @@ -110,6 +110,7 @@ export { executeToolCall, findNpxPath, DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS, + DEFAULT_MCP_MAX_TOOL_ITERATIONS, } from './utils'; // ============================================================================= diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts index 4cc03edf187..400640c8d5f 100644 --- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts +++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts @@ -253,6 +253,58 @@ describe('MCPClientServiceImpl', () => { expect(result.toolResponses[0].serverId).toBe('error'); }); + it('should not throw when a failing tool call has malformed JSON arguments', async () => { + const toolCall: ToolCall = { + id: 'call_bad_json', + type: 'function', + function: { + name: 'failing_tool', + arguments: 'not valid json', + }, + }; + + const initialResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: null, + tool_calls: [toolCall], + }, + }, + ], + }; + + const followUpResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: 'Recovered after the tool error.', + }, + }, + ], + }; + + mockLLMProvider.sendMessage + .mockResolvedValueOnce(initialResponse) + .mockResolvedValueOnce(followUpResponse); + + utils.executeToolCall.mockRejectedValue( + new Error('Tool execution failed'), + ); + + const result = await service.processQuery([ + { role: 'user', content: 'Use the failing tool' }, + ]); + + expect(result.reply).toBe('Recovered after the tool error.'); + expect(result.toolResponses[0].serverId).toBe('error'); + expect(result.toolResponses[0].arguments).toEqual({ + _raw: 'not valid json', + }); + }); + it('should handle LLM provider errors', async () => { mockLLMProvider.sendMessage.mockRejectedValue( new Error('LLM connection failed'), @@ -262,6 +314,215 @@ describe('MCPClientServiceImpl', () => { service.processQuery([{ role: 'user', content: 'Hello' }]), ).rejects.toThrow('LLM connection failed'); }); + + it('should pass tools on every sendMessage call (multi-round loop)', async () => { + const toolCall1: ToolCall = { + id: 'call_1', + type: 'function', + function: { name: 'tool_one', arguments: '{"a": 1}' }, + }; + const toolCall2: ToolCall = { + id: 'call_2', + type: 'function', + function: { name: 'tool_two', arguments: '{"b": 2}' }, + }; + + const firstResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: null, + tool_calls: [toolCall1], + }, + }, + ], + }; + const secondResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: null, + tool_calls: [toolCall2], + }, + }, + ], + }; + const finalResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: 'Both tools executed successfully.', + }, + }, + ], + }; + + mockLLMProvider.sendMessage + .mockResolvedValueOnce(firstResponse) + .mockResolvedValueOnce(secondResponse) + .mockResolvedValueOnce(finalResponse); + + const result = await service.processQuery([ + { role: 'user', content: 'Use both tools' }, + ]); + + expect(result.reply).toBe('Both tools executed successfully.'); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolResponses).toHaveLength(2); + + // All three sendMessage calls must receive tools (the key fix) + expect(mockLLMProvider.sendMessage).toHaveBeenCalledTimes(3); + for (const call of mockLLMProvider.sendMessage.mock.calls) { + expect(call[1]).toBeDefined(); + expect(Array.isArray(call[1])).toBe(true); + } + + expect(utils.executeToolCall).toHaveBeenCalledTimes(2); + }); + + it('should record one assistant message with all tool calls followed by tool messages', async () => { + const toolCall1: ToolCall = { + id: 'call_a', + type: 'function', + function: { name: 'tool_a', arguments: '{"x": 1}' }, + }; + const toolCall2: ToolCall = { + id: 'call_b', + type: 'function', + function: { name: 'tool_b', arguments: '{"y": 2}' }, + }; + + const firstResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: null, + tool_calls: [toolCall1, toolCall2], + }, + }, + ], + }; + const finalResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: 'Done.', + }, + }, + ], + }; + + mockLLMProvider.sendMessage + .mockResolvedValueOnce(firstResponse) + .mockResolvedValueOnce(finalResponse); + + utils.executeToolCall.mockImplementation((toolCall: ToolCall) => + Promise.resolve({ + id: toolCall.id, + name: toolCall.function.name, + arguments: {}, + result: `result for ${toolCall.id}`, + serverId: 'test-server', + }), + ); + + await service.processQuery([{ role: 'user', content: 'Use both tools' }]); + + // Inspect the conversation handed to the second (final) LLM round. + const secondRoundMessages = mockLLMProvider.sendMessage.mock.calls[1][0]; + const appended = secondRoundMessages.slice(-3); + + expect(appended[0]).toEqual({ + role: 'assistant', + content: null, + tool_calls: [toolCall1, toolCall2], + }); + expect(appended[1]).toEqual({ + role: 'tool', + content: 'result for call_a', + tool_call_id: 'call_a', + }); + expect(appended[2]).toEqual({ + role: 'tool', + content: 'result for call_b', + tool_call_id: 'call_b', + }); + }); + + it('should return fallback message when max tool iterations reached', async () => { + const toolCall: ToolCall = { + id: 'call_loop', + type: 'function', + function: { name: 'infinite_tool', arguments: '{}' }, + }; + const toolResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: null, + tool_calls: [toolCall], + }, + }, + ], + }; + + // Always return a tool call so the loop never terminates naturally + mockLLMProvider.sendMessage.mockResolvedValue(toolResponse); + + const result = await service.processQuery([ + { role: 'user', content: 'Loop forever' }, + ]); + + expect(result.reply).toContain( + "couldn't compile a final answer within the allowed number of steps", + ); + expect(mockLLMProvider.sendMessage).toHaveBeenCalledTimes(8); + }); + + it('should honor a configured maxToolIterations cap', async () => { + mockConfig.getOptionalNumber.mockImplementation((key: string) => + key === 'mcpChat.maxToolIterations' ? 3 : undefined, + ); + service = new MCPClientServiceImpl({ + logger: mockLogger, + config: mockConfig, + }); + + const toolCall: ToolCall = { + id: 'call_loop', + type: 'function', + function: { name: 'infinite_tool', arguments: '{}' }, + }; + const toolResponse: ChatResponse = { + choices: [ + { + message: { + role: 'assistant', + content: null, + tool_calls: [toolCall], + }, + }, + ], + }; + + // Always return a tool call so the loop never terminates naturally + mockLLMProvider.sendMessage.mockResolvedValue(toolResponse); + + const result = await service.processQuery([ + { role: 'user', content: 'Loop forever' }, + ]); + + expect(result.reply).toContain( + "couldn't compile a final answer within the allowed number of steps", + ); + expect(mockLLMProvider.sendMessage).toHaveBeenCalledTimes(3); + }); }); describe('Status Reporting', () => { diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts index e66c50619e2..c70cebee1aa 100644 --- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts +++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts @@ -31,6 +31,7 @@ import { findNpxPath, loadServerConfigs, DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS, + DEFAULT_MCP_MAX_TOOL_ITERATIONS, } from '../utils'; import { LLMProvider } from '../providers/base-provider'; import { OpenAIResponsesProvider } from '../providers/openai-responses-provider'; @@ -46,6 +47,8 @@ import { MCPServerType, MCPServerFullConfig, ResponsesApiMcpCall, + ToolCall, + ToolExecutionResult, } from '../types'; /** @@ -76,6 +79,7 @@ export class MCPClientServiceImpl implements MCPClientService { private serverConfigs: MCPServerFullConfig[] = []; private allowedToolsByServer: Map = new Map(); private readonly toolCallTimeout: number; + private readonly maxToolIterations: number; constructor(options: Options) { this.logger = options.logger; @@ -83,6 +87,9 @@ export class MCPClientServiceImpl implements MCPClientService { this.toolCallTimeout = this.config.getOptionalNumber('mcpChat.toolCallTimeout') ?? DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS; + this.maxToolIterations = + this.config.getOptionalNumber('mcpChat.maxToolIterations') ?? + DEFAULT_MCP_MAX_TOOL_ITERATIONS; this.llmProvider = this.initializeLLMProvider(); this.mcpServers = this.initializeMCPServers(); this.systemPrompt = @@ -520,85 +527,128 @@ export class MCPClientServiceImpl implements MCPClientService { // Remove serverId from tools when sending to LLM const llmTools: Tool[] = filteredTools.map(({ serverId, ...tool }) => tool); - const response = await this.llmProvider.sendMessage(messages, llmTools); - const replyMessage = response.choices[0].message; - this.logger.info( - `LLM response received with ${ - replyMessage.tool_calls?.length || 0 - } tool calls`, - ); - const toolCalls = replyMessage.tool_calls || []; + const maxToolIterations = this.maxToolIterations; + const toolCalls: ToolCall[] = []; + const toolResponses: ToolExecutionResult[] = []; + let reply = ''; + let completed = false; - if (toolCalls.length > 0) { - const toolResponses = []; + for (let iteration = 0; iteration < maxToolIterations; iteration++) { + const response = await this.llmProvider.sendMessage(messages, llmTools); + const replyMessage = response.choices[0].message; + const iterationToolCalls = replyMessage.tool_calls || []; + this.logger.info( + `LLM response (iteration ${ + iteration + 1 + }/${maxToolIterations}) received with ${ + iterationToolCalls.length + } tool calls`, + ); - for (const toolCall of toolCalls) { - try { - const toolResponse = await executeToolCall( - toolCall, - this.tools, - this.mcpClients, - this.toolCallTimeout, - ); - toolResponses.push(toolResponse); + if (iterationToolCalls.length === 0) { + reply = replyMessage.content || ''; + completed = true; + break; + } - messages.push({ - role: 'assistant', - content: null, - tool_calls: [toolCall], - }); + toolCalls.push(...iterationToolCalls); - messages.push({ - role: 'tool', - content: toolResponse.result, - tool_call_id: toolCall.id, - }); - } catch (error) { - const errorMessage = `Error executing tool '${ - toolCall.function.name - }': ${error instanceof Error ? error.message : error}`; - - this.logger.warn(errorMessage); - - // Still add the tool call and error response to maintain conversation flow - const errorResponse = { - id: toolCall.id, - name: toolCall.function.name, - arguments: JSON.parse(toolCall.function.arguments || '{}'), - result: errorMessage, - serverId: 'error', - }; - - toolResponses.push(errorResponse); - - messages.push({ - role: 'assistant', - content: null, - tool_calls: [toolCall], - }); + // Per the OpenAI tool-calling protocol, a single assistant message holds + // all tool calls from this iteration, followed by one tool message per + // call. Keeping them in one message keeps the history valid for the + // next round. + messages.push({ + role: 'assistant', + content: replyMessage.content ?? null, + tool_calls: iterationToolCalls, + }); - messages.push({ - role: 'tool', - content: errorMessage, - tool_call_id: toolCall.id, - }); - } + for (const toolCall of iterationToolCalls) { + await this.executeAndRecordToolResult( + toolCall, + messages, + toolResponses, + ); } + } - const followUp = await this.llmProvider.sendMessage(messages); + if (!completed) { + this.logger.warn( + `Reached max tool iterations (${maxToolIterations}) without a final answer`, + ); + reply = + "I gathered information using several tools but couldn't compile a final answer within the allowed number of steps. Please try rephrasing or narrowing your question."; + } - return { - reply: followUp.choices[0].message.content || '', - toolCalls, - toolResponses, + return { reply, toolCalls, toolResponses }; + } + + /** + * Executes a single tool call and appends its result (or error) as a `tool` + * message to the conversation. The assistant message carrying the tool calls + * is pushed once per iteration by the caller, so this only records the + * result, keeping the message history valid for the next LLM round. + */ + private async executeAndRecordToolResult( + toolCall: ToolCall, + messages: ChatMessage[], + toolResponses: ToolExecutionResult[], + ): Promise { + try { + const toolResponse = await executeToolCall( + toolCall, + this.tools, + this.mcpClients, + this.toolCallTimeout, + ); + toolResponses.push(toolResponse); + + messages.push({ + role: 'tool', + content: toolResponse.result, + tool_call_id: toolCall.id, + }); + } catch (error) { + const errorMessage = `Error executing tool '${toolCall.function.name}': ${ + error instanceof Error ? error.message : error + }`; + + this.logger.warn(errorMessage); + + // Still record the error response to maintain conversation flow + const errorResponse: ToolExecutionResult = { + id: toolCall.id, + name: toolCall.function.name, + arguments: this.safeParseArguments(toolCall.function.arguments), + result: errorMessage, + serverId: 'error', }; + + toolResponses.push(errorResponse); + + messages.push({ + role: 'tool', + content: errorMessage, + tool_call_id: toolCall.id, + }); } + } - return { - reply: replyMessage.content || '', - toolCalls: [], - toolResponses: [], - }; + /** + * Parses a tool call's JSON-encoded arguments without throwing. The model + * can emit malformed JSON; since this is used while recording a tool error, + * a parse failure must not escape and break the "record and continue" flow. + * Falls back to the raw string (or `{}` when absent) on failure. + */ + private safeParseArguments(rawArguments: string): Record { + if (!rawArguments) { + return {}; + } + try { + return JSON.parse(rawArguments); + } catch { + return { _raw: rawArguments }; + } } /** diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts index 94d1ff158ef..e55ca3702f2 100644 --- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts +++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts @@ -34,6 +34,13 @@ import { RootConfigService } from '@backstage/backend-plugin-api'; */ export const DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS = 60000; +/** + * Default maximum number of tool-call iterations the agentic loop will run + * before giving up on producing a final answer. + * @public + */ +export const DEFAULT_MCP_MAX_TOOL_ITERATIONS = 8; + /** * Loads MCP server configurations from Backstage config. * Reads from the `mcpChat.mcpServers` configuration section.