From 208ef5242360e8158a2611291bfe90eb771e49c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Sj=C3=B8lander=20Ernstsen?= <jsje@ramboll.com>
Date: Fri, 12 Jun 2026 12:05:55 +0200
Subject: [PATCH] fix(mcp-chat): loop tool calls until a final answer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

processQuery ran a single round of tool calls, then made a follow-up
LLM request WITHOUT tools. When a question needed more than one tool
call, the model attempted another call in that tool-less follow-up;
OpenAI-compatible servers don't parse tool calls when `tools` is
absent, so the model's raw tool-call tokens leaked into the response
content (and the follow-up content discarded the real answer).

Replace the single round with a capped agentic loop (max 8 iterations)
that always passes the tools on every sendMessage call and executes
returned tool calls until the model responds with a final text answer,
with a graceful fallback if the cap is reached. The per-call execute/
record logic is extracted into a private helper.

Adds tests covering the multi-round loop and the iteration cap.

Signed-off-by: Johannes Sjølander Ernstsen <jsje@ramboll.com>
---
 .../.changeset/fix-multi-round-tool-loop.md   |   5 +
 workspaces/mcp-chat/app-config.yaml           |   1 +
 .../mcp-chat/docs/SERVER_CONFIGURATION.md     |   9 +
 .../plugins/mcp-chat-backend/config.d.ts      |   8 +
 .../plugins/mcp-chat-backend/report.api.md    |   3 +
 .../plugins/mcp-chat-backend/src/index.ts     |   1 +
 .../src/services/MCPClientServiceImpl.test.ts | 261 ++++++++++++++++++
 .../src/services/MCPClientServiceImpl.ts      | 186 ++++++++-----
 .../plugins/mcp-chat-backend/src/utils.ts     |   7 +
 9 files changed, 413 insertions(+), 68 deletions(-)
 create mode 100644 workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md

diff --git a/workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md b/workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md
new file mode 100644
index 00000000000..062992f1050
--- /dev/null
+++ b/workspaces/mcp-chat/.changeset/fix-multi-round-tool-loop.md
@@ -0,0 +1,5 @@
+---
+'@backstage-community/plugin-mcp-chat-backend': patch
+---
+
+Fix multi-step tool calls dropping the second tool call and leaking raw tool-call tokens; processQuery now loops with tools until the model returns a final answer. The loop's iteration cap is configurable via `mcpChat.maxToolIterations` (default 8).
diff --git a/workspaces/mcp-chat/app-config.yaml b/workspaces/mcp-chat/app-config.yaml
index 130813e2643..bfff039e559 100644
--- a/workspaces/mcp-chat/app-config.yaml
+++ b/workspaces/mcp-chat/app-config.yaml
@@ -27,6 +27,7 @@ integrations:
 # --------- MCP Chat Configuration ---------
 mcpChat:
   toolCallTimeout: 60000
+  maxToolIterations: 8
   providers:
     - id: openai
       baseUrl: 'https://any-openai-compatible-url/v1'
diff --git a/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md b/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md
index bcc2c29d33d..05448c55430 100644
--- a/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md
+++ b/workspaces/mcp-chat/docs/SERVER_CONFIGURATION.md
@@ -177,6 +177,15 @@ mcpChat:
   toolCallTimeout: 300000 # 5 minutes, in milliseconds
 ```
 
+### Max Tool Iterations
+
+When answering a question, the assistant runs an agentic loop that executes tool calls and feeds the results back to the model until it produces a final answer. This loop is capped at **8 iterations** by default to prevent runaway tool-calling. For questions that legitimately require many sequential tool calls, raise the cap with `maxToolIterations`:
+
+```yaml
+mcpChat:
+  maxToolIterations: 16
+```
+
 ---
 
 For additional setup instructions and troubleshooting, refer to the [main README](../README.md).
diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts
index 38439e01e32..2722f19b3d0 100644
--- a/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts
+++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/config.d.ts
@@ -134,6 +134,14 @@ export interface Config {
      * @default 60000
      */
     toolCallTimeout?: number;
+    /**
+     * Maximum number of tool-call iterations the agentic loop will run before
+     * giving up on producing a final answer. Increase this for questions that
+     * legitimately require many sequential tool calls.
+     * @visibility backend
+     * @default 8
+     */
+    maxToolIterations?: number;
     /**
      * Custom system prompt for the AI assistant
      * @visibility backend
diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md b/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md
index 2fc028b7994..1bc8fbe651f 100644
--- a/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md
+++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/report.api.md
@@ -122,6 +122,9 @@ export interface ConversationRecord {
 // @public
 export function createRouter(options: RouterOptions): Promise<express.Router>;
 
+// @public
+export const DEFAULT_MCP_MAX_TOOL_ITERATIONS = 8;
+
 // @public
 export const DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS = 60000;
 
diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts
index d28fc6fef3d..22f79761ca7 100644
--- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts
+++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/index.ts
@@ -110,6 +110,7 @@ export {
   executeToolCall,
   findNpxPath,
   DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS,
+  DEFAULT_MCP_MAX_TOOL_ITERATIONS,
 } from './utils';
 
 // =============================================================================
diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts
index 4cc03edf187..400640c8d5f 100644
--- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts
+++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.test.ts
@@ -253,6 +253,58 @@ describe('MCPClientServiceImpl', () => {
       expect(result.toolResponses[0].serverId).toBe('error');
     });
 
+    it('should not throw when a failing tool call has malformed JSON arguments', async () => {
+      const toolCall: ToolCall = {
+        id: 'call_bad_json',
+        type: 'function',
+        function: {
+          name: 'failing_tool',
+          arguments: 'not valid json',
+        },
+      };
+
+      const initialResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: null,
+              tool_calls: [toolCall],
+            },
+          },
+        ],
+      };
+
+      const followUpResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'Recovered after the tool error.',
+            },
+          },
+        ],
+      };
+
+      mockLLMProvider.sendMessage
+        .mockResolvedValueOnce(initialResponse)
+        .mockResolvedValueOnce(followUpResponse);
+
+      utils.executeToolCall.mockRejectedValue(
+        new Error('Tool execution failed'),
+      );
+
+      const result = await service.processQuery([
+        { role: 'user', content: 'Use the failing tool' },
+      ]);
+
+      expect(result.reply).toBe('Recovered after the tool error.');
+      expect(result.toolResponses[0].serverId).toBe('error');
+      expect(result.toolResponses[0].arguments).toEqual({
+        _raw: 'not valid json',
+      });
+    });
+
     it('should handle LLM provider errors', async () => {
       mockLLMProvider.sendMessage.mockRejectedValue(
         new Error('LLM connection failed'),
@@ -262,6 +314,215 @@ describe('MCPClientServiceImpl', () => {
         service.processQuery([{ role: 'user', content: 'Hello' }]),
       ).rejects.toThrow('LLM connection failed');
     });
+
+    it('should pass tools on every sendMessage call (multi-round loop)', async () => {
+      const toolCall1: ToolCall = {
+        id: 'call_1',
+        type: 'function',
+        function: { name: 'tool_one', arguments: '{"a": 1}' },
+      };
+      const toolCall2: ToolCall = {
+        id: 'call_2',
+        type: 'function',
+        function: { name: 'tool_two', arguments: '{"b": 2}' },
+      };
+
+      const firstResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: null,
+              tool_calls: [toolCall1],
+            },
+          },
+        ],
+      };
+      const secondResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: null,
+              tool_calls: [toolCall2],
+            },
+          },
+        ],
+      };
+      const finalResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'Both tools executed successfully.',
+            },
+          },
+        ],
+      };
+
+      mockLLMProvider.sendMessage
+        .mockResolvedValueOnce(firstResponse)
+        .mockResolvedValueOnce(secondResponse)
+        .mockResolvedValueOnce(finalResponse);
+
+      const result = await service.processQuery([
+        { role: 'user', content: 'Use both tools' },
+      ]);
+
+      expect(result.reply).toBe('Both tools executed successfully.');
+      expect(result.toolCalls).toHaveLength(2);
+      expect(result.toolResponses).toHaveLength(2);
+
+      // All three sendMessage calls must receive tools (the key fix)
+      expect(mockLLMProvider.sendMessage).toHaveBeenCalledTimes(3);
+      for (const call of mockLLMProvider.sendMessage.mock.calls) {
+        expect(call[1]).toBeDefined();
+        expect(Array.isArray(call[1])).toBe(true);
+      }
+
+      expect(utils.executeToolCall).toHaveBeenCalledTimes(2);
+    });
+
+    it('should record one assistant message with all tool calls followed by tool messages', async () => {
+      const toolCall1: ToolCall = {
+        id: 'call_a',
+        type: 'function',
+        function: { name: 'tool_a', arguments: '{"x": 1}' },
+      };
+      const toolCall2: ToolCall = {
+        id: 'call_b',
+        type: 'function',
+        function: { name: 'tool_b', arguments: '{"y": 2}' },
+      };
+
+      const firstResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: null,
+              tool_calls: [toolCall1, toolCall2],
+            },
+          },
+        ],
+      };
+      const finalResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: 'Done.',
+            },
+          },
+        ],
+      };
+
+      mockLLMProvider.sendMessage
+        .mockResolvedValueOnce(firstResponse)
+        .mockResolvedValueOnce(finalResponse);
+
+      utils.executeToolCall.mockImplementation((toolCall: ToolCall) =>
+        Promise.resolve({
+          id: toolCall.id,
+          name: toolCall.function.name,
+          arguments: {},
+          result: `result for ${toolCall.id}`,
+          serverId: 'test-server',
+        }),
+      );
+
+      await service.processQuery([{ role: 'user', content: 'Use both tools' }]);
+
+      // Inspect the conversation handed to the second (final) LLM round.
+      const secondRoundMessages = mockLLMProvider.sendMessage.mock.calls[1][0];
+      const appended = secondRoundMessages.slice(-3);
+
+      expect(appended[0]).toEqual({
+        role: 'assistant',
+        content: null,
+        tool_calls: [toolCall1, toolCall2],
+      });
+      expect(appended[1]).toEqual({
+        role: 'tool',
+        content: 'result for call_a',
+        tool_call_id: 'call_a',
+      });
+      expect(appended[2]).toEqual({
+        role: 'tool',
+        content: 'result for call_b',
+        tool_call_id: 'call_b',
+      });
+    });
+
+    it('should return fallback message when max tool iterations reached', async () => {
+      const toolCall: ToolCall = {
+        id: 'call_loop',
+        type: 'function',
+        function: { name: 'infinite_tool', arguments: '{}' },
+      };
+      const toolResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: null,
+              tool_calls: [toolCall],
+            },
+          },
+        ],
+      };
+
+      // Always return a tool call so the loop never terminates naturally
+      mockLLMProvider.sendMessage.mockResolvedValue(toolResponse);
+
+      const result = await service.processQuery([
+        { role: 'user', content: 'Loop forever' },
+      ]);
+
+      expect(result.reply).toContain(
+        "couldn't compile a final answer within the allowed number of steps",
+      );
+      expect(mockLLMProvider.sendMessage).toHaveBeenCalledTimes(8);
+    });
+
+    it('should honor a configured maxToolIterations cap', async () => {
+      mockConfig.getOptionalNumber.mockImplementation((key: string) =>
+        key === 'mcpChat.maxToolIterations' ? 3 : undefined,
+      );
+      service = new MCPClientServiceImpl({
+        logger: mockLogger,
+        config: mockConfig,
+      });
+
+      const toolCall: ToolCall = {
+        id: 'call_loop',
+        type: 'function',
+        function: { name: 'infinite_tool', arguments: '{}' },
+      };
+      const toolResponse: ChatResponse = {
+        choices: [
+          {
+            message: {
+              role: 'assistant',
+              content: null,
+              tool_calls: [toolCall],
+            },
+          },
+        ],
+      };
+
+      // Always return a tool call so the loop never terminates naturally
+      mockLLMProvider.sendMessage.mockResolvedValue(toolResponse);
+
+      const result = await service.processQuery([
+        { role: 'user', content: 'Loop forever' },
+      ]);
+
+      expect(result.reply).toContain(
+        "couldn't compile a final answer within the allowed number of steps",
+      );
+      expect(mockLLMProvider.sendMessage).toHaveBeenCalledTimes(3);
+    });
   });
 
   describe('Status Reporting', () => {
diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts
index e66c50619e2..c70cebee1aa 100644
--- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts
+++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/services/MCPClientServiceImpl.ts
@@ -31,6 +31,7 @@ import {
   findNpxPath,
   loadServerConfigs,
   DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS,
+  DEFAULT_MCP_MAX_TOOL_ITERATIONS,
 } from '../utils';
 import { LLMProvider } from '../providers/base-provider';
 import { OpenAIResponsesProvider } from '../providers/openai-responses-provider';
@@ -46,6 +47,8 @@ import {
   MCPServerType,
   MCPServerFullConfig,
   ResponsesApiMcpCall,
+  ToolCall,
+  ToolExecutionResult,
 } from '../types';
 
 /**
@@ -76,6 +79,7 @@ export class MCPClientServiceImpl implements MCPClientService {
   private serverConfigs: MCPServerFullConfig[] = [];
   private allowedToolsByServer: Map<string, string[]> = new Map();
   private readonly toolCallTimeout: number;
+  private readonly maxToolIterations: number;
 
   constructor(options: Options) {
     this.logger = options.logger;
@@ -83,6 +87,9 @@ export class MCPClientServiceImpl implements MCPClientService {
     this.toolCallTimeout =
       this.config.getOptionalNumber('mcpChat.toolCallTimeout') ??
       DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS;
+    this.maxToolIterations =
+      this.config.getOptionalNumber('mcpChat.maxToolIterations') ??
+      DEFAULT_MCP_MAX_TOOL_ITERATIONS;
     this.llmProvider = this.initializeLLMProvider();
     this.mcpServers = this.initializeMCPServers();
     this.systemPrompt =
@@ -520,85 +527,128 @@ export class MCPClientServiceImpl implements MCPClientService {
     // Remove serverId from tools when sending to LLM
     const llmTools: Tool[] = filteredTools.map(({ serverId, ...tool }) => tool);
 
-    const response = await this.llmProvider.sendMessage(messages, llmTools);
-    const replyMessage = response.choices[0].message;
-    this.logger.info(
-      `LLM response received with ${
-        replyMessage.tool_calls?.length || 0
-      } tool calls`,
-    );
-    const toolCalls = replyMessage.tool_calls || [];
+    const maxToolIterations = this.maxToolIterations;
+    const toolCalls: ToolCall[] = [];
+    const toolResponses: ToolExecutionResult[] = [];
+    let reply = '';
+    let completed = false;
 
-    if (toolCalls.length > 0) {
-      const toolResponses = [];
+    for (let iteration = 0; iteration < maxToolIterations; iteration++) {
+      const response = await this.llmProvider.sendMessage(messages, llmTools);
+      const replyMessage = response.choices[0].message;
+      const iterationToolCalls = replyMessage.tool_calls || [];
+      this.logger.info(
+        `LLM response (iteration ${
+          iteration + 1
+        }/${maxToolIterations}) received with ${
+          iterationToolCalls.length
+        } tool calls`,
+      );
 
-      for (const toolCall of toolCalls) {
-        try {
-          const toolResponse = await executeToolCall(
-            toolCall,
-            this.tools,
-            this.mcpClients,
-            this.toolCallTimeout,
-          );
-          toolResponses.push(toolResponse);
+      if (iterationToolCalls.length === 0) {
+        reply = replyMessage.content || '';
+        completed = true;
+        break;
+      }
 
-          messages.push({
-            role: 'assistant',
-            content: null,
-            tool_calls: [toolCall],
-          });
+      toolCalls.push(...iterationToolCalls);
 
-          messages.push({
-            role: 'tool',
-            content: toolResponse.result,
-            tool_call_id: toolCall.id,
-          });
-        } catch (error) {
-          const errorMessage = `Error executing tool '${
-            toolCall.function.name
-          }': ${error instanceof Error ? error.message : error}`;
-
-          this.logger.warn(errorMessage);
-
-          // Still add the tool call and error response to maintain conversation flow
-          const errorResponse = {
-            id: toolCall.id,
-            name: toolCall.function.name,
-            arguments: JSON.parse(toolCall.function.arguments || '{}'),
-            result: errorMessage,
-            serverId: 'error',
-          };
-
-          toolResponses.push(errorResponse);
-
-          messages.push({
-            role: 'assistant',
-            content: null,
-            tool_calls: [toolCall],
-          });
+      // Per the OpenAI tool-calling protocol, a single assistant message holds
+      // all tool calls from this iteration, followed by one tool message per
+      // call. Keeping them in one message keeps the history valid for the
+      // next round.
+      messages.push({
+        role: 'assistant',
+        content: replyMessage.content ?? null,
+        tool_calls: iterationToolCalls,
+      });
 
-          messages.push({
-            role: 'tool',
-            content: errorMessage,
-            tool_call_id: toolCall.id,
-          });
-        }
+      for (const toolCall of iterationToolCalls) {
+        await this.executeAndRecordToolResult(
+          toolCall,
+          messages,
+          toolResponses,
+        );
       }
+    }
 
-      const followUp = await this.llmProvider.sendMessage(messages);
+    if (!completed) {
+      this.logger.warn(
+        `Reached max tool iterations (${maxToolIterations}) without a final answer`,
+      );
+      reply =
+        "I gathered information using several tools but couldn't compile a final answer within the allowed number of steps. Please try rephrasing or narrowing your question.";
+    }
 
-      return {
-        reply: followUp.choices[0].message.content || '',
-        toolCalls,
-        toolResponses,
+    return { reply, toolCalls, toolResponses };
+  }
+
+  /**
+   * Executes a single tool call and appends its result (or error) as a `tool`
+   * message to the conversation. The assistant message carrying the tool calls
+   * is pushed once per iteration by the caller, so this only records the
+   * result, keeping the message history valid for the next LLM round.
+   */
+  private async executeAndRecordToolResult(
+    toolCall: ToolCall,
+    messages: ChatMessage[],
+    toolResponses: ToolExecutionResult[],
+  ): Promise<void> {
+    try {
+      const toolResponse = await executeToolCall(
+        toolCall,
+        this.tools,
+        this.mcpClients,
+        this.toolCallTimeout,
+      );
+      toolResponses.push(toolResponse);
+
+      messages.push({
+        role: 'tool',
+        content: toolResponse.result,
+        tool_call_id: toolCall.id,
+      });
+    } catch (error) {
+      const errorMessage = `Error executing tool '${toolCall.function.name}': ${
+        error instanceof Error ? error.message : error
+      }`;
+
+      this.logger.warn(errorMessage);
+
+      // Still record the error response to maintain conversation flow
+      const errorResponse: ToolExecutionResult = {
+        id: toolCall.id,
+        name: toolCall.function.name,
+        arguments: this.safeParseArguments(toolCall.function.arguments),
+        result: errorMessage,
+        serverId: 'error',
       };
+
+      toolResponses.push(errorResponse);
+
+      messages.push({
+        role: 'tool',
+        content: errorMessage,
+        tool_call_id: toolCall.id,
+      });
     }
+  }
 
-    return {
-      reply: replyMessage.content || '',
-      toolCalls: [],
-      toolResponses: [],
-    };
+  /**
+   * Parses a tool call's JSON-encoded arguments without throwing. The model
+   * can emit malformed JSON; since this is used while recording a tool error,
+   * a parse failure must not escape and break the "record and continue" flow.
+   * Falls back to the raw string (or `{}` when absent) on failure.
+   */
+  private safeParseArguments(rawArguments: string): Record<string, unknown> {
+    if (!rawArguments) {
+      return {};
+    }
+    try {
+      return JSON.parse(rawArguments);
+    } catch {
+      return { _raw: rawArguments };
+    }
   }
 
   /**
diff --git a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts
index 94d1ff158ef..e55ca3702f2 100644
--- a/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts
+++ b/workspaces/mcp-chat/plugins/mcp-chat-backend/src/utils.ts
@@ -34,6 +34,13 @@ import { RootConfigService } from '@backstage/backend-plugin-api';
  */
 export const DEFAULT_MCP_TOOL_CALL_TIMEOUT_MS = 60000;
 
+/**
+ * Default maximum number of tool-call iterations the agentic loop will run
+ * before giving up on producing a final answer.
+ * @public
+ */
+export const DEFAULT_MCP_MAX_TOOL_ITERATIONS = 8;
+
 /**
  * Loads MCP server configurations from Backstage config.
  * Reads from the `mcpChat.mcpServers` configuration section.