From c7ab798b4b830658cfad0171f2831df6fe296a9a Mon Sep 17 00:00:00 2001 From: bjoern Date: Thu, 5 Feb 2026 21:05:45 +0100 Subject: [PATCH 1/8] Improve tool calling response handling - Add fallback parsing for Ollama models that return tool calls as JSON text in message content instead of using the structured tool_calls field - Return tool results directly to CLI instead of making a follow-up LLM call, reducing latency and preventing hallucinated rewrites of output - Add dedicated Glob tool returning plain text (one path per line) instead of JSON, with workspace_list accepting both 'pattern' and 'patterns' - Clarify why Glob is not aliased to workspace_list (format mismatch) --- src/clients/ollama-utils.js | 69 +++++++++++++++++++++++++++++++++++++ src/orchestrator/index.js | 47 ++++++++++++++++++++++++- src/tools/index.js | 4 +++ src/tools/indexer.js | 50 ++++++++++++++++++++++++--- 4 files changed, 165 insertions(+), 5 deletions(-) diff --git a/src/clients/ollama-utils.js b/src/clients/ollama-utils.js index 7582f05..2cd95c9 100644 --- a/src/clients/ollama-utils.js +++ b/src/clients/ollama-utils.js @@ -93,6 +93,65 @@ function convertAnthropicToolsToOllama(anthropicTools) { })); } +/** + * Extract tool call from text when LLM outputs JSON instead of using tool_calls + * Handles formats like: {"name": "Read", "parameters": {...}} + * + * @param {string} text - Text content that may contain JSON tool call + * @returns {object|null} - Tool call object in Ollama format, or null if not found + */ +function extractToolCallFromText(text) { + if (!text || typeof text !== 'string') return null; + + // Find potential JSON start - look for {"name" pattern + const startMatch = text.match(/\{\s*"name"\s*:/); + if (!startMatch) return null; + + const startIdx = startMatch.index; + + // Find matching closing brace using brace counting + let braceCount = 0; + let endIdx = -1; + for (let i = startIdx; i < text.length; i++) { + if (text[i] === '{') braceCount++; + else if (text[i] === '}') { + braceCount--; + if (braceCount === 0) { + endIdx = i + 1; + break; + } + } + } + + if (endIdx === -1) return null; + + const jsonStr = text.substring(startIdx, endIdx); + + try { + const parsed = JSON.parse(jsonStr); + + if (!parsed.name || !parsed.parameters) { + return null; + } + + logger.info({ + toolName: parsed.name, + params: parsed.parameters, + originalText: text.substring(0, 200) + }, "Extracted tool call from text content (fallback parsing)"); + + return { + function: { + name: parsed.name, + arguments: parsed.parameters + } + }; + } catch (e) { + logger.debug({ error: e.message, text: text.substring(0, 200) }, "Failed to parse extracted tool call"); + return null; + } +} + /** * Convert Ollama tool call response to Anthropic format * @@ -126,6 +185,15 @@ function convertOllamaToolCallsToAnthropic(ollamaResponse) { - const toolCalls = message.tool_calls || []; + let toolCalls = message.tool_calls || []; const textContent = message.content || ""; + // FALLBACK: If no tool_calls but text contains JSON tool call, parse it + if (toolCalls.length === 0 && textContent) { + const extracted = extractToolCallFromText(textContent); + if (extracted) { + logger.info({ extractedTool: extracted.function?.name }, "Using fallback text parsing for tool call"); + toolCalls = [extracted]; + } + } + const contentBlocks = []; // Add text content if present @@ -217,4 +285,5 @@ module.exports = { convertOllamaToolCallsToAnthropic, buildAnthropicResponseFromOllama, modelNameSupportsTools, + extractToolCallFromText, }; diff --git a/src/orchestrator/index.js
b/src/orchestrator/index.js index 55a47a5..748b0da 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -2612,7 +2612,52 @@ IMPORTANT TOOL USAGE RULES: } } - continue; + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete"); + + // Return tool results directly to CLI - no more LLM call needed + // The tool result IS the answer (e.g., file contents for Read) + if (accumulatedToolResults.length > 0) { + auditLog("=== RETURNING TOOL RESULTS DIRECTLY TO CLI ===", { + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name) + }); + + // Convert tool_result blocks to text blocks for CLI display + // The CLI only understands text/tool_use in responses, not tool_result + const directResponse = { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + content: accumulatedToolResults.map(r => ({ + type: "text", + text: r.content + })), + model: requestedModel, + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 } + }; + + return { + response: { + status: 200, + body: directResponse, + terminationReason: "tool_result_direct", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "tool_result_direct", + }; + } + + continue; // Only if no tool results (shouldn't happen) } let anthropicPayload; diff --git a/src/tools/index.js b/src/tools/index.js index 11227f0..a6a6296 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -88,6 +88,10 @@ const TOOL_ALIASES = { runtests: "workspace_test_run", testsummary: "workspace_test_summary", testhistory: "workspace_test_history", + // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool) + // - returns plain text format instead of JSON + // glob: "workspace_list", + // Glob: "workspace_list", }; function coerceString(value) { diff --git a/src/tools/indexer.js b/src/tools/indexer.js index eb0a981..2db3504 100644 --- a/src/tools/indexer.js +++ b/src/tools/indexer.js @@ -16,11 +16,13 @@ function registerWorkspaceListTool() { registerTool( "workspace_list", async ({ args = {} }) => { + // Support both 'pattern' (Glob tool) and 'patterns' (workspace_list) + const rawPatterns = args.pattern ?? args.patterns; const patterns = - typeof args.patterns === "string" - ? [args.patterns] - : Array.isArray(args.patterns) - ? args.patterns + typeof rawPatterns === "string" + ? [rawPatterns] + : Array.isArray(rawPatterns) + ? rawPatterns : undefined; const ignore = typeof args.ignore === "string" @@ -260,6 +262,45 @@ function registerSymbolReferencesTool() { ); } + +/** + * Dedicated Glob tool for Claude Code compatibility (maybe others?). + * + * Why this exists (instead of using workspace_list alias): + * - Claude Code's Glob tool returns plain text (one path per line) + * - workspace_list returns JSON with entries array + * - Models expect plain text format from Glob tool + * + * See also: TOOL_ALIASES in src/tools/index.js (commented glob entries) + */ +function registerGlobTool() { + registerTool( + "Glob", + async ({ args = {} }) => { + const pattern = args.pattern; + const basePath = args.path; + + let patterns; + if (basePath) { + const cleanPath = basePath.replace(/\/+$/, ""); + patterns = pattern ? 
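+        // Shape sketch (assuming minimatch-style globs, as listWorkspaceFiles appears to accept):
+        //   path="src", pattern="**/*.js" -> patterns = ["src/**/*.js"]
+        //   path="src", no pattern        -> patterns = ["src/**/*"]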
[`${cleanPath}/${pattern}`] : [`${cleanPath}/**/*`]; + } else { + patterns = pattern ? [pattern] : undefined; + } + + const entries = await listWorkspaceFiles({ patterns, limit: 1000 }); + + // Plain text output: one path per line (Claude Code format) + return { + ok: true, + status: 200, + content: entries.map((e) => e.path).join("\n"), + }; + }, + { category: "indexing" }, + ); +} + function registerGotoDefinitionTool() { registerTool( "workspace_goto_definition", @@ -353,6 +394,7 @@ function registerIndexerTools() { registerSymbolSearchTool(); registerSymbolReferencesTool(); registerGotoDefinitionTool(); + registerGlobTool(); } module.exports = { From ef51084aef6db6e56196f8bbc66ae3d48a329ac5 Mon Sep 17 00:00:00 2001 From: bjoern Date: Fri, 6 Feb 2026 18:29:59 +0100 Subject: [PATCH 2/8] Improve tool calling response handling - Additional logging for tool call parsing and execution - Hard-coded shell commands for reliable tool execution - Deduplication of tool calls within a single response - Collect and return results from all called tools - Ollama uses specified Ollama model - Fix double-serialized JSON parameters from some providers --- src/api/middleware/logging.js | 96 +++++++-- src/api/middleware/request-logging.js | 18 +- src/api/router.js | 77 ++++++- src/config/index.js | 3 + src/orchestrator/index.js | 297 +++++++++++++++++++++++++- src/tools/index.js | 128 ++++++++++- src/tools/stubs.js | 29 +++ src/tools/workspace.js | 2 +- 8 files changed, 613 insertions(+), 37 deletions(-) diff --git a/src/api/middleware/logging.js b/src/api/middleware/logging.js index e53faee..dc72b03 100644 --- a/src/api/middleware/logging.js +++ b/src/api/middleware/logging.js @@ -12,26 +12,92 @@ function maskHeaders(headers = {}) { return clone; } -const loggingMiddleware = pinoHttp({ +const baseLoggingMiddleware = pinoHttp({ logger, - customProps: (req) => ({ + autoLogging: false, // Disable automatic logging so we can log manually with bodies + customProps: (req, res) => ({ sessionId: req.sessionId ?? null, }), - customLogLevel: (req, res, err) => { - if (err || res.statusCode >= 500) return "error"; - if (res.statusCode >= 400) return "warn"; - return "info"; - }, - wrapSerializers: true, - serializers: { - req(req) { - return { +}); + +// Wrapper middleware to capture and log full request/response bodies +function loggingMiddleware(req, res, next) { + const startTime = Date.now(); + + // Log request with full body immediately + logger.info({ + sessionId: req.sessionId ?? 
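+  // Assumes a JSON body parser (e.g. express.json()) ran before this middleware;
+  // otherwise req.body is undefined and requestBody will be logged as such.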
null, + req: { + method: req.method, + url: req.url, + headers: maskHeaders(req.headers), + }, + requestBody: req.body, // Full request body without truncation + }, 'request started'); + + // Intercept res.write for streaming responses + const originalWrite = res.write; + const chunks = []; + res.write = function (chunk) { + if (chunk) { + chunks.push(Buffer.from(chunk)); + } + return originalWrite.apply(this, arguments); + }; + + // Intercept res.send to capture the body + const originalSend = res.send; + res.send = function (body) { + res._capturedBody = body; + + // Parse if it's a JSON string for better logging + if (typeof body === 'string') { + try { + res._capturedBody = JSON.parse(body); + } catch (e) { + res._capturedBody = body; + } + } + + return originalSend.call(this, body); + }; + + // Log response when finished + res.on('finish', () => { + const responseTime = Date.now() - startTime; + + // Capture streaming body if not already captured via send() + if (chunks.length > 0 && !res._capturedBody) { + const fullBody = Buffer.concat(chunks).toString('utf8'); + res._capturedBody = { + type: 'stream', + contentType: res.getHeader('content-type'), + size: fullBody.length, + preview: fullBody.substring(0, 1000) + }; + } + + const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info'; + + logger[logLevel]({ + sessionId: req.sessionId ?? null, + req: { method: req.method, url: req.url, headers: maskHeaders(req.headers), - }; - }, - }, -}); + }, + res: { + statusCode: res.statusCode, + headers: res.getHeaders ? res.getHeaders() : res.headers, + }, + requestBody: req.body, // Full request body without truncation + responseBody: res._capturedBody, // Full response body without truncation + responseTime, + }, 'request completed'); + }); + + // Still call base middleware to set up req.log + baseLoggingMiddleware(req, res, next); +} module.exports = loggingMiddleware; diff --git a/src/api/middleware/request-logging.js b/src/api/middleware/request-logging.js index 8352e1a..cf2709e 100644 --- a/src/api/middleware/request-logging.js +++ b/src/api/middleware/request-logging.js @@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) { // Add to response headers res.setHeader("X-Request-ID", requestId); - // Log request start +// Log request start with full body logger.info( { requestId, method: req.method, path: req.path || req.url, query: req.query, + body: req.body, // Full request body without truncation ip: req.ip || req.socket.remoteAddress, userAgent: req.headers["user-agent"], }, @@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) { res.send = function (body) { const duration = Date.now() - startTime; - // Log request completion + // Parse body if it's a string + let responseBody = body; + if (typeof body === 'string') { + try { + responseBody = JSON.parse(body); + } catch (e) { + // Keep as string if not JSON + responseBody = body; + } + } + + // Log request completion with full request and response bodies logger.info( { requestId, @@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) { status: res.statusCode, duration, contentLength: res.getHeader("content-length"), + requestBody: req.body, // Full request body for reference + responseBody, // Full response body without truncation }, "Request completed" ); diff --git a/src/api/router.js b/src/api/router.js index b3ed198..057341d 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -7,6 +7,7 @@ const openaiRouter = require("./openai-router"); const 
providersRouter = require("./providers-handler"); const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing"); const { validateCwd } = require("../workspace"); +const logger = require("../logger"); const router = express.Router(); @@ -121,6 +122,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream); const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + wantsStream, + hasTools, + willUseStreamingPath: wantsStream || hasTools + }, "=== REQUEST ROUTING DECISION ==="); + // Analyze complexity for routing headers (Phase 3) const complexity = analyzeComplexity(req.body); const routingHeaders = getRoutingHeaders({ @@ -338,6 +346,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Legacy streaming wrapper (for tool-based requests that requested streaming) if (wantsStream && hasTools) { + logger.info({ + sessionId: req.headers['x-claude-session-id'], + pathType: 'legacy_streaming_wrapper', + wantsStream, + hasTools + }, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ==="); + metrics.recordStreamingStart(); res.set({ "Content-Type": "text/event-stream", @@ -359,6 +374,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Use proper Anthropic SSE format const msg = result.body; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + eventType: 'message_start', + streamingWithTools: true, + hasContent: !!(msg.content && msg.content.length > 0) + }, "=== SENDING SSE MESSAGE_START ==="); + // 1. message_start res.write(`event: message_start\n`); res.write(`data: ${JSON.stringify({ @@ -419,9 +441,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { res.write(`event: content_block_stop\n`); res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + } else if (block.type === "tool_result") { + // === TOOL_RESULT SSE STREAMING - ENTERED === + logger.info({ + blockIndex: i, + blockType: block.type, + toolUseId: block.tool_use_id, + contentType: typeof block.content, + contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ==="); + + // Stream tool_result blocks so CLI can display actual tool output + res.write(`event: content_block_start\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_start", + index: i, + content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" } + })}\n\n`); + + // Stream the actual content + const content = typeof block.content === 'string' + ? block.content + : JSON.stringify(block.content); + + logger.info({ + blockIndex: i, + contentLength: content.length, + contentPreview: content.substring(0, 200) + }, "=== SSE: STREAMING TOOL_RESULT CONTENT ==="); + + res.write(`event: content_block_delta\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_delta", + index: i, + delta: { type: "tool_result_delta", content: content } + })}\n\n`); + + res.write(`event: content_block_stop\n`); + res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + + // === TOOL_RESULT SSE STREAMING - COMPLETED === + logger.info({ + blockIndex: i, + toolUseId: block.tool_use_id + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ==="); } } - // 3. 
message_delta with stop_reason res.write(`event: message_delta\n`); res.write(`data: ${JSON.stringify({ @@ -454,6 +519,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { }); } + + // DIAGNOSTIC: Log response being sent to client + logger.info({ + status: result.status, + hasBody: !!result.body, + bodyKeys: result.body ? Object.keys(result.body) : [], + bodyType: typeof result.body, + contentLength: result.body ? JSON.stringify(result.body).length : 0 + }, "=== SENDING RESPONSE TO CLIENT ==="); + metrics.recordResponse(result.status); res.status(result.status).send(result.body); } catch (error) { diff --git a/src/config/index.js b/src/config/index.js index 466585d..8e31472 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -176,6 +176,7 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) { "TOOL_EXECUTION_MODE must be one of: server, client, passthrough (default: server)" ); } +console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`); // Memory system configuration (Titans-inspired long-term memory) const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true @@ -348,6 +349,8 @@ const databricksUrl = ? `${rawBaseUrl}${endpointPath.startsWith("/") ? "" : "/"}${endpointPath}` : null; +// Set MODEL_DEFAULT env var to use a specific model (e.g. "llama3.1" for Ollama). +// Without it, the default falls back to a Databricks Claude model regardless of MODEL_PROVIDER. const defaultModel = process.env.MODEL_DEFAULT ?? (modelProvider === "azure-anthropic" ? "claude-opus-4-5" : "databricks-claude-sonnet-4-5"); diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 748b0da..0d7d889 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -919,6 +919,10 @@ function sanitizePayload(payload) { : "claude-opus-4-5"; clean.model = azureDefaultModel; } else if (providerType === "ollama") { + // Override client model with Ollama config model + const ollamaConfiguredModel = config.ollama?.model; + clean.model = ollamaConfiguredModel; + // Ollama format conversion // Check if model supports tools const { modelNameSupportsTools } = require("../clients/ollama-utils"); @@ -1024,8 +1028,15 @@ function sanitizePayload(payload) { } // Very short messages (< 20 chars) without code/technical keywords + // BUT: Common shell commands should NOT be treated as conversational + const shellCommands = /^(pwd|ls|cd|cat|echo|grep|find|ps|top|df|du|whoami|which|env)[\s\.\!\?]*$/; + if (shellCommands.test(trimmed)) { + logger.info({ matched: "shell_command", trimmed }, "Ollama conversational check - SHELL COMMAND detected, keeping tools"); + return false; // NOT conversational - needs tools! + } + if (trimmed.length < 20 && !/code|file|function|error|bug|fix|write|read|create/.test(trimmed)) { - logger.debug({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - matched"); + logger.warn({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - SHORT MESSAGE matched, DELETING TOOLS"); return true; } @@ -1035,13 +1046,15 @@ function sanitizePayload(payload) { if (isConversational) { // Strip all tools for simple conversational messages + const originalToolCount = Array.isArray(clean.tools) ? 
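+    // This branch only runs for conversational input (e.g. "thanks!"); bare shell
+    // commands such as "pwd" were already exempted by the shellCommands check above.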
clean.tools.length : 0; delete clean.tools; delete clean.tool_choice; - logger.debug({ + logger.warn({ model: config.ollama?.model, - message: "Removed tools for conversational message" - }, "Ollama conversational mode"); - } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { + message: "Removed tools for conversational message", + originalToolCount, + userMessage: clean.messages?.[clean.messages.length - 1]?.content?.substring(0, 50), + }, "Ollama conversational mode - ALL TOOLS DELETED!"); } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { // Ollama performance degrades with too many tools // Limit to essential tools only const OLLAMA_ESSENTIAL_TOOLS = new Set([ @@ -1052,7 +1065,8 @@ function sanitizePayload(payload) { "Glob", "Grep", "WebSearch", - "WebFetch" + "WebFetch", + "shell", // Tool is registered as "shell" internally ]); const limitedTools = clean.tools.filter(tool => @@ -1351,6 +1365,20 @@ async function runAgentLoop({ const toolCallNames = new Map(); const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count let loopWarningInjected = false; // Track if we've already warned about loops + const accumulatedToolResults = []; // Track tool results to include in response for CLI display + + // Log agent loop start + logger.info( + { + sessionId: session?.id ?? null, + model: requestedModel, + maxSteps: settings.maxSteps, + maxDurationMs: settings.maxDurationMs, + wantsThinking, + providerType, + }, + "Agent loop started", + ); while (steps < settings.maxSteps) { if (Date.now() - start > settings.maxDurationMs) { @@ -1796,6 +1824,15 @@ IMPORTANT TOOL USAGE RULES: }); } } + logger.info({ + messageContent: databricksResponse.json?.message?.content + ? (typeof databricksResponse.json.message.content === 'string' + ? databricksResponse.json.message.content.substring(0, 500) + : JSON.stringify(databricksResponse.json.message.content).substring(0, 500)) + : 'NO_CONTENT', + hasToolCalls: !!databricksResponse.json?.message?.tool_calls, + toolCallCount: databricksResponse.json?.message?.tool_calls?.length || 0 + }, "=== RAW LLM RESPONSE CONTENT ==="); // Handle streaming responses (pass through without buffering) if (databricksResponse.stream) { @@ -1895,11 +1932,13 @@ IMPORTANT TOOL USAGE RULES: _anthropic_block: block, })); - logger.debug( + logger.info( { sessionId: session?.id ?? null, + step: steps, contentBlocks: contentArray.length, toolCallsFound: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), stopReason: databricksResponse.json?.stop_reason, }, "Azure Anthropic response parsed", @@ -1909,6 +1948,98 @@ IMPORTANT TOOL USAGE RULES: const choice = databricksResponse.json?.choices?.[0]; message = choice?.message ?? {}; toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? 
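+                // getToolCallSignature (defined elsewhere in this module) is assumed to
+                // derive a stable string from tool name + serialized arguments, so two
+                // calls to the same tool with identical params collapse into one.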
null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } else if (providerType === "ollama") { + // Ollama format: { message: { role, content, tool_calls }, done } + message = databricksResponse.json?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + logger.info({ + hasMessage: !!databricksResponse.json?.message, + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name), + done: databricksResponse.json?.done, + fullToolCalls: JSON.stringify(toolCalls), + fullResponseMessage: JSON.stringify(databricksResponse.json?.message) + }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); + } else { + // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } + const choice = databricksResponse.json?.choices?.[0]; + message = choice?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } + } } // Guard: drop hallucinated tool calls when no tools were sent to the model. @@ -2179,6 +2310,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, })) ); @@ -2225,6 +2357,15 @@ IMPORTANT TOOL USAGE RULES: cleanPayload.messages.push(toolMessage); + logger.info( + { + toolName: execution.name, + content: typeof toolMessage.content === 'string' + ? toolMessage.content.substring(0, 500) + : JSON.stringify(toolMessage.content).substring(0, 500) + }, "Tool result content sent to LLM", + ); + // Convert to Anthropic format for session storage let sessionToolResultContent; if (providerType === "azure-anthropic") { @@ -2412,6 +2553,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); let toolMessage; @@ -2502,6 +2644,39 @@ IMPORTANT TOOL USAGE RULES: }, }); + // Accumulate tool results for CLI display + // Build a standardized tool_result block in Anthropic format + logger.info({ + sessionId: session?.id ?? null, + callId: call.id, + executionId: execution.id, + toolName: call.function?.name ?? call.name ?? 
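+            // call.id is preferred as tool_use_id below so the result block pairs with
+            // the tool_use block the model emitted; execution.id is only a fallback.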
execution.name, + executionOk: execution.ok, + contentType: typeof execution.content, + accumulatedCountBefore: accumulatedToolResults.length + }, "=== ACCUMULATING TOOL RESULT FOR CLI - START ==="); + + const toolUseId = call.id ?? execution.id; + const toolResultContent = typeof execution.content === "string" + ? execution.content + : JSON.stringify(execution.content); + accumulatedToolResults.push({ + type: "tool_result", + tool_use_id: toolUseId, + tool_name: call.function?.name ?? call.name ?? execution.name, + content: toolResultContent, + is_error: execution.ok === false, + }); + + logger.info({ + sessionId: session?.id ?? null, + toolUseId, + toolName: call.function?.name ?? call.name ?? execution.name, + contentLength: toolResultContent.length, + contentPreview: toolResultContent.substring(0, 200), + accumulatedCountAfter: accumulatedToolResults.length + }, "=== ACCUMULATING TOOL RESULT FOR CLI - END ==="); + if (execution.ok) { logger.debug( { @@ -2624,11 +2799,11 @@ IMPORTANT TOOL USAGE RULES: // Return tool results directly to CLI - no more LLM call needed // The tool result IS the answer (e.g., file contents for Read) if (accumulatedToolResults.length > 0) { - auditLog("=== RETURNING TOOL RESULTS DIRECTLY TO CLI ===", { + logger.info({ sessionId: session?.id ?? null, toolResultCount: accumulatedToolResults.length, toolNames: accumulatedToolResults.map(r => r.tool_name) - }); + }, "=== RETURNING TOOL RESULTS DIRECTLY TO CLI ==="); // Convert tool_result blocks to text blocks for CLI display // The CLI only understands text/tool_use in responses, not tool_result @@ -2657,6 +2832,15 @@ IMPORTANT TOOL USAGE RULES: }; } + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete - processing next toolCall"); + continue; // Only if no tool results (shouldn't happen) } @@ -3104,6 +3288,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); const toolResultMessage = createFallbackToolResultMessage(providerType, { @@ -3246,6 +3431,100 @@ IMPORTANT TOOL USAGE RULES: }, "Agent loop completed successfully", ); + + // Include accumulated tool results in the response for CLI display + // This ensures the client sees actual tool output, not just LLM summaries + logger.info({ + sessionId: session?.id ?? null, + accumulatedToolResultsCount: accumulatedToolResults.length, + hasAnthropicPayload: !!anthropicPayload, + currentContentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + currentContentLength: anthropicPayload?.content?.length || 0 + }, "=== BEFORE TOOL RESULTS INCLUSION CHECK ==="); + + if (accumulatedToolResults.length > 0) { + logger.info({ + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name), + toolUseIds: accumulatedToolResults.map(r => r.tool_use_id) + }, "=== ENTERING TOOL RESULTS INCLUSION BLOCK ==="); + + // Ensure content is an array + if (!Array.isArray(anthropicPayload.content)) { + logger.info({ + sessionId: session?.id ?? null, + originalContentType: typeof anthropicPayload.content, + originalContentValue: anthropicPayload.content ? 
String(anthropicPayload.content).substring(0, 100) : 'null' + }, "=== CONTENT NOT ARRAY - CONVERTING ==="); + + anthropicPayload.content = anthropicPayload.content + ? [{ type: "text", text: String(anthropicPayload.content) }] + : []; + + logger.info({ + sessionId: session?.id ?? null, + convertedContentLength: anthropicPayload.content.length + }, "=== CONTENT CONVERTED TO ARRAY ==="); + } else { + logger.info({ + sessionId: session?.id ?? null, + existingContentLength: anthropicPayload.content.length, + existingContentTypes: anthropicPayload.content.map(b => b.type) + }, "=== CONTENT ALREADY ARRAY ==="); + } + + // Prepend tool results before text content so they appear in order + const contentBeforePrepend = anthropicPayload.content.length; + anthropicPayload.content = [...accumulatedToolResults, ...anthropicPayload.content]; + + logger.info({ + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name), + contentBeforePrepend, + contentAfterPrepend: anthropicPayload.content.length, + finalContentTypes: anthropicPayload.content.map(b => b.type) + }, "=== TOOL RESULTS PREPENDED TO RESPONSE ==="); + + for (const block of anthropicPayload.content) { + if (block.type === "tool_result") { + logger.info({ + toolName: block.tool_name, + content: typeof block.content === 'string' + ? block.content.substring(0, 500) + : JSON.stringify(block.content).substring(0, 500) + }, "=== TOOL RESULT CONTENT SENT TO CLI ==="); + } else if (block.type === "text") { + logger.info({ + text: block.text.substring(0, 500) + }, "=== TEXT CONTENT SENT TO CLI ==="); + } + } + + } else { + logger.info({ + sessionId: session?.id ?? null + }, "=== NO TOOL RESULTS TO INCLUDE (accumulatedToolResults empty) ==="); + } + + logger.info({ + sessionId: session?.id ?? null, + finalContentLength: anthropicPayload?.content?.length || 0, + finalContentTypes: anthropicPayload?.content?.map(b => b.type) || [] + }, "=== AFTER TOOL RESULTS INCLUSION CHECK ==="); + + // DIAGNOSTIC: Log response being returned + logger.info({ + sessionId: session?.id ?? null, + status: 200, + hasBody: !!anthropicPayload, + bodyKeys: anthropicPayload ? Object.keys(anthropicPayload) : [], + contentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + contentLength: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content).length) : 0, + stopReason: anthropicPayload?.stop_reason + }, "=== RETURNING RESPONSE TO CLIENT ==="); + return { response: { status: 200, diff --git a/src/tools/index.js b/src/tools/index.js index a6a6296..3d76777 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -94,6 +94,37 @@ const TOOL_ALIASES = { // Glob: "workspace_list", }; +/** + * Recursively parse string values that look like JSON arrays/objects. + * Some providers double-serialize nested parameters (e.g. questions: "[{...}]" + * instead of questions: [{...}]), which causes schema validation failures. 
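+ * Example (hypothetical double-serialized input):
+ *   deepParseStringifiedJson({ questions: "[{\"q\":\"Which one?\"}]" })
+ *   // => { questions: [{ q: "Which one?" }] }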
+ */ +function deepParseStringifiedJson(obj) { + if (typeof obj !== "object" || obj === null) return obj; + if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson); + + const result = {}; + for (const [key, value] of Object.entries(obj)) { + if (typeof value === "string") { + const trimmed = value.trim(); + if ( + (trimmed.startsWith("[") && trimmed.endsWith("]")) || + (trimmed.startsWith("{") && trimmed.endsWith("}")) + ) { + try { + result[key] = deepParseStringifiedJson(JSON.parse(trimmed)); + continue; + } catch { + // Not valid JSON, keep as string + } + } + } + result[key] = + typeof value === "object" ? deepParseStringifiedJson(value) : value; + } + return result; +} + function coerceString(value) { if (value === undefined || value === null) return ""; if (typeof value === "string") return value; @@ -128,24 +159,65 @@ function normalizeHandlerResult(result) { return { ok, status, content, metadata }; } -function parseArguments(call) { +function parseArguments(call, providerType = null) { const raw = call?.function?.arguments; - if (typeof raw !== "string" || raw.trim().length === 0) return {}; + + // DEBUG: Log full call structure for diagnosis + logger.info({ + providerType, + fullCall: JSON.stringify(call), + hasFunction: !!call?.function, + functionKeys: call?.function ? Object.keys(call.function) : [], + argumentsType: typeof raw, + argumentsValue: raw, + argumentsIsNull: raw === null, + argumentsIsUndefined: raw === undefined, + }, "=== PARSING TOOL ARGUMENTS ==="); + + // Ollama sends arguments as an object, OpenAI as a JSON string + if (typeof raw === "object" && raw !== null) { + if (providerType !== "ollama") { + logger.warn({ + providerType, + expectedProvider: "ollama", + argumentsType: typeof raw, + arguments: raw + }, `Received object arguments but provider is ${providerType || "unknown"}, expected ollama format. Continuing with object.`); + } else { + logger.info({ + type: "object", + arguments: raw + }, "Tool arguments already parsed (Ollama format)"); + } + return deepParseStringifiedJson(raw); + } + + if (typeof raw !== "string" || raw.trim().length === 0) { + logger.warn({ + argumentsType: typeof raw, + argumentsEmpty: !raw || raw.trim().length === 0, + providerType + }, "Arguments not a string or empty - returning {}"); + return {}; + } + try { - return JSON.parse(raw); + const parsed = JSON.parse(raw); + logger.info({ parsed }, "Parsed JSON string arguments"); + return deepParseStringifiedJson(parsed); } catch (err) { - logger.warn({ err }, "Failed to parse tool arguments"); + logger.warn({ err, raw }, "Failed to parse tool arguments"); return {}; } } -function normaliseToolCall(call) { +function normaliseToolCall(call, providerType = null) { const name = call?.function?.name ?? call?.name; const id = call?.id ?? `${name ?? 
"tool"}_${Date.now()}`; return { id, name, - arguments: parseArguments(call), + arguments: parseArguments(call, providerType), raw: call, }; } @@ -186,7 +258,8 @@ function listTools() { } async function executeToolCall(call, context = {}) { - const normalisedCall = normaliseToolCall(call); + const providerType = context?.providerType || context?.provider || null; + const normalisedCall = normaliseToolCall(call, providerType); let registered = registry.get(normalisedCall.name); if (!registered) { const aliasTarget = TOOL_ALIASES[normalisedCall.name.toLowerCase()]; @@ -229,6 +302,10 @@ async function executeToolCall(call, context = {}) { } if (!registered) { + logger.warn({ + tool: normalisedCall.name, + id: normalisedCall.id + }, "Tool not registered"); const content = coerceString({ error: "tool_not_registered", tool: normalisedCall.name, @@ -245,6 +322,17 @@ async function executeToolCall(call, context = {}) { }; } + // Log tool invocation with full details for debugging + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + args: normalisedCall.arguments, + argsKeys: Object.keys(normalisedCall.arguments || {}), + rawCall: JSON.stringify(normalisedCall.raw) + }, "=== EXECUTING TOOL ==="); + + startTime = Date.now() + try { const result = await registered.handler( { @@ -260,6 +348,18 @@ async function executeToolCall(call, context = {}) { // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); + const durationMs = Date.now() - startTime; + + // Log successful execution + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + status: formatted.status, + durationMs, + outputLength: truncatedContent?.length || 0, + truncated: truncatedContent !== formatted.content + }, "Tool execution completed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -271,11 +371,20 @@ async function executeToolCall(call, context = {}) { registered: true, truncated: truncatedContent !== formatted.content, originalLength: formatted.content?.length, - truncatedLength: truncatedContent?.length + truncatedLength: truncatedContent?.length, + durationMs }, }; } catch (err) { - logger.error({ err, tool: normalisedCall.name }, "Tool execution failed"); + const durationMs = Date.now() - startTime; + + logger.error({ + err, + tool: normalisedCall.name, + id: normalisedCall.id, + durationMs + }, "Tool execution failed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -290,6 +399,7 @@ async function executeToolCall(call, context = {}) { metadata: { registered: true, error: true, + durationMs }, error: err, }; diff --git a/src/tools/stubs.js b/src/tools/stubs.js index c026e8e..d2f1bd3 100644 --- a/src/tools/stubs.js +++ b/src/tools/stubs.js @@ -41,12 +41,41 @@ function createStubHandler(name, description) { }); } +function askUserQuestionHandler({ args }) { + let questions = args?.questions ?? []; + + if (typeof questions === "string") { + try { questions = JSON.parse(questions); } catch { questions = []; } + } + + if (!Array.isArray(questions)) questions = [questions]; + const lines = questions.map((q, i) => { + const header = q.header ? `[${q.header}] ` : ""; + const opts = (q.options ?? []) + .map((o, j) => ` ${j + 1}. 
${o.label} — ${o.description}`) + .join("\n"); + return `${header}${q.question}\n${opts}`; + }); + + return { + ok: true, + status: 200, + content: lines.join("\n\n"), + }; +} + function registerStubTools() { STUB_TOOLS.forEach((tool) => { if (!hasTool(tool.name)) { registerTool(tool.name, createStubHandler(tool.name, tool.description), tool); } }); + + if (!hasTool("AskUserQuestion")) { + registerTool("AskUserQuestion", askUserQuestionHandler, { + description: "Returns the model's question to the user as assistant output.", + }); + } } module.exports = { diff --git a/src/tools/workspace.js b/src/tools/workspace.js index 3eaeeb0..9971ba3 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -145,7 +145,7 @@ function registerWorkspaceTools() { registerTool( "edit_patch", async ({ args = {} }, context = {}) => { - const relativePath = validateString(args.path ?? args.file, "path"); + const relativePath = validateString(args.path ?? args.file ?? args.file_path, "path"); const patch = validateString(args.patch, "patch"); const encoding = normalizeEncoding(args.encoding); From 42632fe1e59ad41ffede0591452d8f1362004e67 Mon Sep 17 00:00:00 2001 From: bjoern Date: Mon, 9 Feb 2026 13:08:59 +0100 Subject: [PATCH 3/8] Remove tool_result_direct short-circuit and improve agentic loop Tool results now loop back to the model for natural language synthesis instead of being returned raw to the CLI. This fixes the bug where conversational messages (e.g. "hi") triggered tool calls and dumped raw output. Additional improvements: - Context-aware tiered compression that scales with model context window - Empty response detection with retry-then-fallback - _noToolInjection flag to prevent provider-level tool re-injection - Auto-approve external file reads in tool executor - Conversation context search in workspace_search --- src/clients/databricks.js | 20 +-- src/clients/standard-tools.js | 2 +- src/context/compression.js | 130 ++++++++++---- src/orchestrator/index.js | 308 +++++++++++++++----------------- src/providers/context-window.js | 144 +++++++++++++++ src/tools/index.js | 26 ++- src/tools/indexer.js | 66 ++++++- src/tools/workspace.js | 10 +- 8 files changed, 486 insertions(+), 220 deletions(-) create mode 100644 src/providers/context-window.js diff --git a/src/clients/databricks.js b/src/clients/databricks.js index 9b536cd..6655777 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -181,7 +181,7 @@ async function invokeDatabricks(body) { const databricksBody = { ...body }; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0)) { databricksBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -222,7 +222,7 @@ async function invokeAzureAnthropic(body) { } // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(body.tools) || body.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(body.tools) || body.tools.length === 0)) { body.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -342,7 +342,7 @@ async function invokeOllama(body) { if (!supportsTools) { // Model doesn't support tools - don't inject them toolsToSend = null; - } else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + } else if (injectToolsOllama && 
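+  // _noToolInjection is set upstream (sanitizePayload) when tools were deliberately
+  // stripped, e.g. for conversational messages, and must suppress re-injection here.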
!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Model supports tools and none provided - inject them toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -422,7 +422,7 @@ async function invokeOpenRouter(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -503,7 +503,7 @@ async function invokeAzureOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -854,7 +854,7 @@ async function invokeOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -956,7 +956,7 @@ async function invokeLlamaCpp(body) { let toolsInjected = false; const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false"; - if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + if (injectToolsLlamacpp && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1039,7 +1039,7 @@ async function invokeLMStudio(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1086,7 +1086,7 @@ async function invokeBedrock(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1370,7 +1370,7 @@ async function invokeZai(body) { zaiBody.model = mappedModel; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0)) { zaiBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, diff --git a/src/clients/standard-tools.js b/src/clients/standard-tools.js index 61ac791..6cfd833 100644 --- a/src/clients/standard-tools.js +++ b/src/clients/standard-tools.js @@ -24,7 +24,7 @@ const STANDARD_TOOLS = [ }, { name: "Read", - description: "Reads a file from the local filesystem. You can access any file directly by using this tool. For files outside the workspace, the user must approve access first.", + description: "Reads a file from the local filesystem. 
You can access any file directly by using this tool.\n\nEXTERNAL FILE APPROVAL FLOW: When reading a file outside the workspace, the tool will return an [APPROVAL REQUIRED] message instead of the file content. When this happens you MUST: (1) Tell the user the file is outside the workspace and ask for permission. (2) If the user approves, call this tool again with the SAME file_path and set user_approved=true. (3) Only then will the file content be returned.", input_schema: { type: "object", properties: { diff --git a/src/context/compression.js b/src/context/compression.js index 518aaba..47b0413 100644 --- a/src/context/compression.js +++ b/src/context/compression.js @@ -2,24 +2,63 @@ * History Compression for Token Optimization * * Compresses conversation history to reduce token usage while - * maintaining context quality. Uses sliding window approach: - * - Keep recent turns verbatim - * - Summarize older turns - * - Compress tool results + * maintaining context quality. Uses sliding window approach with + * percentage-based tiered compression that scales with recency + * and the model's context window size. * + * Tiers: + * - veryRecent (last 4 messages): keep 90% of content + * - recent (messages 5-10): keep 50% of content + * - old (11+): keep 20% of content */ const logger = require('../logger'); const config = require('../config'); +// Compression tiers: ratio = percentage of content to keep, minFloor = minimum chars +const COMPRESSION_TIERS = { + veryRecent: { ratio: 0.9, minFloor: 500 }, + recent: { ratio: 0.5, minFloor: 300 }, + old: { ratio: 0.2, minFloor: 200 }, +}; + +// How many of the recent messages count as "very recent" +const VERY_RECENT_COUNT = 4; + /** - * Compress conversation history to fit within token budget + * Compute the maximum character cap for a tier based on context window size. + * + * @param {number} contextWindowTokens - Model's context window in tokens (-1 = unknown) + * @param {string} tierName - "veryRecent", "recent", or "old" + * @returns {number} Maximum characters for tool result content in this tier + */ +function computeMaxCap(contextWindowTokens, tierName) { + // Convert tokens to chars (~4 chars/token), default to 8K tokens if unknown + const contextChars = (contextWindowTokens === -1 ? 8000 : contextWindowTokens) * 4; + const budgetRatios = { + veryRecent: 0.25, + recent: 0.10, + old: 0.03, + }; + return Math.floor(contextChars * (budgetRatios[tierName] ?? 0.03)); +} + +/** + * Compute the character limit for a piece of content based on tier and context window. * - * Strategy: - * 1. Keep last N turns verbatim (fresh context) - * 2. Summarize older turns (compressed history) - * 3. Compress tool results to key information only - * 4. 
Remove redundant exchanges + * @param {string} text - The text content + * @param {string} tierName - Tier name + * @param {number} contextWindowTokens - Context window in tokens + * @returns {number} Character limit + */ +function computeLimit(text, tierName, contextWindowTokens) { + const tier = COMPRESSION_TIERS[tierName] || COMPRESSION_TIERS.old; + const maxCap = computeMaxCap(contextWindowTokens, tierName); + return Math.min(maxCap, Math.max(tier.minFloor, Math.floor(text.length * tier.ratio))); +} + +/** + * Compress conversation history to fit within token budget * * @param {Array} messages - Conversation history * @param {Object} options - Compression options @@ -28,6 +67,8 @@ const config = require('../config'); function compressHistory(messages, options = {}) { if (!messages || messages.length === 0) return messages; + const contextWindowTokens = options.contextWindowTokens ?? -1; + const opts = { keepRecentTurns: options.keepRecentTurns ?? config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: options.summarizeOlder ?? config.historyCompression?.summarizeOlder ?? true, @@ -58,12 +99,16 @@ function compressHistory(messages, options = {}) { compressed.push(summary); } } else { - // Just compress tool results in old messages - compressed = oldMessages.map(msg => compressMessage(msg)); + // Compress tool results in old messages using "old" tier + compressed = oldMessages.map(msg => compressMessage(msg, "old", contextWindowTokens)); } - // Add recent messages (may compress tool results but keep content) - const recentCompressed = recentMessages.map(msg => compressToolResults(msg)); + // Add recent messages with tiered compression + const recentCompressed = recentMessages.map((msg, i) => { + const isVeryRecent = i >= recentMessages.length - VERY_RECENT_COUNT; + const tierName = isVeryRecent ? "veryRecent" : "recent"; + return compressToolResults(msg, tierName, contextWindowTokens); + }); const finalMessages = [...compressed, ...recentCompressed]; @@ -82,7 +127,8 @@ function compressHistory(messages, options = {}) { percentage: ((saved / originalLength) * 100).toFixed(1), splitIndex, oldMessages: oldMessages.length, - recentMessages: recentMessages.length + recentMessages: recentMessages.length, + contextWindowTokens, }, 'History compression applied'); } @@ -149,26 +195,28 @@ function summarizeOldHistory(messages) { } /** - * Compress a single message - * - * Reduces message size while preserving essential information. 
+ * Compress a single message (used for old messages outside the recent window) * * @param {Object} message - Message to compress + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed message */ -function compressMessage(message) { +function compressMessage(message, tierName = "old", contextWindowTokens = -1) { if (!message) return message; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + const compressed = { role: message.role }; // Compress content based on type if (typeof message.content === 'string') { - compressed.content = compressText(message.content, 300); + compressed.content = compressText(message.content, limit); } else if (Array.isArray(message.content)) { compressed.content = message.content - .map(block => compressContentBlock(block)) + .map(block => compressContentBlock(block, tierName, contextWindowTokens)) .filter(Boolean); } else { compressed.content = message.content; @@ -180,13 +228,12 @@ function compressMessage(message) { /** * Compress tool results in a message while keeping other content * - * Tool results can be very large. This compresses them while - * keeping user and assistant text intact. - * * @param {Object} message - Message to process + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Message with compressed tool results */ -function compressToolResults(message) { +function compressToolResults(message, tierName = "recent", contextWindowTokens = -1) { if (!message) return message; const compressed = { @@ -199,7 +246,7 @@ function compressToolResults(message) { compressed.content = message.content.map(block => { // Compress tool_result blocks if (block.type === 'tool_result') { - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); } // Keep other blocks as-is return block; @@ -215,16 +262,20 @@ function compressToolResults(message) { * Compress a content block * * @param {Object} block - Content block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object|null} Compressed block or null if removed */ -function compressContentBlock(block) { +function compressContentBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block) return null; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + switch (block.type) { case 'text': return { type: 'text', - text: compressText(block.text, 300) + text: compressText(block.text, limit) }; case 'tool_use': @@ -237,7 +288,7 @@ function compressContentBlock(block) { }; case 'tool_result': - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); default: return block; @@ -247,13 +298,15 @@ function compressContentBlock(block) { /** * Compress tool result block * - * Tool results can be very large (file contents, bash output). - * Compress while preserving essential information. + * Uses dynamic limits based on compression tier and context window size + * instead of a hardcoded character limit. 
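+ * Worked example (sketch): a 32K-token window gives contextChars = 32768 * 4 = 131072,
+ * so the "old" tier cap is floor(131072 * 0.03) = 3932 chars; a 10000-char result
+ * keeps min(3932, max(200, floor(10000 * 0.2))) = 2000 chars.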
* * @param {Object} block - tool_result block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed tool_result */ -function compressToolResultBlock(block) { +function compressToolResultBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block || block.type !== 'tool_result') return block; const compressed = { @@ -261,17 +314,20 @@ function compressToolResultBlock(block) { tool_use_id: block.tool_use_id, }; - // Compress content + // Compress content using dynamic limits if (typeof block.content === 'string') { - compressed.content = compressText(block.content, 500); + const limit = computeLimit(block.content, tierName, contextWindowTokens); + compressed.content = compressText(block.content, limit); } else if (Array.isArray(block.content)) { compressed.content = block.content.map(item => { if (typeof item === 'string') { - return compressText(item, 500); + const limit = computeLimit(item, tierName, contextWindowTokens); + return compressText(item, limit); } else if (item.type === 'text') { + const limit = computeLimit(item.text || "", tierName, contextWindowTokens); return { type: 'text', - text: compressText(item.text, 500) + text: compressText(item.text, limit) }; } return item; @@ -456,4 +512,6 @@ module.exports = { calculateCompressionStats, needsCompression, summarizeOldHistory, + COMPRESSION_TIERS, + computeMaxCap, }; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 0d7d889..8e8b1f5 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -10,6 +10,7 @@ const tokens = require("../utils/tokens"); const systemPrompt = require("../prompts/system"); const historyCompression = require("../context/compression"); const tokenBudget = require("../context/budget"); +const { getContextWindow } = require("../providers/context-window"); const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection"); const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom"); const { createAuditLogger } = require("../logger/audit-logger"); @@ -1049,6 +1050,7 @@ function sanitizePayload(payload) { const originalToolCount = Array.isArray(clean.tools) ? clean.tools.length : 0; delete clean.tools; delete clean.tool_choice; + clean._noToolInjection = true; logger.warn({ model: config.ollama?.model, message: "Removed tools for conversational message", @@ -1154,6 +1156,9 @@ function sanitizePayload(payload) { } clean.tools = selectedTools.length > 0 ? selectedTools : undefined; + if (!selectedTools.length) { + clean._noToolInjection = true; + } } clean.stream = payload.stream ?? 
false; @@ -1356,6 +1361,9 @@ async function runAgentLoop({ console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length); logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED'); const settings = resolveLoopOptions(options); + // Detect context window size for intelligent compression + const contextWindowTokens = await getContextWindow(); + console.log('[DEBUG] Context window detected:', contextWindowTokens, 'tokens for provider:', providerType); // Initialize audit logger (no-op if disabled) const auditLogger = createAuditLogger(config.audit); const start = Date.now(); @@ -1365,7 +1373,7 @@ async function runAgentLoop({ const toolCallNames = new Map(); const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count let loopWarningInjected = false; // Track if we've already warned about loops - const accumulatedToolResults = []; // Track tool results to include in response for CLI display + let emptyResponseRetried = false; // Track if we've retried after an empty LLM response // Log agent loop start logger.info( @@ -1415,7 +1423,6 @@ async function runAgentLoop({ } steps += 1; - console.log('[LOOP DEBUG] Entered while loop - step:', steps); logger.debug( { sessionId: session?.id ?? null, @@ -1446,7 +1453,8 @@ async function runAgentLoop({ cleanPayload.messages = historyCompression.compressHistory(originalMessages, { keepRecentTurns: config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: config.historyCompression?.summarizeOlder ?? true, - enabled: true + enabled: true, + contextWindowTokens, }); if (cleanPayload.messages !== originalMessages) { @@ -2057,6 +2065,67 @@ IMPORTANT TOOL USAGE RULES: // If there's also no text content, treat as empty response (handled below) } + // === EMPTY RESPONSE DETECTION (primary) === + // Check raw extracted message for empty content before tool handling or conversion + const rawTextContent = (() => { + if (typeof message.content === 'string') return message.content.trim(); + if (Array.isArray(message.content)) { + return message.content + .filter(b => b.type === 'text') + .map(b => b.text || '') + .join('') + .trim(); + } + return ''; + })(); + + if (toolCalls.length === 0 && !rawTextContent) { + console.log('[EMPTY RESPONSE] No text content and no tool calls - step:', steps, 'retried:', emptyResponseRetried); + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(message), + contentType: typeof message.content, + rawContentPreview: String(message.content || '').substring(0, 100), + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; + } + + // Fallback after retry also returned empty + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" 
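// generic user-facing fallback text; the loop has already retried once with a nudge by this point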
}], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + if (toolCalls.length > 0) { // Convert OpenAI/OpenRouter format to Anthropic format for session storage let sessionContent; @@ -2556,6 +2625,15 @@ IMPORTANT TOOL USAGE RULES: providerType, }); + logger.debug( + { + id: execution.id ?? null, + name: execution.name ?? null, + arguments: execution.arguments ?? null, + content: execution.content ?? null, + is_error: execution.ok === false, + }, "executeToolCall response" ); + let toolMessage; if (providerType === "azure-anthropic") { const parsedContent = parseExecutionContent(execution.content); @@ -2644,39 +2722,6 @@ IMPORTANT TOOL USAGE RULES: }, }); - // Accumulate tool results for CLI display - // Build a standardized tool_result block in Anthropic format - logger.info({ - sessionId: session?.id ?? null, - callId: call.id, - executionId: execution.id, - toolName: call.function?.name ?? call.name ?? execution.name, - executionOk: execution.ok, - contentType: typeof execution.content, - accumulatedCountBefore: accumulatedToolResults.length - }, "=== ACCUMULATING TOOL RESULT FOR CLI - START ==="); - - const toolUseId = call.id ?? execution.id; - const toolResultContent = typeof execution.content === "string" - ? execution.content - : JSON.stringify(execution.content); - accumulatedToolResults.push({ - type: "tool_result", - tool_use_id: toolUseId, - tool_name: call.function?.name ?? call.name ?? execution.name, - content: toolResultContent, - is_error: execution.ok === false, - }); - - logger.info({ - sessionId: session?.id ?? null, - toolUseId, - toolName: call.function?.name ?? call.name ?? execution.name, - contentLength: toolResultContent.length, - contentPreview: toolResultContent.substring(0, 200), - accumulatedCountAfter: accumulatedToolResults.length - }, "=== ACCUMULATING TOOL RESULT FOR CLI - END ==="); - if (execution.ok) { logger.debug( { @@ -2796,52 +2841,7 @@ IMPORTANT TOOL USAGE RULES: lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, }, "Tool execution complete"); - // Return tool results directly to CLI - no more LLM call needed - // The tool result IS the answer (e.g., file contents for Read) - if (accumulatedToolResults.length > 0) { - logger.info({ - sessionId: session?.id ?? null, - toolResultCount: accumulatedToolResults.length, - toolNames: accumulatedToolResults.map(r => r.tool_name) - }, "=== RETURNING TOOL RESULTS DIRECTLY TO CLI ==="); - - // Convert tool_result blocks to text blocks for CLI display - // The CLI only understands text/tool_use in responses, not tool_result - const directResponse = { - id: `msg_${Date.now()}`, - type: "message", - role: "assistant", - content: accumulatedToolResults.map(r => ({ - type: "text", - text: r.content - })), - model: requestedModel, - stop_reason: "end_turn", - usage: { input_tokens: 0, output_tokens: 0 } - }; - - return { - response: { - status: 200, - body: directResponse, - terminationReason: "tool_result_direct", - }, - steps, - durationMs: Date.now() - start, - terminationReason: "tool_result_direct", - }; - } - - logger.info({ - sessionId: session?.id ?? 
null, - step: steps, - toolCallsExecuted: toolCallsExecuted, - totalToolCallsInThisStep: toolCalls.length, - messageCount: cleanPayload.messages.length, - lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, - }, "Tool execution complete - processing next toolCall"); - - continue; // Only if no tool results (shouldn't happen) + continue; // Loop back to invoke model with tool results in context } let anthropicPayload; @@ -3103,6 +3103,68 @@ IMPORTANT TOOL USAGE RULES: anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content); } + // === EMPTY RESPONSE DETECTION (safety net — post-conversion) === + // Primary detection is earlier (before tool handling). This catches edge cases + // where conversion produces empty content from non-empty raw data. + const hasTextContent = (() => { + if (Array.isArray(anthropicPayload.content)) { + return anthropicPayload.content.some(b => b.type === "text" && b.text?.trim()); + } + if (typeof anthropicPayload.content === "string") { + return anthropicPayload.content.trim().length > 0; + } + return false; + })(); + + const hasToolUseBlocks = Array.isArray(anthropicPayload.content) && + anthropicPayload.content.some(b => b.type === "tool_use"); + + if (!hasToolUseBlocks && !hasTextContent) { + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(anthropicPayload), + contentType: typeof anthropicPayload.content, + contentLength: Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content || "").length, + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; // Go back to top of while loop + } + + // If retry also returned empty, return a fallback message + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" }], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + // Ensure content is an array before calling .find() const content = Array.isArray(anthropicPayload.content) ? anthropicPayload.content : []; const fallbackCandidate = content.find( @@ -3432,88 +3494,6 @@ IMPORTANT TOOL USAGE RULES: "Agent loop completed successfully", ); - // Include accumulated tool results in the response for CLI display - // This ensures the client sees actual tool output, not just LLM summaries - logger.info({ - sessionId: session?.id ?? null, - accumulatedToolResultsCount: accumulatedToolResults.length, - hasAnthropicPayload: !!anthropicPayload, - currentContentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 
'array' : typeof anthropicPayload.content) : 'none', - currentContentLength: anthropicPayload?.content?.length || 0 - }, "=== BEFORE TOOL RESULTS INCLUSION CHECK ==="); - - if (accumulatedToolResults.length > 0) { - logger.info({ - sessionId: session?.id ?? null, - toolResultCount: accumulatedToolResults.length, - toolNames: accumulatedToolResults.map(r => r.tool_name), - toolUseIds: accumulatedToolResults.map(r => r.tool_use_id) - }, "=== ENTERING TOOL RESULTS INCLUSION BLOCK ==="); - - // Ensure content is an array - if (!Array.isArray(anthropicPayload.content)) { - logger.info({ - sessionId: session?.id ?? null, - originalContentType: typeof anthropicPayload.content, - originalContentValue: anthropicPayload.content ? String(anthropicPayload.content).substring(0, 100) : 'null' - }, "=== CONTENT NOT ARRAY - CONVERTING ==="); - - anthropicPayload.content = anthropicPayload.content - ? [{ type: "text", text: String(anthropicPayload.content) }] - : []; - - logger.info({ - sessionId: session?.id ?? null, - convertedContentLength: anthropicPayload.content.length - }, "=== CONTENT CONVERTED TO ARRAY ==="); - } else { - logger.info({ - sessionId: session?.id ?? null, - existingContentLength: anthropicPayload.content.length, - existingContentTypes: anthropicPayload.content.map(b => b.type) - }, "=== CONTENT ALREADY ARRAY ==="); - } - - // Prepend tool results before text content so they appear in order - const contentBeforePrepend = anthropicPayload.content.length; - anthropicPayload.content = [...accumulatedToolResults, ...anthropicPayload.content]; - - logger.info({ - sessionId: session?.id ?? null, - toolResultCount: accumulatedToolResults.length, - toolNames: accumulatedToolResults.map(r => r.tool_name), - contentBeforePrepend, - contentAfterPrepend: anthropicPayload.content.length, - finalContentTypes: anthropicPayload.content.map(b => b.type) - }, "=== TOOL RESULTS PREPENDED TO RESPONSE ==="); - - for (const block of anthropicPayload.content) { - if (block.type === "tool_result") { - logger.info({ - toolName: block.tool_name, - content: typeof block.content === 'string' - ? block.content.substring(0, 500) - : JSON.stringify(block.content).substring(0, 500) - }, "=== TOOL RESULT CONTENT SENT TO CLI ==="); - } else if (block.type === "text") { - logger.info({ - text: block.text.substring(0, 500) - }, "=== TEXT CONTENT SENT TO CLI ==="); - } - } - - } else { - logger.info({ - sessionId: session?.id ?? null - }, "=== NO TOOL RESULTS TO INCLUDE (accumulatedToolResults empty) ==="); - } - - logger.info({ - sessionId: session?.id ?? null, - finalContentLength: anthropicPayload?.content?.length || 0, - finalContentTypes: anthropicPayload?.content?.map(b => b.type) || [] - }, "=== AFTER TOOL RESULTS INCLUSION CHECK ==="); - // DIAGNOSTIC: Log response being returned logger.info({ sessionId: session?.id ?? null, diff --git a/src/providers/context-window.js b/src/providers/context-window.js new file mode 100644 index 0000000..dcea89d --- /dev/null +++ b/src/providers/context-window.js @@ -0,0 +1,144 @@ +/** + * Context Window Detection + * + * Queries the active provider for its context window size (in tokens). + * Returns -1 if unknown. Caches the result for the lifetime of the process. 
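+ *
+ * Usage sketch (illustrative; the 0.8 budget factor is a made-up example):
+ *   const { getContextWindow } = require("../providers/context-window");
+ *   const ctx = await getContextWindow(); // e.g. 32768, or -1 if unknown
+ *   const budget = ctx > 0 ? Math.floor(ctx * 0.8) : 8192;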
+ */ + +const config = require("../config"); +const logger = require("../logger"); + +// Known context sizes for proprietary models (tokens) +const KNOWN_CONTEXT_SIZES = { + // Anthropic + "claude-3-opus": 200000, + "claude-3-sonnet": 200000, + "claude-3-haiku": 200000, + "claude-3.5-sonnet": 200000, + "claude-4": 200000, + // OpenAI + "gpt-4o": 128000, + "gpt-4o-mini": 128000, + "gpt-4-turbo": 128000, + "gpt-4": 8192, + "gpt-3.5-turbo": 16385, +}; + +// null = not yet detected, -1 = detected but unknown, >0 = known +let cachedContextWindow = null; + +async function detectContextWindow() { + const provider = config.modelProvider.type; + + try { + if (provider === "ollama") { + return await detectOllamaContextWindow(); + } + if (provider === "openrouter") { + return await detectOpenRouterContextWindow(); + } + if (provider === "openai") { + return detectFromKnownSizes(config.openai.model); + } + // azure-anthropic, bedrock — use known Anthropic sizes + if (["azure-anthropic", "bedrock"].includes(provider)) { + return 200000; + } + if (provider === "azure-openai") { + return detectFromKnownSizes(config.azureOpenAI.deployment); + } + if (provider === "llamacpp" || provider === "lmstudio") { + return -1; // No standard API to query + } + if (provider === "zai") { + return 128000; // GLM-4 family + } + if (provider === "vertex") { + return 1000000; // Gemini models + } + } catch (err) { + logger.warn({ err, provider }, "Failed to detect context window"); + } + + return -1; +} + +async function detectOllamaContextWindow() { + const endpoint = `${config.ollama.endpoint}/api/show`; + const response = await fetch(endpoint, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: config.ollama.model }), + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + + // Ollama prefixes context_length with the architecture name + // (e.g. "llama.context_length", "qwen2.context_length", "gemma.context_length") + // Search for any key ending in ".context_length" or exactly "context_length" + if (data.model_info && typeof data.model_info === "object") { + for (const [key, value] of Object.entries(data.model_info)) { + if (key === "context_length" || key.endsWith(".context_length")) { + if (typeof value === "number" && value > 0) return value; + } + } + } + + // Fallback: parse from parameters string (e.g. "num_ctx 32768") + const match = data.parameters?.match(/num_ctx\s+(\d+)/); + if (match) return parseInt(match[1], 10); + return -1; +} + +async function detectOpenRouterContextWindow() { + const baseEndpoint = config.openrouter.endpoint || "https://openrouter.ai/api/v1/chat/completions"; + // Derive the models endpoint from the chat endpoint + const modelsEndpoint = baseEndpoint.replace(/\/v1\/chat\/completions$/, "/v1/models"); + const response = await fetch(modelsEndpoint, { + headers: { Authorization: `Bearer ${config.openrouter.apiKey}` }, + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + const model = data.data?.find((m) => m.id === config.openrouter.model); + return model?.context_length ?? 
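+      // context_length is read from OpenRouter's /v1/models listing above; falls back to -1 when the model id is absent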
-1; +} + +function detectFromKnownSizes(modelName) { + if (!modelName) return -1; + const lower = modelName.toLowerCase(); + for (const [key, size] of Object.entries(KNOWN_CONTEXT_SIZES)) { + if (lower.includes(key)) return size; + } + return -1; +} + +async function getContextWindow() { + if (cachedContextWindow !== null) return cachedContextWindow; + cachedContextWindow = await detectContextWindow(); + if (cachedContextWindow === -1) { + logger.warn( + { provider: config.modelProvider.type }, + "Could not detect context window size — falling back to 8K tokens. " + + "Compression may be more aggressive than necessary.", + ); + } else { + logger.info( + { contextWindow: cachedContextWindow, provider: config.modelProvider.type }, + "Context window detected", + ); + } + return cachedContextWindow; +} + +function resetCache() { + cachedContextWindow = null; +} + +module.exports = { + getContextWindow, + detectContextWindow, + resetCache, + KNOWN_CONTEXT_SIZES, +}; diff --git a/src/tools/index.js b/src/tools/index.js index 3d76777..95f4807 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -343,7 +343,31 @@ async function executeToolCall(call, context = {}) { }, context, ); - const formatted = normalizeHandlerResult(result); + let formatted = normalizeHandlerResult(result); + + // Auto-approve external file reads: the user already asked to read the file, + // so re-execute transparently with user_approved=true instead of relying + // on the LLM to manage a multi-step approval conversation. + if ( + formatted.content && + typeof formatted.content === "string" && + formatted.content.startsWith("[APPROVAL REQUIRED]") + ) { + logger.info( + { tool: normalisedCall.name, id: normalisedCall.id }, + "Auto-approving external file read (user initiated the request)", + ); + const approvedResult = await registered.handler( + { + id: normalisedCall.id, + name: normalisedCall.name, + args: { ...normalisedCall.arguments, user_approved: true }, + raw: normalisedCall.raw, + }, + context, + ); + formatted = normalizeHandlerResult(approvedResult); + } // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); diff --git a/src/tools/indexer.js b/src/tools/indexer.js index 2db3504..bf13ca8 100644 --- a/src/tools/indexer.js +++ b/src/tools/indexer.js @@ -55,10 +55,62 @@ function registerWorkspaceListTool() { ); } +/** + * Search recent conversation context for content matching a query. + * + * Scans the last 10 messages for tool_result content that matches + * the query words. Returns matches sorted by relevance. + * + * @param {string} query - Search query + * @param {Array} messages - Recent conversation messages + * @returns {Array} Matching context snippets + */ +function searchRecentContext(query, messages) { + if (!query || !messages || !Array.isArray(messages)) return []; + + const queryLower = query.toLowerCase(); + const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 2); + if (queryWords.length === 0) return []; + + const matches = []; + + // Scan last 10 messages for tool_result content + const recent = messages.slice(-10); + for (const msg of recent) { + if (msg.role !== "tool" && msg.role !== "user") continue; + + const content = + typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) + ? msg.content + .filter((b) => b.type === "tool_result" || b.type === "text") + .map((b) => b.content ?? b.text ?? 
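+              // tool_result blocks carry .content, text blocks carry .text; fall back to ""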
"") + .join("\n") + : ""; + + if (!content || content.length < 20) continue; + + // Check if any query words appear in the content + const contentLower = content.toLowerCase(); + const matchCount = queryWords.filter((w) => contentLower.includes(w)).length; + + if (matchCount > 0 && matchCount / queryWords.length >= 0.3) { + matches.push({ + source: "conversation_context", + relevance: matchCount / queryWords.length, + preview: content.substring(0, 500), + }); + } + } + + return matches.sort((a, b) => b.relevance - a.relevance).slice(0, 3); +} + function registerWorkspaceSearchTool() { registerTool( "workspace_search", - async ({ args = {} }) => { + async ({ args = {} }, context = {}) => { const query = args.query ?? args.term ?? args.pattern; const regex = args.regex === true || args.is_regex === true; const limit = Number.isInteger(args.limit) ? args.limit : undefined; @@ -69,6 +121,9 @@ function registerWorkspaceSearchTool() { ? args.ignore : undefined; + // Check recent conversation context for matching content + const contextMatches = searchRecentContext(query, context.requestMessages); + const result = await searchWorkspace({ query, regex, @@ -76,12 +131,21 @@ function registerWorkspaceSearchTool() { ignore, }); + // Prepend context matches if found + if (contextMatches.length > 0) { + result.context_matches = contextMatches; + result.note = + "Results from recently read files are listed in context_matches. " + + "Prefer these over workspace matches when answering about previously read content."; + } + return { ok: true, status: 200, content: JSON.stringify(result, null, 2), metadata: { total: result.matches.length, + contextTotal: contextMatches.length, engine: result.engine, }, }; diff --git a/src/tools/workspace.js b/src/tools/workspace.js index 9971ba3..37933ae 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -43,13 +43,9 @@ function registerWorkspaceTools() { const expanded = expandTilde(targetPath); const resolved = path.resolve(expanded); return { - ok: false, - status: 403, - content: JSON.stringify({ - error: "external_path_requires_approval", - message: `The file "${targetPath}" resolves to "${resolved}" which is outside the workspace. You MUST ask the user for permission before reading this file. If the user approves, call this tool again with the same path and set user_approved to true.`, - resolved_path: resolved, - }), + ok: true, + status: 200, + content: `[APPROVAL REQUIRED] The file "${resolved}" is outside the workspace and cannot be read without user permission.\n\nYou must now ask the user: "The file ${resolved} is outside the workspace. May I read it?"\n\nIf the user says yes, call the Read tool again with file_path="${targetPath}" and user_approved=true.`, }; } // User approved — read external file From 34847ebb094757b2c2fbca1b914f9b0af2bc9e32 Mon Sep 17 00:00:00 2001 From: bjoern Date: Tue, 10 Feb 2026 16:52:56 +0100 Subject: [PATCH 4/8] Fix stripThinkingBlocks() destroying markdown bullet points in Ollama responses The heuristic-based stripThinkingBlocks() matched standard markdown bullets (- item, * item) as "thinking block markers" and dropped all subsequent content. Replace with stripThinkTags() that only strips ... tags used by models like DeepSeek and Qwen for chain-of-thought reasoning. 
---
 src/config/index.js       |  16 ++++-
 src/orchestrator/index.js | 145 ++++++++++++++++++++++++++------------
 2 files changed, 113 insertions(+), 48 deletions(-)

diff --git a/src/config/index.js b/src/config/index.js
index 8e31472..6ecbb48 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -1,7 +1,9 @@
 const path = require("path");
 const dotenv = require("dotenv");

-dotenv.config();
+// .env must be authoritative over shell env vars (e.g. stale exports in .bashrc).
+// Skip override in test mode so tests can set process.env before requiring config.
+dotenv.config({ override: process.env.NODE_ENV !== "test" });

 function trimTrailingSlash(value) {
   if (typeof value !== "string") return value;
@@ -140,6 +142,10 @@ const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash";
 // Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name
 const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();

+// Topic detection model override
+// Values: "default" (use main model) or a model name to redirect topic detection to a lighter model
+const topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim();
+
 // Hot reload configuration
 const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true
 const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10);
@@ -177,6 +183,12 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) {
   );
 }
 console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`);
+if (suggestionModeModel.toLowerCase() !== "default") {
+  console.log(`[CONFIG] Suggestion mode model: ${suggestionModeModel}`);
+}
+if (topicDetectionModel.toLowerCase() !== "default") {
+  console.log(`[CONFIG] Topic detection model: ${topicDetectionModel}`);
+}

 // Memory system configuration (Titans-inspired long-term memory)
 const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true
@@ -604,6 +616,7 @@ var config = {
     type: modelProvider,
     defaultModel,
     suggestionModeModel,
+    topicDetectionModel,
     // Hybrid routing settings
     preferOllama,
     fallbackEnabled,
@@ -894,6 +907,7 @@ function reloadConfig() {
   config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false";
   config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
   config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();
+  config.modelProvider.topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim();

   // Log level
   config.logger.level = process.env.LOG_LEVEL ?? "info";
diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js
index 8e8b1f5..4b9a448 100644
--- a/src/orchestrator/index.js
+++ b/src/orchestrator/index.js
@@ -670,53 +670,11 @@ function normaliseToolChoice(choice) {
 }

 /**
- * Strip thinking-style reasoning from Ollama model outputs
- * Patterns to remove:
- * - Lines starting with bullet points (●, •, -, *)
- * - Explanatory reasoning before the actual response
- * - Multiple newlines used to separate thinking from response
+ * Strip <think>...</think> tags that some models (DeepSeek, Qwen) emit for chain-of-thought reasoning.
  */
-function stripThinkingBlocks(text) {
+function stripThinkTags(text) {
   if (typeof text !== "string") return text;
-
-  // Split into lines
-  const lines = text.split("\n");
-  const cleanedLines = [];
-  let inThinkingBlock = false;
-  let consecutiveEmptyLines = 0;
-
-  for (const line of lines) {
-    const trimmed = line.trim();
-
-    // Detect thinking block markers (bullet points followed by reasoning)
-    if (/^[●•\-\*]\s/.test(trimmed)) {
-      inThinkingBlock = true;
-      continue;
-    }
-
-    // Empty lines might separate thinking from response
-    if (trimmed === "") {
-      consecutiveEmptyLines++;
-      // If we've seen 2+ empty lines, likely end of thinking block
-      if (consecutiveEmptyLines >= 2) {
-        inThinkingBlock = false;
-      }
-      continue;
-    }
-
-    // Reset empty line counter
-    consecutiveEmptyLines = 0;
-
-    // Skip lines that are part of thinking block
-    if (inThinkingBlock) {
-      continue;
-    }
-
-    // Keep this line
-    cleanedLines.push(line);
-  }
-
-  return cleanedLines.join("\n").trim();
+  return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
 }

 function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
@@ -733,7 +691,7 @@ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {

   // Add text content if present, after stripping thinking blocks
   if (typeof rawContent === "string" && rawContent.trim()) {
-    const cleanedContent = stripThinkingBlocks(rawContent);
+    const cleanedContent = stripThinkTags(rawContent);
     if (cleanedContent) {
       contentItems.push({ type: "text", text: cleanedContent });
     }
@@ -1262,6 +1220,19 @@ function sanitizePayload(payload) {
     clean._suggestionModeModel = smConfig;
   }

+  // === Topic detection: tag request and override model if configured ===
+  if (clean._requestMode === "main") {
+    const { isTopicDetection: isTopic } = detectTopicDetection(clean);
+    if (isTopic) {
+      clean._requestMode = "topic";
+      const tdConfig = config.modelProvider?.topicDetectionModel ?? "default";
+      if (tdConfig.toLowerCase() !== "default") {
+        clean.model = tdConfig;
+        clean._topicDetectionModel = tdConfig;
+      }
+    }
+  }
+
   return clean;
 }

@@ -1371,7 +1342,7 @@ async function runAgentLoop({
   let toolCallsExecuted = 0;
   let fallbackPerformed = false;
   const toolCallNames = new Map();
-  const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> counta
+  const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count
   let loopWarningInjected = false; // Track if we've already warned about loops
   let emptyResponseRetried = false; // Track if we've retried after an empty LLM response

@@ -3599,6 +3570,86 @@ function detectSuggestionMode(messages) {
   return { isSuggestionMode: false };
 }

+/**
+ * Detect if the current request is a topic detection/classification call.
+ * These requests typically have a system prompt asking to classify conversation
+ * topics, with no tools and very short messages. They waste GPU time on large
+ * models (30-90s just to classify a topic).
+ *
+ * Detection heuristics:
+ * 1. System prompt contains topic classification instructions
+ * 2. No tools in the payload (topic detection never needs tools)
+ * 3.
Short message count (typically 1-3 messages) + * + * @param {Object} payload - The request payload + * @returns {{ isTopicDetection: boolean }} + */ +function detectTopicDetection(payload) { + if (!payload) return { isTopicDetection: false }; + + // Topic detection requests have no tools + if (Array.isArray(payload.tools) && payload.tools.length > 0) { + return { isTopicDetection: false }; + } + + // Check system prompt for topic classification patterns + const systemText = typeof payload.system === 'string' + ? payload.system + : Array.isArray(payload.system) + ? payload.system.map(b => b.text || '').join(' ') + : ''; + + // Also check first message if system prompt is embedded there + let firstMsgText = ''; + if (Array.isArray(payload.messages) && payload.messages.length > 0) { + const first = payload.messages[0]; + if (first?.role === 'user' || first?.role === 'system') { + firstMsgText = typeof first.content === 'string' + ? first.content + : Array.isArray(first.content) + ? first.content.map(b => b.text || '').join(' ') + : ''; + } + } + + const combined = systemText + ' ' + firstMsgText; + const lc = combined.toLowerCase(); + + // Match patterns that Claude Code uses for topic detection + const topicPatterns = [ + 'new conversation topic', + 'topic change', + 'classify the topic', + 'classify this message', + 'conversation topic', + 'topic classification', + 'determines the topic', + 'determine the topic', + 'categorize the topic', + 'what topic', + 'identify the topic', + ]; + + const hasTopicPattern = topicPatterns.some(p => lc.includes(p)); + + if (hasTopicPattern) { + return { isTopicDetection: true }; + } + + // Additional heuristic: very short payload with no tools and system prompt + // mentioning "topic" or "classify" + if ( + !payload.tools && + Array.isArray(payload.messages) && + payload.messages.length <= 3 && + (lc.includes('topic') || lc.includes('classify')) + ) { + return { isTopicDetection: true }; + } + + return { isTopicDetection: false }; +} + async function processMessage({ payload, headers, session, cwd, options = {} }) { const requestedModel = payload?.model ?? From 0ed12bd21f3345f270ac183534f79053a6ec8611 Mon Sep 17 00:00:00 2001 From: bjoern Date: Wed, 11 Feb 2026 12:45:46 +0100 Subject: [PATCH 5/8] fix: set NODE_ENV=test in all test scripts The dotenv override (override: NODE_ENV !== "test") requires NODE_ENV=test to prevent .env values from stomping on test-set environment variables. 
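
Illustrative failure mode (hypothetical .env contents and test file):

    # .env: OLLAMA_MODEL=qwen2.5-coder:latest
    OLLAMA_MODEL=test-model node --test foo.test.js
    #   without NODE_ENV=test the override stomps the shell var:
    #   config sees qwen2.5-coder:latest
    NODE_ENV=test OLLAMA_MODEL=test-model node --test foo.test.js
    #   override skipped: config sees test-model as intended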
--- package.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index 73e6797..43f1479 100644 --- a/package.json +++ b/package.json @@ -14,12 +14,12 @@ "dev": "nodemon index.js", "lint": "eslint src index.js", "test": "npm run test:unit && npm run test:performance", - "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", - "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", - "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js", - "test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js", - "test:benchmark": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-benchmark.js", - "test:quick": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js", + "test:unit": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", + "test:memory": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", + "test:new-features": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js", + "test:performance": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js", + "test:benchmark": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-benchmark.js", + 
"test:quick": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js", "test:all": "npm run test:unit && npm run test:performance && npm run test:benchmark" }, "keywords": [ From 54b93ac9f2457a707334d2b824ac249efb00b9ca Mon Sep 17 00:00:00 2001 From: bjoern Date: Sun, 15 Feb 2026 22:06:03 +0100 Subject: [PATCH 6/8] Fix duplicated OpenAI response parsing block in provider branching The Ollama else-if branch was incorrectly nested inside the OpenAI deduplication if-block, causing the OpenAI parsing + dedup code to appear twice. Restructured so Ollama is a peer branch alongside Anthropic and OpenAI/Databricks, each with its own dedup logic. Addresses PR #39 review feedback from veerareddyvishal144. Co-Authored-By: Claude Opus 4.6 --- src/orchestrator/index.js | 106 ++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 4b9a448..3141c4e 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -1922,6 +1922,56 @@ IMPORTANT TOOL USAGE RULES: }, "Azure Anthropic response parsed", ); + } else if (providerType === "ollama") { + // Ollama format: { message: { role, content, tool_calls }, done } + message = databricksResponse.json?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + logger.info({ + hasMessage: !!databricksResponse.json?.message, + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name), + done: databricksResponse.json?.done, + fullToolCalls: JSON.stringify(toolCalls), + fullResponseMessage: JSON.stringify(databricksResponse.json?.message) + }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); + + // Deduplicate tool calls for Ollama format + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } } else { // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } const choice = databricksResponse.json?.choices?.[0]; @@ -1962,62 +2012,6 @@ IMPORTANT TOOL USAGE RULES: }, "LLM Response: Tool calls requested (after deduplication)", ); - } else if (providerType === "ollama") { - // Ollama format: { message: { role, content, tool_calls }, done } - message = databricksResponse.json?.message ?? {}; - toolCalls = Array.isArray(message.tool_calls) ? 
message.tool_calls : []; - - logger.info({ - hasMessage: !!databricksResponse.json?.message, - hasToolCalls: toolCalls.length > 0, - toolCallCount: toolCalls.length, - toolNames: toolCalls.map(tc => tc.function?.name), - done: databricksResponse.json?.done, - fullToolCalls: JSON.stringify(toolCalls), - fullResponseMessage: JSON.stringify(databricksResponse.json?.message) - }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); - } else { - // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } - const choice = databricksResponse.json?.choices?.[0]; - message = choice?.message ?? {}; - toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; - - // Deduplicate tool calls for OpenAI format too - if (toolCalls.length > 0) { - const uniqueToolCalls = []; - const seenSignatures = new Set(); - let duplicatesRemoved = 0; - - for (const call of toolCalls) { - const signature = getToolCallSignature(call); - - if (!seenSignatures.has(signature)) { - seenSignatures.add(signature); - uniqueToolCalls.push(call); - } else { - duplicatesRemoved++; - logger.warn({ - sessionId: session?.id ?? null, - toolName: call.function?.name || call.name, - toolId: call.id, - signature: signature.substring(0, 32), - }, "Duplicate tool call removed (same tool with identical parameters in single response)"); - } - } - - toolCalls = uniqueToolCalls; - - logger.info( - { - sessionId: session?.id ?? null, - step: steps, - toolCallsFound: toolCalls.length, - duplicatesRemoved, - toolNames: toolCalls.map(tc => tc.function?.name || tc.name), - }, - "LLM Response: Tool calls requested (after deduplication)", - ); - } } } From 0dac27cb98fc277c2c9830311b6d8a193169222e Mon Sep 17 00:00:00 2001 From: bjoern Date: Fri, 13 Feb 2026 09:50:01 +0100 Subject: [PATCH 7/8] Add missing env vars to .env.example Add TOPIC_DETECTION_MODEL, MODEL_DEFAULT, Ollama tuning (OLLAMA_TIMEOUT_MS, OLLAMA_KEEP_ALIVE), and LLM Audit logging section to .env.example for discoverability. Co-Authored-By: Claude Opus 4.6 --- .env.example | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/.env.example b/.env.example index 8f9c917..ac19e2f 100644 --- a/.env.example +++ b/.env.example @@ -10,6 +10,11 @@ # Default: databricks MODEL_PROVIDER=ollama +# Default model to use (overrides provider-specific defaults) +# Without this, defaults to a Databricks Claude model regardless of MODEL_PROVIDER. +# Set this when using Ollama or other local providers to match your installed model. +# MODEL_DEFAULT=qwen2.5-coder:latest + # ============================================================================== # Ollama Configuration (Hybrid Routing) # ============================================================================== @@ -24,6 +29,13 @@ OLLAMA_MODEL=qwen2.5-coder:latest # Ollama endpoint (default: http://localhost:11434) OLLAMA_ENDPOINT=http://localhost:11434 +# Ollama request timeout in milliseconds (default: 120000) +# OLLAMA_TIMEOUT_MS=120000 + +# Ollama keep_alive parameter - how long to keep model loaded in memory +# Values: -1 (forever), 0 (unload immediately), or duration string (e.g. "5m", "1h") +# OLLAMA_KEEP_ALIVE=-1 + # Ollama embeddings configuration (for Cursor @Codebase semantic search) # Embedding models for local, privacy-first semantic search # Popular models: @@ -225,6 +237,13 @@ TOOL_EXECUTION_MODE=server # - Use a specific model (e.g. 
"llama3.1" for a lighter model) SUGGESTION_MODE_MODEL=default +# Topic detection model override +# Redirects topic classification to a lighter/faster model to reduce GPU load. +# Values: +# default - Use the main model (no change) +# - Use a specific model (e.g. "llama3.2:1b" for a lightweight classifier) +TOPIC_DETECTION_MODEL=default + # Enable/disable automatic tool injection for local models INJECT_TOOLS_LLAMACPP=true INJECT_TOOLS_OLLAMA=true @@ -328,6 +347,37 @@ HOT_RELOAD_ENABLED=true # Debounce delay in ms (prevents rapid reloads) HOT_RELOAD_DEBOUNCE_MS=1000 +# ============================================================================== +# LLM Audit Logging +# ============================================================================== + +# Enable LLM audit logging (default: false) +# Logs all LLM requests/responses for debugging and analysis +LLM_AUDIT_ENABLED=false + +# Audit log file path +# LLM_AUDIT_LOG_FILE=./logs/llm-audit.log + +# Maximum content length per field (characters) - controls log file size +# LLM_AUDIT_MAX_CONTENT_LENGTH=5000 +# LLM_AUDIT_MAX_SYSTEM_LENGTH=2000 +# LLM_AUDIT_MAX_USER_LENGTH=3000 +# LLM_AUDIT_MAX_RESPONSE_LENGTH=3000 + +# Log rotation settings +# LLM_AUDIT_MAX_FILES=30 +# LLM_AUDIT_MAX_SIZE=100M + +# Include annotation metadata in audit logs (default: true) +# LLM_AUDIT_ANNOTATIONS=true + +# Deduplication - reduces log size by referencing repeated content +# LLM_AUDIT_DEDUP_ENABLED=true +# LLM_AUDIT_DEDUP_MIN_SIZE=500 +# LLM_AUDIT_DEDUP_CACHE_SIZE=100 +# LLM_AUDIT_DEDUP_SANITIZE=true +# LLM_AUDIT_DEDUP_SESSION_CACHE=true + # ============================================================================== # Quick Start Examples # ============================================================================== From ef7fe7c45613428329a985f10d3419029c2c4a63 Mon Sep 17 00:00:00 2001 From: vishal veerareddy Date: Wed, 18 Feb 2026 13:00:40 +0530 Subject: [PATCH 8/8] added dbpath --- src/budget/index.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/budget/index.js b/src/budget/index.js index 2d05b79..c52f80c 100644 --- a/src/budget/index.js +++ b/src/budget/index.js @@ -26,13 +26,14 @@ class BudgetManager { this.db = new Database(dbPath); this.initDatabase(); + logger.info({ dbPath }, 'Budget manager initialized'); } catch (err) { logger.warn({ err: err.message }, "BudgetManager: better-sqlite3 not available"); this.enabled = false; return; } - logger.info({ dbPath }, 'Budget manager initialized'); + } initDatabase() {