diff --git a/src/main/java/org/codelibs/fess/chat/ChatClient.java b/src/main/java/org/codelibs/fess/chat/ChatClient.java index 426d3c8fa..f8acd1f34 100644 --- a/src/main/java/org/codelibs/fess/chat/ChatClient.java +++ b/src/main/java/org/codelibs/fess/chat/ChatClient.java @@ -443,6 +443,8 @@ public ChatResult streamChatEnhanced(final String sessionId, final String userMe searchRequestedTime = querySearchResult.getRequestedTime(); callback.onPhaseComplete(ChatPhaseCallback.PHASE_SEARCH); + logger.info("[RAG] Search completed. query={}, resultCount={}, elapsedTime={}ms", query, searchResults.size(), + System.currentTimeMillis() - phaseStartTime); if (logger.isDebugEnabled()) { logger.debug("[RAG] Phase {} completed. query={}, resultCount={}, phaseElapsedTime={}ms", ChatPhaseCallback.PHASE_SEARCH, query, searchResults.size(), System.currentTimeMillis() - phaseStartTime); @@ -542,8 +544,9 @@ public ChatResult streamChatEnhanced(final String sessionId, final String userMe session.addMessage(assistantMessage); - logger.info("[RAG] Enhanced chat completed. sessionId={}, intent={}, sourcesCount={}, responseLength={}, elapsedTime={}ms", - session.getSessionId(), intentResult.getIntent(), sources.size(), fullResponse.length(), + logger.info( + "[RAG] Enhanced chat completed. sessionId={}, userId={}, intent={}, sourcesCount={}, responseLength={}, elapsedTime={}ms", + session.getSessionId(), userId, intentResult.getIntent(), sources.size(), fullResponse.length(), System.currentTimeMillis() - startTime); return new ChatResult(session.getSessionId(), assistantMessage, sources); diff --git a/src/main/java/org/codelibs/fess/llm/AbstractLlmClient.java b/src/main/java/org/codelibs/fess/llm/AbstractLlmClient.java index 804f7af7a..3aec994c3 100644 --- a/src/main/java/org/codelibs/fess/llm/AbstractLlmClient.java +++ b/src/main/java/org/codelibs/fess/llm/AbstractLlmClient.java @@ -401,6 +401,10 @@ protected LlmChatResponse chatWithConcurrencyControl(final LlmChatRequest reques if (concurrencyLimiter == null) { return chat(request); } + if (logger.isDebugEnabled()) { + logger.debug("[LLM] Acquiring concurrency permit. name={}, availablePermits={}, maxConcurrent={}", getName(), + concurrencyLimiter.availablePermits(), getMaxConcurrentRequests()); + } try { if (!concurrencyLimiter.tryAcquire(getConcurrencyWaitTimeoutMs(), TimeUnit.MILLISECONDS)) { logger.warn("[LLM] Concurrency limit exceeded. name={}, maxConcurrent={}, waitTimeout={}ms", getName(), @@ -431,6 +435,10 @@ protected void streamChatWithConcurrencyControl(final LlmChatRequest request, fi streamChat(request, callback); return; } + if (logger.isDebugEnabled()) { + logger.debug("[LLM] Acquiring concurrency permit. name={}, availablePermits={}, maxConcurrent={}", getName(), + concurrencyLimiter.availablePermits(), getMaxConcurrentRequests()); + } try { if (!concurrencyLimiter.tryAcquire(getConcurrencyWaitTimeoutMs(), TimeUnit.MILLISECONDS)) { logger.warn("[LLM] Concurrency limit exceeded. name={}, maxConcurrent={}, waitTimeout={}ms", getName(), @@ -550,6 +558,10 @@ public IntentDetectionResult detectIntent(final String userMessage) { applyPromptTypeParams(request, "intent"); final LlmChatResponse response = chatWithConcurrencyControl(request); + if (logger.isDebugEnabled()) { + logger.debug("[RAG:INTENT] LLM response. promptTokens={}, completionTokens={}, totalTokens={}, finishReason={}", + response.getPromptTokens(), response.getCompletionTokens(), response.getTotalTokens(), response.getFinishReason()); + } if (isEmptyContentWithLengthFinish(response)) { logger.warn( "[RAG:INTENT] Empty content with finish_reason=length detected (possible reasoning model token exhaustion). Falling back to search. userMessage={}", userMessage); @@ -593,6 +605,10 @@ public IntentDetectionResult detectIntent(final String userMessage, final List