From 9c943a36c97c3f12c206ff783c961ae3b709082e Mon Sep 17 00:00:00 2001
From: Alexander Mamrenko <amamrenko@clubready.com>
Date: Wed, 1 Apr 2026 11:03:21 +0200
Subject: [PATCH] Fix Bedrock non-streaming thinking_tokens always nil

The Bedrock Converse API returns thinking token counts nested
under usage.outputTokensDetails.reasoningTokens, not at the
top-level usage.reasoningTokens. The streaming parser already
handles this fallback correctly, but parse_completion_response
only checked the top-level key.

Add a fallback to the nested path, plus specs covering both paths
and the cache usage metrics.
---
 lib/ruby_llm/providers/bedrock/chat.rb       |  2 +-
 spec/ruby_llm/providers/bedrock/chat_spec.rb | 51 ++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb
index c39fa2942..a0c478dd6 100644
--- a/lib/ruby_llm/providers/bedrock/chat.rb
+++ b/lib/ruby_llm/providers/bedrock/chat.rb
@@ -60,7 +60,7 @@ def parse_completion_response(response)
             output_tokens: usage['outputTokens'],
             cached_tokens: usage['cacheReadInputTokens'],
             cache_creation_tokens: usage['cacheWriteInputTokens'],
-            thinking_tokens: usage['reasoningTokens'],
+            thinking_tokens: usage['reasoningTokens'] || usage.dig('outputTokensDetails', 'reasoningTokens'),
             model_id: data['modelId'],
             raw: response
           )
diff --git a/spec/ruby_llm/providers/bedrock/chat_spec.rb b/spec/ruby_llm/providers/bedrock/chat_spec.rb
index 10ee06219..6c34705af 100644
--- a/spec/ruby_llm/providers/bedrock/chat_spec.rb
+++ b/spec/ruby_llm/providers/bedrock/chat_spec.rb
@@ -93,4 +93,55 @@ def render_payload(messages = [], **overrides)
       end
     end
   end
+
+  describe '.parse_completion_response' do
+    it 'extracts thinking_tokens from top-level reasoningTokens' do
+      response_body = {
+        'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } },
+        'usage' => {
+          'inputTokens' => 100,
+          'outputTokens' => 50,
+          'reasoningTokens' => 1200
+        }
+      }
+      response = instance_double(Faraday::Response, body: response_body)
+      message = described_class.parse_completion_response(response)
+
+      expect(message.thinking_tokens).to eq(1200)
+    end
+
+    it 'extracts thinking_tokens from nested outputTokensDetails' do
+      response_body = {
+        'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } },
+        'usage' => {
+          'inputTokens' => 100,
+          'outputTokens' => 50,
+          'outputTokensDetails' => { 'reasoningTokens' => 800 }
+        }
+      }
+      response = instance_double(Faraday::Response, body: response_body)
+      message = described_class.parse_completion_response(response)
+
+      expect(message.thinking_tokens).to eq(800)
+    end
+
+    it 'captures cache usage metrics' do
+      response_body = {
+        'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } },
+        'usage' => {
+          'inputTokens' => 100,
+          'outputTokens' => 50,
+          'cacheReadInputTokens' => 30,
+          'cacheWriteInputTokens' => 10
+        }
+      }
+      response = instance_double(Faraday::Response, body: response_body)
+      message = described_class.parse_completion_response(response)
+
+      expect(message.input_tokens).to eq(100)
+      expect(message.output_tokens).to eq(50)
+      expect(message.cached_tokens).to eq(30)
+      expect(message.cache_creation_tokens).to eq(10)
+    end
+  end
 end