From 9c943a36c97c3f12c206ff783c961ae3b709082e Mon Sep 17 00:00:00 2001
From: Alexander Mamrenko
Date: Wed, 1 Apr 2026 11:03:21 +0200
Subject: [PATCH] Fix Bedrock non-streaming thinking_tokens always nil

The Bedrock Converse API returns thinking token counts nested under
usage.outputTokensDetails.reasoningTokens, not at the top-level
usage.reasoningTokens. The streaming parser already handles this
fallback correctly, but parse_completion_response only checked the
top-level key.

Add fallback to nested path and tests covering both paths.
---
 lib/ruby_llm/providers/bedrock/chat.rb       |  2 +-
 spec/ruby_llm/providers/bedrock/chat_spec.rb | 51 ++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb
index c39fa2942..a0c478dd6 100644
--- a/lib/ruby_llm/providers/bedrock/chat.rb
+++ b/lib/ruby_llm/providers/bedrock/chat.rb
@@ -60,7 +60,7 @@ def parse_completion_response(response)
             output_tokens: usage['outputTokens'],
             cached_tokens: usage['cacheReadInputTokens'],
             cache_creation_tokens: usage['cacheWriteInputTokens'],
-            thinking_tokens: usage['reasoningTokens'],
+            thinking_tokens: usage['reasoningTokens'] || usage.dig('outputTokensDetails', 'reasoningTokens'),
             model_id: data['modelId'],
             raw: response
           )
diff --git a/spec/ruby_llm/providers/bedrock/chat_spec.rb b/spec/ruby_llm/providers/bedrock/chat_spec.rb
index 10ee06219..6c34705af 100644
--- a/spec/ruby_llm/providers/bedrock/chat_spec.rb
+++ b/spec/ruby_llm/providers/bedrock/chat_spec.rb
@@ -93,4 +93,55 @@ def render_payload(messages = [], **overrides)
       end
     end
   end
+
+  describe '.parse_completion_response' do
+    it 'extracts thinking_tokens from top-level reasoningTokens' do
+      response_body = {
+        'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } },
+        'usage' => {
+          'inputTokens' => 100,
+          'outputTokens' => 50,
+          'reasoningTokens' => 1200
+        }
+      }
+      response = instance_double(Faraday::Response, body: response_body)
+      message = described_class.parse_completion_response(response)
+
+      expect(message.thinking_tokens).to eq(1200)
+    end
+
+    it 'extracts thinking_tokens from nested outputTokensDetails' do
+      response_body = {
+        'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } },
+        'usage' => {
+          'inputTokens' => 100,
+          'outputTokens' => 50,
+          'outputTokensDetails' => { 'reasoningTokens' => 800 }
+        }
+      }
+      response = instance_double(Faraday::Response, body: response_body)
+      message = described_class.parse_completion_response(response)
+
+      expect(message.thinking_tokens).to eq(800)
+    end
+
+    it 'captures cache usage metrics' do
+      response_body = {
+        'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } },
+        'usage' => {
+          'inputTokens' => 100,
+          'outputTokens' => 50,
+          'cacheReadInputTokens' => 30,
+          'cacheWriteInputTokens' => 10
+        }
+      }
+      response = instance_double(Faraday::Response, body: response_body)
+      message = described_class.parse_completion_response(response)
+
+      expect(message.input_tokens).to eq(100)
+      expect(message.output_tokens).to eq(50)
+      expect(message.cached_tokens).to eq(30)
+      expect(message.cache_creation_tokens).to eq(10)
+    end
+  end
 end