# Patch summary (git unified diff touching two files):
#   * lib/ruby_llm/providers/bedrock/chat.rb -- in the hunk under
#     parse_completion_response, thinking_tokens changes from
#     usage['reasoningTokens'] to
#     usage['reasoningTokens'] || usage.dig('outputTokensDetails', 'reasoningTokens'),
#     i.e. the nested value is only consulted when the top-level key is
#     nil/false (a top-level 0 is truthy in Ruby and is kept as-is).
#   * spec/ruby_llm/providers/bedrock/chat_spec.rb -- adds a
#     describe '.parse_completion_response' block with three examples:
#     top-level reasoningTokens (expects 1200), nested
#     outputTokensDetails.reasoningTokens (expects 800), and the
#     input/output/cacheRead/cacheWrite token fields (100/50/30/10).
# NOTE(review): the line breaks of this patch appear to have been collapsed
# into spaces; presumably they must be restored before the patch can be
# applied -- confirm against the original commit.
# NOTE(review): none of the added examples covers a usage hash that has both
# reasoning-token keys, or neither of them -- consider adding those cases.
diff --git a/lib/ruby_llm/providers/bedrock/chat.rb b/lib/ruby_llm/providers/bedrock/chat.rb index 026226c30..f125fd4c0 100644 --- a/lib/ruby_llm/providers/bedrock/chat.rb +++ b/lib/ruby_llm/providers/bedrock/chat.rb @@ -60,7 +60,7 @@ def parse_completion_response(response) output_tokens: usage['outputTokens'], cached_tokens: usage['cacheReadInputTokens'], cache_creation_tokens: usage['cacheWriteInputTokens'], - thinking_tokens: usage['reasoningTokens'], + thinking_tokens: usage['reasoningTokens'] || usage.dig('outputTokensDetails', 'reasoningTokens'), model_id: data['modelId'], raw: response ) diff --git a/spec/ruby_llm/providers/bedrock/chat_spec.rb b/spec/ruby_llm/providers/bedrock/chat_spec.rb index 10e1d594a..430b9e145 100644 --- a/spec/ruby_llm/providers/bedrock/chat_spec.rb +++ b/spec/ruby_llm/providers/bedrock/chat_spec.rb @@ -128,4 +128,55 @@ def render_payload(messages = [], **overrides) end end end + + describe '.parse_completion_response' do + it 'extracts thinking_tokens from top-level reasoningTokens' do + response_body = { + 'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } }, + 'usage' => { + 'inputTokens' => 100, + 'outputTokens' => 50, + 'reasoningTokens' => 1200 + } + } + response = instance_double(Faraday::Response, body: response_body) + message = described_class.parse_completion_response(response) + + expect(message.thinking_tokens).to eq(1200) + end + + it 'extracts thinking_tokens from nested outputTokensDetails' do + response_body = { + 'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' 
}] } }, + 'usage' => { + 'inputTokens' => 100, + 'outputTokens' => 50, + 'outputTokensDetails' => { 'reasoningTokens' => 800 } + } + } + response = instance_double(Faraday::Response, body: response_body) + message = described_class.parse_completion_response(response) + + expect(message.thinking_tokens).to eq(800) + end + + it 'captures cache usage metrics' do + response_body = { + 'output' => { 'message' => { 'content' => [{ 'text' => 'Hi!' }] } }, + 'usage' => { + 'inputTokens' => 100, + 'outputTokens' => 50, + 'cacheReadInputTokens' => 30, + 'cacheWriteInputTokens' => 10 + } + } + response = instance_double(Faraday::Response, body: response_body) + message = described_class.parse_completion_response(response) + + expect(message.input_tokens).to eq(100) + expect(message.output_tokens).to eq(50) + expect(message.cached_tokens).to eq(30) + expect(message.cache_creation_tokens).to eq(10) + end + end end