diff --git a/CHANGELOG.md b/CHANGELOG.md index b119245..51ea668 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- **Hash options for `image_generation=`**: Pass a `Hash` of tool options (e.g., `chat.image_generation = { size: "1536x1024", quality: "low", model: "gpt-image-2" }`) to configure the OpenAI Responses API image generation tool. Useful for selecting a specific GPT Image model, changing size/quality, forcing generate-vs-edit mode with `action:`, masked edits via `input_image_mask:`, and the rest of the options from OpenAI's image generation docs. `chat.image_generation = true` still works and continues to use OpenAI's defaults. + +### Changed + +- **`image_generation=` validates its argument**: Only `true`, `false`, `nil`, or a `Hash` are now accepted. `nil` is normalized to `false`. Any other value raises `ArgumentError`. + +### Fixed + +- **Saved images now use the correct file extension**: Generated images are sniffed with Marcel and saved as `.png`, `.jpg`, or `.webp` based on the decoded bytes. Previously every image was saved with a `.png` extension (`001.png`, `002.png`, and so on) regardless of `output_format`, which produced misnamed files when callers asked for JPEG or WebP. 
+ ## [0.6.0] - 2026-04-13 ### Breaking diff --git a/Gemfile.lock b/Gemfile.lock index 47a61a1..ad1e680 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -6,7 +6,7 @@ PATH base64 (~> 0.1, > 0.1.1) json (~> 2.0) marcel (~> 1.0) - openai (~> 0.43) + openai (~> 0.59) tty-spinner (~> 0.9.3) GEM @@ -66,7 +66,7 @@ GEM lint_roller (1.1.0) logger (1.7.0) marcel (1.1.0) - openai (0.43.0) + openai (0.59.0) base64 cgi connection_pool diff --git a/README.md b/README.md index 9da23cf..1c8e12a 100644 --- a/README.md +++ b/README.md @@ -309,6 +309,24 @@ chat.user("Make it even cuter") chat.generate! ``` +### Configuring the Tool + +To configure the tool, pass a `Hash` of options instead of `true`: + +```ruby +chat.image_generation = { + size: "1536x1024", + quality: "low", + model: "gpt-image-2" +} +``` + +Supported keys include `size`, `quality`, `model`, `action`, `background`, `moderation`, `output_format`, `output_compression`, `input_image_mask`, and `input_fidelity`. The `Hash` is passed through to the OpenAI [image generation tool](https://platform.openai.com/docs/guides/image-generation), so refer to those docs for the full list of supported values. + +The file extension of saved images is chosen automatically from the decoded bytes, so `output_format: "jpeg"` writes a `.jpg` and `output_format: "webp"` writes a `.webp`. + +> `partial_images` is not listed above because this gem uses a blocking call to the Responses API. Partial images only stream when `stream: true` is set, which this gem doesn't yet support. + ## Code Interpreter Enable the code interpreter to let the model write and execute Python code on OpenAI's servers. 
This is useful for math, data analysis, and generating charts: diff --git a/ai-chat.gemspec b/ai-chat.gemspec index c7c0f84..2abf042 100644 --- a/ai-chat.gemspec +++ b/ai-chat.gemspec @@ -19,7 +19,7 @@ Gem::Specification.new do |spec| } spec.required_ruby_version = ">= 3.2" - spec.add_runtime_dependency "openai", "~> 0.43" + spec.add_runtime_dependency "openai", "~> 0.59" spec.add_runtime_dependency "marcel", "~> 1.0" spec.add_runtime_dependency "base64", "~> 0.1", "> 0.1.1" spec.add_runtime_dependency "json", "~> 2.0" diff --git a/examples/10_image_generation.rb b/examples/10_image_generation.rb index 0669b72..883f20c 100755 --- a/examples/10_image_generation.rb +++ b/examples/10_image_generation.rb @@ -60,5 +60,16 @@ puts "Black and white image: #{c.messages.last[:images]}" puts +puts "Example 4: Configure generation with a Hash of options" +puts "-" * 50 +d = AI::Chat.new +d.image_generation = {size: "1536x1024", quality: "low"} +d.user("Draw a landscape of rolling hills at sunset") +puts "User: #{d.last[:content]}" +d.generate! 
+puts "Assistant: #{d.messages.last[:content]}" +puts "Landscape image: #{d.messages.last[:images]}" +puts + puts "=== Image Generation Examples Complete ===" puts diff --git a/lib/ai/chat.rb b/lib/ai/chat.rb index 87579ef..10ddc6c 100644 --- a/lib/ai/chat.rb +++ b/lib/ai/chat.rb @@ -18,8 +18,8 @@ module AI # :reek:IrresponsibleModule class Chat # :reek:Attribute - attr_accessor :background, :code_interpreter, :conversation_id, :image_generation, :image_folder, :messages, :model, :reasoning_effort, :web_search - attr_reader :client, :last_response_id, :proxy, :schema, :schema_file, :verbosity + attr_accessor :background, :code_interpreter, :conversation_id, :image_folder, :messages, :model, :reasoning_effort, :web_search + attr_reader :client, :image_generation, :last_response_id, :proxy, :schema, :schema_file, :verbosity BASE_PROXY_URL = "https://prepend.me/api.openai.com/v1" PROXY_ENV = "AICHAT_PROXY" @@ -205,6 +205,19 @@ def verbosity=(value) end end + def image_generation=(value) + case value + when true + @image_generation = true + when false, nil + @image_generation = false + when Hash + @image_generation = value.transform_keys(&:to_sym) + else + raise ArgumentError, "Invalid image_generation value: #{value.inspect}. Must be true, false, or a Hash of tool options (e.g., { size: \"1536x1024\", quality: \"low\" })." + end + end + def last messages.last end @@ -523,7 +536,8 @@ def tools tools_list << {type: "web_search"} end if image_generation - tools_list << {type: "image_generation"} + options = image_generation.is_a?(Hash) ? 
image_generation : {} + tools_list << options.merge(type: "image_generation") end if code_interpreter tools_list << {type: "code_interpreter", container: {type: "auto"}} @@ -577,7 +591,8 @@ def extract_and_save_images(response) result = output.result image_data = Base64.strict_decode64(result) - filename = "#{(index + 1).to_s.rjust(3, "0")}.png" + extension = image_extension_for(image_data) + filename = "#{(index + 1).to_s.rjust(3, "0")}.#{extension}" file_path = File.join(subfolder_path, filename) File.binwrite(file_path, image_data) @@ -589,6 +604,18 @@ def extract_and_save_images(response) image_filenames end + IMAGE_EXTENSIONS_BY_MIME_TYPE = { + "image/png" => "png", + "image/jpeg" => "jpg", + "image/webp" => "webp" + }.freeze + + # :reek:UtilityFunction + def image_extension_for(bytes) + mime_type = Marcel::MimeType.for(StringIO.new(bytes)) + IMAGE_EXTENSIONS_BY_MIME_TYPE.fetch(mime_type, "png") + end + def create_images_folder(response_id) # ISO 8601 basic format with centisecond precision timestamp = Time.now.strftime("%Y%m%dT%H%M%S%2N") diff --git a/spec/unit/chat_spec.rb b/spec/unit/chat_spec.rb index 4712061..0d13b14 100644 --- a/spec/unit/chat_spec.rb +++ b/spec/unit/chat_spec.rb @@ -485,6 +485,125 @@ def schema_client_double end end + describe "#image_generation=" do + it "accepts true" do + chat.image_generation = true + + expect(chat.image_generation).to eq(true) + end + + it "accepts false" do + chat.image_generation = true + chat.image_generation = false + + expect(chat.image_generation).to eq(false) + end + + it "normalizes nil to false" do + chat.image_generation = true + chat.image_generation = nil + + expect(chat.image_generation).to eq(false) + end + + it "accepts a Hash of tool options" do + options = {size: "1536x1024", quality: "low", model: "gpt-image-2"} + chat.image_generation = options + + expect(chat.image_generation).to eq(options) + end + + it "accepts an empty Hash" do + chat.image_generation = {} + + expect(chat.image_generation).to 
eq({}) + end + + it "normalizes string keys to symbols" do + chat.image_generation = {"size" => "1536x1024", "quality" => "low"} + + expect(chat.image_generation).to eq({size: "1536x1024", quality: "low"}) + end + + it "raises ArgumentError for non-boolean, non-Hash values" do + expect { chat.image_generation = "yes" }.to raise_error( + ArgumentError, + /Invalid image_generation value: "yes"\. Must be true, false, or a Hash of tool options/ + ) + end + + it "raises ArgumentError for integers" do + expect { chat.image_generation = 1 }.to raise_error(ArgumentError) + end + + it "raises ArgumentError for arrays" do + expect { chat.image_generation = [1, 2, 3] }.to raise_error(ArgumentError) + end + end + + describe "#tools (via image_generation)" do + it "omits the tool when image_generation is false" do + expect(chat.send(:tools)).to eq([]) + end + + it "emits a bare tool spec when image_generation is true" do + chat.image_generation = true + + expect(chat.send(:tools)).to eq([{type: "image_generation"}]) + end + + it "merges Hash options into the tool spec" do + chat.image_generation = {size: "1536x1024", quality: "low"} + + expect(chat.send(:tools)).to eq([ + {size: "1536x1024", quality: "low", type: "image_generation"} + ]) + end + + it "forces type: image_generation even when the Hash tries to override it" do + chat.image_generation = {type: "something_else", size: "1024x1024"} + + tool = chat.send(:tools).first + expect(tool[:type]).to eq("image_generation") + expect(tool[:size]).to eq("1024x1024") + end + + it "forces type: image_generation when the Hash uses a string-keyed type" do + chat.image_generation = {"type" => "something_else", "size" => "1024x1024"} + + tool = chat.send(:tools).first + expect(tool[:type]).to eq("image_generation") + expect(tool).not_to have_key("type") + expect(tool[:size]).to eq("1024x1024") + end + end + + describe "#image_extension_for (private helper)" do + it "returns png for PNG bytes" do + png = "\x89PNG\r\n\x1a\n\x00".b + + 
expect(chat.send(:image_extension_for, png)).to eq("png") + end + + it "returns jpg for JPEG bytes" do + jpeg = "\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01".b + + expect(chat.send(:image_extension_for, jpeg)).to eq("jpg") + end + + it "returns webp for WebP bytes" do + webp = "RIFF\x00\x00\x00\x00WEBP".b + + expect(chat.send(:image_extension_for, webp)).to eq("webp") + end + + it "falls back to png for unrecognized bytes" do + garbage = "not an image at all".b + + expect(chat.send(:image_extension_for, garbage)).to eq("png") + end + end + describe "#inspectable_attributes" do it "excludes :response key from displayed messages" do chat.add("Hello", role: "user") @@ -548,6 +667,16 @@ def schema_client_double expect(attr_names).to include(:@image_folder) end + it "includes @image_generation when set to a Hash of options" do + chat.image_generation = {size: "1536x1024"} + + attrs = chat.inspectable_attributes + entry = attrs.find { |name, _| name == :@image_generation } + + expect(entry).not_to be_nil + expect(entry[1]).to eq({size: "1536x1024"}) + end + it "excludes optional state when not set" do attrs = chat.inspectable_attributes attr_names = attrs.map(&:first)