From f14f1bf49e2a32a65a50ef2d199555266cf9bac7 Mon Sep 17 00:00:00 2001 From: Shruc <42489293+P3il4@users.noreply.github.com> Date: Wed, 22 Apr 2026 21:00:18 +0300 Subject: [PATCH] add gpt image 2 (#2829) * add gpt image 2 * index cost key * docs + default low --- .../OpenAiImageGenerationProvider.ts | 109 +++++++++++++++++- .../OpenAiImageGenerationProvider/models.ts | 21 ++++ src/docs/src/AI/txt2img.md | 8 +- 3 files changed, 128 insertions(+), 10 deletions(-) diff --git a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts index 53fe18418a..3147c076fc 100644 --- a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts +++ b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts @@ -85,9 +85,15 @@ export class OpenAiImageGenerationProvider implements IImageProvider { throw new Error('`prompt` must be a string'); } - const validRations = selectedModel?.allowedRatios; - if ( validRations && (!ratio || !validRations.some(r => r.w === ratio.w && r.h === ratio.h)) ) { - ratio = validRations[0]; // Default to the first allowed ratio + const validRatios = selectedModel?.allowedRatios; + if ( validRatios ) { + if ( !ratio || !validRatios.some(r => r.w === ratio.w && r.h === ratio.h) ) { + ratio = validRatios[0]; // Default to the first allowed ratio + } + } else { + // Open-ended size models (gpt-image-2): conform to OpenAI's size + // rules (16px multiples, 3840 cap, 3:1 ratio, pixel budget). + ratio = this.#normalizeGptImage2Ratio(ratio); } if ( ! ratio ) { @@ -101,7 +107,10 @@ export class OpenAiImageGenerationProvider implements IImageProvider { const size = `${ratio.w}x${ratio.h}`; const price_key = this.#buildPriceKey(selectedModel.id, quality!, size); - const outputPriceInCents = selectedModel?.costs[price_key]; + let outputPriceInCents: number | undefined = selectedModel?.costs[price_key]; + if ( outputPriceInCents === undefined ) { + outputPriceInCents = this.#estimateOutputCostFromTokens(selectedModel, ratio, quality); + } if ( outputPriceInCents === undefined ) { const availableSizes = Object.keys(selectedModel?.costs) .filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key)); @@ -412,8 +421,96 @@ export class OpenAiImageGenerationProvider implements IImageProvider { } #isGptImageModel (model: string) { - // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5 and future variants. - return model.startsWith('gpt-image-1'); + // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5, gpt-image-2 and future variants. + return model.startsWith('gpt-image-'); + } + + // gpt-image-2 size rules: each edge in [16, 3840] and a multiple of 16, + // long:short ratio ≤ 3:1, pixel count in [655360, 8294400]. Silently + // clamps/snaps rather than throwing so arbitrary user input is accepted. + // https://developers.openai.com/api/docs/guides/image-generation + #normalizeGptImage2Ratio (ratio?: { w: number; h: number }) { + const MIN_EDGE = 16; + const MAX_EDGE = 3840; + const STEP = 16; + const MAX_RATIO = 3; + const MIN_PIXELS = 655_360; + const MAX_PIXELS = 8_294_400; + + let w = Number(ratio?.w); + let h = Number(ratio?.h); + if ( !Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0 ) { + return { w: 1024, h: 1024 }; + } + + // 1. Clamp long:short ratio to MAX_RATIO by shrinking the longer edge. + if ( w / h > MAX_RATIO ) w = h * MAX_RATIO; + else if ( h / w > MAX_RATIO ) h = w * MAX_RATIO; + + // 2. Cap each edge at MAX_EDGE, preserving aspect ratio. + if ( w > MAX_EDGE ) { + const s = MAX_EDGE / w; w = MAX_EDGE; h *= s; + } + if ( h > MAX_EDGE ) { + const s = MAX_EDGE / h; h = MAX_EDGE; w *= s; + } + + // 3. Scale uniformly into the pixel budget. + const prescaledPixels = w * h; + if ( prescaledPixels < MIN_PIXELS ) { + const s = Math.sqrt(MIN_PIXELS / prescaledPixels); + w *= s; h *= s; + } else if ( prescaledPixels > MAX_PIXELS ) { + const s = Math.sqrt(MAX_PIXELS / prescaledPixels); + w *= s; h *= s; + } + + // 4. Snap to STEP. Bias rounding direction so snap doesn't push pixels + // back out of the budget. + const dir = prescaledPixels < MIN_PIXELS ? 1 + : prescaledPixels > MAX_PIXELS ? -1 + : 0; + const snap = (v: number) => { + const snapped = dir > 0 ? Math.ceil(v / STEP) * STEP + : dir < 0 ? Math.floor(v / STEP) * STEP + : Math.round(v / STEP) * STEP; + return Math.max(MIN_EDGE, Math.min(MAX_EDGE, snapped)); + }; + w = snap(w); h = snap(h); + + // 5. If snap rounding pushed ratio above MAX_RATIO, trim the longer + // edge by one STEP. Pixel budget had headroom from step 3 so this + // won't drop below MIN_PIXELS. + if ( Math.max(w, h) / Math.min(w, h) > MAX_RATIO ) { + if ( w >= h ) w = Math.max(MIN_EDGE, w - STEP); + else h = Math.max(MIN_EDGE, h - STEP); + } + return { w, h }; + } + + // extracted from calculator at https://developers.openai.com/api/docs/guides/image-generation#cost-and-latency + #estimateGptImage2OutputTokens (width: number, height: number, quality?: string): number { + const FACTORS: Record = { low: 16, medium: 48, high: 96 }; + const factor = FACTORS[quality ?? ''] ?? FACTORS.medium; + const longEdge = Math.max(width, height); + const shortEdge = Math.min(width, height); + const shortLatent = Math.round(factor * shortEdge / longEdge); + const latentW = width >= height ? factor : shortLatent; + const latentH = width >= height ? shortLatent : factor; + const baseArea = latentW * latentH; + return Math.ceil(baseArea * (2_000_000 + width * height) / 4_000_000); + } + + #estimateOutputCostFromTokens ( + selectedModel: IImageModel, + ratio: { w: number; h: number }, + quality?: string, + ): number | undefined { + if ( ! selectedModel.id.startsWith('gpt-image-2') ) return undefined; + const rate = this.#getCostRate(selectedModel, 'image_output'); + if ( rate === undefined ) return undefined; + const tokens = this.#estimateGptImage2OutputTokens(ratio.w, ratio.h, quality); + return this.#costForTokens(tokens, rate); } #buildPriceKey (model: string, quality: string, size: string) { diff --git a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts index 3b86dd05a6..de62f7584f 100644 --- a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts +++ b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts @@ -1,6 +1,27 @@ import { IImageModel } from '../types'; export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [ + { + puterId: 'openai:openai/gpt-image-2', + id: 'gpt-image-2', + aliases: ['openai/gpt-image-2', 'gpt-image-2-2026-04-21'], + name: 'GPT Image 2', + version: '2.0', + costs_currency: 'usd-cents', + index_cost_key: 'low:1024x1024', + costs: { + // Text tokens (per 1M tokens) + text_input: 500, // $5.00 + text_cached_input: 125, // $1.25 + text_output: 1000, // $10.00 + // Image tokens (per 1M tokens) + image_input: 800, // $8.00 + image_cached_input: 200, // $2.00 + image_output: 3000, // $30.00 + 'low:1024x1024': 0.588, + }, + allowedQualityLevels: ['low', 'medium', 'high', 'auto'], + }, { puterId: 'openai:openai/gpt-image-1.5', id: 'gpt-image-1.5', diff --git a/src/docs/src/AI/txt2img.md b/src/docs/src/AI/txt2img.md index 8441d85a9a..adb7b4e107 100755 --- a/src/docs/src/AI/txt2img.md +++ b/src/docs/src/AI/txt2img.md @@ -37,13 +37,13 @@ Additional settings for the generation request. Available options depend on the #### OpenAI Options -Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`): +Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-2`, `gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`): | Option | Type | Description | |--------|------|-------------| -| `model` | `String` | Image model to use. Available: `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` | -| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`). For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) | -| `ratio` | `Object` | Aspect ratio with `w` and `h` properties | +| `model` | `String` | Image model to use. Available: `'gpt-image-2'`, `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` | +| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`); `gpt-image-2` also accepts `'auto'`. For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) | +| `ratio` | `Object` | Aspect ratio with `w` and `h` properties. `gpt-image-2` accepts arbitrary sizes; other GPT models and DALL-E are restricted to fixed sizes | For more details, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/images/create).