From f14f1bf49e2a32a65a50ef2d199555266cf9bac7 Mon Sep 17 00:00:00 2001
From: Shruc <42489293+P3il4@users.noreply.github.com>
Date: Wed, 22 Apr 2026 21:00:18 +0300
Subject: [PATCH] add gpt image 2 (#2829)

* add gpt image 2

* index cost key

* docs + default low
---
 .../OpenAiImageGenerationProvider.ts          | 109 +++++++++++++++++-
 .../OpenAiImageGenerationProvider/models.ts   |  21 ++++
 src/docs/src/AI/txt2img.md                    |   8 +-
 3 files changed, 128 insertions(+), 10 deletions(-)

diff --git a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts
index 53fe18418a..3147c076fc 100644
--- a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts
+++ b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts
@@ -85,9 +85,15 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
             throw new Error('`prompt` must be a string');
         }
 
-        const validRations = selectedModel?.allowedRatios;
-        if ( validRations && (!ratio || !validRations.some(r => r.w === ratio.w && r.h === ratio.h)) ) {
-            ratio = validRations[0]; // Default to the first allowed ratio
+        const validRatios = selectedModel?.allowedRatios;
+        if ( validRatios ) {
+            if ( !ratio || !validRatios.some(r => r.w === ratio.w && r.h === ratio.h) ) {
+                ratio = validRatios[0]; // Default to the first allowed ratio
+            }
+        } else {
+            // Open-ended size models (gpt-image-2): conform to OpenAI's size
+            // rules (16px multiples, 3840 cap, 3:1 ratio, pixel budget).
+            ratio = this.#normalizeGptImage2Ratio(ratio);
         }
 
         if ( ! ratio ) {
@@ -101,7 +107,10 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
 
         const size = `${ratio.w}x${ratio.h}`;
         const price_key = this.#buildPriceKey(selectedModel.id, quality!, size);
-        const outputPriceInCents = selectedModel?.costs[price_key];
+        let outputPriceInCents: number | undefined = selectedModel?.costs[price_key];
+        if ( outputPriceInCents === undefined ) {
+            outputPriceInCents = this.#estimateOutputCostFromTokens(selectedModel, ratio, quality);
+        }
         if ( outputPriceInCents === undefined ) {
             const availableSizes = Object.keys(selectedModel?.costs)
                 .filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key));
@@ -412,8 +421,96 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
     }
 
     #isGptImageModel (model: string) {
-        // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5 and future variants.
-        return model.startsWith('gpt-image-1');
+        // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5, gpt-image-2 and future variants.
+        return model.startsWith('gpt-image-');
+    }
+
+    // gpt-image-2 size rules: each edge in [16, 3840] and a multiple of 16,
+    // long:short ratio ≤ 3:1, pixel count in [655360, 8294400]. Silently
+    // clamps/snaps rather than throwing so arbitrary user input is accepted;
+    // missing, non-numeric or non-positive input falls back to 1024x1024.
+    // https://developers.openai.com/api/docs/guides/image-generation
+    #normalizeGptImage2Ratio (ratio?: { w: number; h: number }) {
+        const MIN_EDGE = 16;
+        const MAX_EDGE = 3840;
+        const STEP = 16;
+        const MAX_RATIO = 3;
+        const MIN_PIXELS = 655_360;
+        const MAX_PIXELS = 8_294_400;
+
+        let w = Number(ratio?.w);
+        let h = Number(ratio?.h);
+        if ( !Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0 ) {
+            return { w: 1024, h: 1024 };
+        }
+
+        // 1. Clamp long:short ratio to MAX_RATIO by shrinking the longer edge.
+        if ( w / h > MAX_RATIO ) w = h * MAX_RATIO;
+        else if ( h / w > MAX_RATIO ) h = w * MAX_RATIO;
+
+        // 2. Cap each edge at MAX_EDGE, preserving aspect ratio.
+        if ( w > MAX_EDGE ) { h *= MAX_EDGE / w; w = MAX_EDGE; }
+        if ( h > MAX_EDGE ) { w *= MAX_EDGE / h; h = MAX_EDGE; }
+
+        // 3. Scale uniformly into the pixel budget (factor is 1 when in range).
+        const prescaledPixels = w * h;
+        const targetPixels = Math.min(MAX_PIXELS, Math.max(MIN_PIXELS, prescaledPixels));
+        const scale = Math.sqrt(targetPixels / prescaledPixels);
+        w *= scale; h *= scale;
+
+        // 4. Snap to STEP. Bias rounding direction so snap doesn't push pixels
+        //    back out of the budget.
+        const dir = prescaledPixels < MIN_PIXELS ? 1
+            : prescaledPixels > MAX_PIXELS ? -1
+                : 0;
+        const snap = (v: number) => {
+            const snapped = dir > 0 ? Math.ceil(v / STEP) * STEP
+                : dir < 0 ? Math.floor(v / STEP) * STEP
+                    : Math.round(v / STEP) * STEP;
+            return Math.max(MIN_EDGE, Math.min(MAX_EDGE, snapped));
+        };
+        w = snap(w); h = snap(h);
+
+        // 5. Repair what snapping broke. Plain rounding (dir 0) can leave the
+        //    ratio above MAX_RATIO or the area outside the budget, e.g.
+        //    807x813 -> 800x816 (652800 px). Trimming the long edge or growing
+        //    the short edge cannot break the ratio again, so a few passes settle it.
+        for ( let pass = 0; pass < 8; pass++ ) {
+            const wIsLong = w >= h;
+            if ( Math.max(w, h) > Math.min(w, h) * MAX_RATIO || w * h > MAX_PIXELS ) {
+                if ( wIsLong ) w -= STEP; else h -= STEP;
+            } else if ( w * h < MIN_PIXELS ) {
+                if ( wIsLong ) h += STEP; else w += STEP;
+            } else {
+                break;
+            }
+        }
+        return { w, h };
+    }
+
+    // Estimates gpt-image-2 output tokens for a width x height image. Formula
+    // extracted from the calculator at
+    // https://developers.openai.com/api/docs/guides/image-generation#cost-and-latency
+    #estimateGptImage2OutputTokens (width: number, height: number, quality?: string): number {
+        // Long-edge latent size per quality; anything else (e.g. 'auto') is priced as medium.
+        const LONG_LATENT: Record<string, number> = { low: 16, medium: 48, high: 96 };
+        const longLatent = LONG_LATENT[quality ?? ''] ?? LONG_LATENT.medium;
+        // The short edge gets a proportional, rounded share of the long edge's latent size.
+        const shortLatent = Math.round(longLatent * Math.min(width, height) / Math.max(width, height));
+        // The area term is applied before the division; the result is rounded up to whole tokens.
+        return Math.ceil(longLatent * shortLatent * (2_000_000 + width * height) / 4_000_000);
+    }
+
+    // Token-based price fallback for gpt-image-2 sizes without a fixed `costs`
+    // entry; undefined for other models or when no `image_output` rate exists.
+    #estimateOutputCostFromTokens (selectedModel: IImageModel, ratio: { w: number; h: number }, quality?: string): number | undefined {
+        const isGptImage2 = selectedModel.id.startsWith('gpt-image-2');
+        const outputRate = isGptImage2 ? this.#getCostRate(selectedModel, 'image_output') : undefined;
+        if ( outputRate === undefined ) return undefined;
+
+        const { w: width, h: height } = ratio;
+        const estimatedTokens = this.#estimateGptImage2OutputTokens(width, height, quality);
+        return this.#costForTokens(estimatedTokens, outputRate);
     }
 
     #buildPriceKey (model: string, quality: string, size: string) {
diff --git a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts
index 3b86dd05a6..de62f7584f 100644
--- a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts
+++ b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts
@@ -1,6 +1,27 @@
 import { IImageModel } from '../types';
 
 export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [
+    {
+        puterId: 'openai:openai/gpt-image-2',
+        id: 'gpt-image-2',
+        aliases: ['openai/gpt-image-2', 'gpt-image-2-2026-04-21'],
+        name: 'GPT Image 2',
+        version: '2.0',
+        costs_currency: 'usd-cents',
+        index_cost_key: 'low:1024x1024',
+        costs: {
+            // Text tokens (per 1M tokens)
+            text_input: 500, // $5.00
+            text_cached_input: 125, // $1.25
+            text_output: 1000, // $10.00
+            // Image tokens (per 1M tokens)
+            image_input: 800, // $8.00
+            image_cached_input: 200, // $2.00
+            image_output: 3000, // $30.00
+            'low:1024x1024': 0.588,
+        },
+        allowedQualityLevels: ['low', 'medium', 'high', 'auto'],
+    },
     {
         puterId: 'openai:openai/gpt-image-1.5',
         id: 'gpt-image-1.5',
diff --git a/src/docs/src/AI/txt2img.md b/src/docs/src/AI/txt2img.md
index 8441d85a9a..adb7b4e107 100755
--- a/src/docs/src/AI/txt2img.md
+++ b/src/docs/src/AI/txt2img.md
@@ -37,13 +37,13 @@ Additional settings for the generation request. Available options depend on the
 
 #### OpenAI Options
 
-Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
+Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-2`, `gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
 
 | Option | Type | Description |
 |--------|------|-------------|
-| `model` | `String` | Image model to use. Available: `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
-| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`). For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
-| `ratio` | `Object` | Aspect ratio with `w` and `h` properties |
+| `model` | `String` | Image model to use. Available: `'gpt-image-2'`, `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
+| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`); `gpt-image-2` also accepts `'auto'`. For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
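+| `ratio` | `Object` | Aspect ratio with `w` and `h` properties. `gpt-image-2` accepts arbitrary sizes and silently snaps them to the nearest valid size (each edge a multiple of 16 up to 3840, long:short ratio at most 3:1, 655,360–8,294,400 total pixels); other GPT models and DALL-E are restricted to fixed sizes |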
 
 For more details, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/images/create).