Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,15 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
throw new Error('`prompt` must be a string');
}

const validRations = selectedModel?.allowedRatios;
if ( validRations && (!ratio || !validRations.some(r => r.w === ratio.w && r.h === ratio.h)) ) {
ratio = validRations[0]; // Default to the first allowed ratio
const validRatios = selectedModel?.allowedRatios;
if ( validRatios ) {
if ( !ratio || !validRatios.some(r => r.w === ratio.w && r.h === ratio.h) ) {
ratio = validRatios[0]; // Default to the first allowed ratio
}
} else {
// Open-ended size models (gpt-image-2): conform to OpenAI's size
// rules (16px multiples, 3840 cap, 3:1 ratio, pixel budget).
ratio = this.#normalizeGptImage2Ratio(ratio);
}

if ( ! ratio ) {
Expand All @@ -101,7 +107,10 @@ export class OpenAiImageGenerationProvider implements IImageProvider {

const size = `${ratio.w}x${ratio.h}`;
const price_key = this.#buildPriceKey(selectedModel.id, quality!, size);
const outputPriceInCents = selectedModel?.costs[price_key];
let outputPriceInCents: number | undefined = selectedModel?.costs[price_key];
if ( outputPriceInCents === undefined ) {
outputPriceInCents = this.#estimateOutputCostFromTokens(selectedModel, ratio, quality);
}
if ( outputPriceInCents === undefined ) {
const availableSizes = Object.keys(selectedModel?.costs)
.filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key));
Expand Down Expand Up @@ -412,8 +421,96 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
}

// Reports whether `model` belongs to the gpt-image family by checking for
// the `gpt-image-` id prefix. Covers gpt-image-1, gpt-image-1-mini,
// gpt-image-1.5, gpt-image-2 and future variants.
#isGptImageModel (model: string) {
    return model.startsWith('gpt-image-');
}

// Normalizes a requested size to the gpt-image-2 size rules: each edge in
// [16, 3840] and a multiple of 16, long:short ratio ≤ 3:1, pixel count in
// [655360, 8294400]. Silently clamps/snaps rather than throwing so arbitrary
// user input is accepted. A missing, non-numeric or non-positive size falls
// back to 1024x1024.
// https://developers.openai.com/api/docs/guides/image-generation
//
// @param ratio Requested width (`w`) and height (`h`); optional.
// @returns A `{ w, h }` pair conforming to the rules above.
#normalizeGptImage2Ratio (ratio?: { w: number; h: number }): { w: number; h: number } {
    const MIN_EDGE = 16;
    const MAX_EDGE = 3840;
    const STEP = 16;
    const MAX_RATIO = 3;
    const MIN_PIXELS = 655_360;
    const MAX_PIXELS = 8_294_400;

    // Number(undefined) is NaN, so an absent ratio takes the fallback below.
    let w = Number(ratio?.w);
    let h = Number(ratio?.h);
    if ( !Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0 ) {
        return { w: 1024, h: 1024 };
    }

    // 1. Clamp long:short ratio to MAX_RATIO by shrinking the longer edge.
    if ( w / h > MAX_RATIO ) w = h * MAX_RATIO;
    else if ( h / w > MAX_RATIO ) h = w * MAX_RATIO;

    // 2. Cap each edge at MAX_EDGE, preserving aspect ratio.
    if ( w > MAX_EDGE ) {
        const s = MAX_EDGE / w; w = MAX_EDGE; h *= s;
    }
    if ( h > MAX_EDGE ) {
        const s = MAX_EDGE / h; h = MAX_EDGE; w *= s;
    }

    // 3. Scale uniformly into the pixel budget.
    const prescaledPixels = w * h;
    if ( prescaledPixels < MIN_PIXELS ) {
        const s = Math.sqrt(MIN_PIXELS / prescaledPixels);
        w *= s; h *= s;
    } else if ( prescaledPixels > MAX_PIXELS ) {
        const s = Math.sqrt(MAX_PIXELS / prescaledPixels);
        w *= s; h *= s;
    }

    // 4. Snap to STEP. When step 3 rescaled, bias the rounding direction so
    //    the snap doesn't push pixels back out of the budget. Sizes that
    //    were already in budget round to nearest to stay close to the request.
    const dir = prescaledPixels < MIN_PIXELS ? 1
        : prescaledPixels > MAX_PIXELS ? -1
            : 0;
    const snap = (v: number) => {
        const snapped = dir > 0 ? Math.ceil(v / STEP) * STEP
            : dir < 0 ? Math.floor(v / STEP) * STEP
                : Math.round(v / STEP) * STEP;
        return Math.max(MIN_EDGE, Math.min(MAX_EDGE, snapped));
    };
    w = snap(w); h = snap(h);

    // 5. If snap rounding pushed ratio above MAX_RATIO, trim the longer
    //    edge by one STEP.
    if ( Math.max(w, h) / Math.min(w, h) > MAX_RATIO ) {
        if ( w >= h ) w = Math.max(MIN_EDGE, w - STEP);
        else h = Math.max(MIN_EDGE, h - STEP);
    }

    // 6. Round-to-nearest (step 4) and the ratio trim (step 5) can each
    //    leave the size marginally outside the pixel budget, e.g. 1047x627
    //    rounds to 1040x624, which is below MIN_PIXELS. Repair one STEP at a
    //    time: growing the shorter edge or shrinking the longer edge never
    //    pushes the aspect ratio above MAX_RATIO.
    while ( w * h < MIN_PIXELS ) {
        if ( w <= h ) w += STEP;
        else h += STEP;
    }
    while ( w * h > MAX_PIXELS ) {
        if ( w >= h ) w -= STEP;
        else h -= STEP;
    }
    return { w, h };
}

// Estimates the image output tokens for a gpt-image-2 render from its pixel
// dimensions and quality. Formula extracted from the calculator at
// https://developers.openai.com/api/docs/guides/image-generation#cost-and-latency
#estimateGptImage2OutputTokens (width: number, height: number, quality?: string): number {
    // Factor applied to the longer edge; a quality with no entry in the
    // table (including a missing one) falls back to the medium factor.
    const qualityFactors: Record<string, number> = { low: 16, medium: 48, high: 96 };
    const longFactor = qualityFactors[quality ?? ''] ?? qualityFactors.medium;

    // The shorter edge gets the same factor scaled by short/long, rounded
    // to an integer.
    const shortFactor = Math.round(longFactor * Math.min(width, height) / Math.max(width, height));

    // Orientation does not affect the area: it is always long × short.
    const latentArea = longFactor * shortFactor;
    const pixelCount = width * height;
    return Math.ceil(latentArea * (2_000_000 + pixelCount) / 4_000_000);
}

// Token-based fallback price for an output image. Only applies to models
// whose id starts with `gpt-image-2`; returns undefined for any other model
// or when the model has no `image_output` rate, so the caller can tell that
// no estimate is available.
#estimateOutputCostFromTokens (
    selectedModel: IImageModel,
    ratio: { w: number; h: number },
    quality?: string,
): number | undefined {
    const isGptImage2 = selectedModel.id.startsWith('gpt-image-2');
    if ( ! isGptImage2 ) {
        return undefined;
    }

    const outputRate = this.#getCostRate(selectedModel, 'image_output');
    if ( outputRate !== undefined ) {
        const estimatedTokens = this.#estimateGptImage2OutputTokens(ratio.w, ratio.h, quality);
        return this.#costForTokens(estimatedTokens, outputRate);
    }
    return undefined;
}

#buildPriceKey (model: string, quality: string, size: string) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
import { IImageModel } from '../types';

export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [
{
puterId: 'openai:openai/gpt-image-2',
id: 'gpt-image-2',
aliases: ['openai/gpt-image-2', 'gpt-image-2-2026-04-21'],
name: 'GPT Image 2',
version: '2.0',
costs_currency: 'usd-cents',
index_cost_key: 'low:1024x1024',
costs: {
// Text tokens (per 1M tokens)
text_input: 500, // $5.00
text_cached_input: 125, // $1.25
text_output: 1000, // $10.00
// Image tokens (per 1M tokens)
image_input: 800, // $8.00
image_cached_input: 200, // $2.00
image_output: 3000, // $30.00
'low:1024x1024': 0.588,
},
allowedQualityLevels: ['low', 'medium', 'high', 'auto'],
},
{
puterId: 'openai:openai/gpt-image-1.5',
id: 'gpt-image-1.5',
Expand Down
8 changes: 4 additions & 4 deletions src/docs/src/AI/txt2img.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ Additional settings for the generation request. Available options depend on the

#### OpenAI Options

Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-2`, `gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):

| Option | Type | Description |
|--------|------|-------------|
| `model` | `String` | Image model to use. Available: `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`). For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
| `ratio` | `Object` | Aspect ratio with `w` and `h` properties |
| `model` | `String` | Image model to use. Available: `'gpt-image-2'`, `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`); `gpt-image-2` also accepts `'auto'`. For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
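| `ratio` | `Object` | Aspect ratio with `w` and `h` properties. `gpt-image-2` accepts any positive size, which is silently adjusted to OpenAI's size rules (each edge a multiple of 16 up to 3840, long:short ratio at most 3:1, total pixels between 655,360 and 8,294,400); other GPT models and DALL-E 3 are restricted to fixed sizes |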

For more details, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/images/create).

Expand Down