15 changes: 15 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,21 @@
All notable changes to `@stackbilt/llm-providers` are documented here.
Format follows [Keep a Changelog](https://keepachangelog.com/). Versions use [Semantic Versioning](https://semver.org/).

## [1.3.0] — 2026-04-16

### Added
- **Cloudflare Workers AI vision support** — `CloudflareProvider` now accepts `request.images` and routes to vision-capable models. Previously image data was silently dropped on the CF path.
- **Three new CF vision models**:
- `@cf/google/gemma-4-26b-a4b-it` — 256K context, vision + function calling + reasoning
- `@cf/meta/llama-4-scout-17b-16e-instruct` — natively multimodal, tool calling
- `@cf/meta/llama-3.2-11b-vision-instruct` — image understanding
- **`CloudflareProvider.supportsVision = true`** — factory's `analyzeImage` now dispatches to CF when configured.
- **Factory default vision fallback** — `getDefaultVisionModel()` falls back to `@cf/google/gemma-4-26b-a4b-it` when neither Anthropic nor OpenAI is configured, enabling CF-only deployments to use `analyzeImage()`.

### Changed
- Images are passed to CF using the OpenAI-compatible `image_url` content-part shape (base64 data URIs). HTTP image URLs throw a helpful `ConfigurationError` — fetch the image and pass bytes in `image.data` (see the usage sketch below).
- Attempting `request.images` on a non-vision CF model throws a `ConfigurationError` naming the vision-capable alternatives.
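A minimal usage sketch of the new CF vision path, mirroring the tests in this PR. The `new CloudflareProvider({ ai })` construction is an assumption inferred from `CloudflareConfig`, not an API confirmed by this diff:

```ts
import { CloudflareProvider } from '@stackbilt/llm-providers';

export default {
  async fetch(_req: Request, env: { AI: Ai }): Promise<Response> {
    // Constructor shape is an assumption based on CloudflareConfig.
    const provider = new CloudflareProvider({ ai: env.AI });

    const result = await provider.generateResponse({
      model: '@cf/google/gemma-4-26b-a4b-it',
      messages: [{ role: 'user', content: 'What is in this image?' }],
      // Base64 bytes are required; an HTTP URL here throws ConfigurationError.
      images: [{ data: 'QUJD', mimeType: 'image/png' }],
      maxTokens: 256
    });

    // Return the raw response to avoid assuming LLMResponse's field names.
    return new Response(JSON.stringify(result), {
      headers: { 'content-type': 'application/json' }
    });
  }
};
```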

## [1.2.0] — 2026-04-01

### Added
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "@stackbilt/llm-providers",
"version": "1.2.0",
"version": "1.3.0",
"description": "Multi-LLM failover with circuit breakers, cost tracking, and intelligent retry. Cloudflare Workers native.",
"author": "Stackbilt <admin@stackbilt.dev>",
"license": "Apache-2.0",
93 changes: 93 additions & 0 deletions src/__tests__/cloudflare.test.ts
@@ -271,4 +271,97 @@ describe('CloudflareProvider', () => {
]);
});
});

describe('vision', () => {
it('advertises vision capability on the provider', () => {
expect(provider.supportsVision).toBe(true);
});

it('marks Gemma 4, Llama 4 Scout, and Llama 3.2 Vision as vision-capable', () => {
const capabilities = provider.exposeModelCapabilities();
expect(capabilities['@cf/google/gemma-4-26b-a4b-it'].supportsVision).toBe(true);
expect(capabilities['@cf/meta/llama-4-scout-17b-16e-instruct'].supportsVision).toBe(true);
expect(capabilities['@cf/meta/llama-3.2-11b-vision-instruct'].supportsVision).toBe(true);
});

it('attaches images to the last user message as OpenAI image_url parts', async () => {
mockAiRun.mockResolvedValueOnce({
choices: [{ message: { role: 'assistant', content: 'A ripe tomato.' }, finish_reason: 'stop' }]
});

await provider.generateResponse({
model: '@cf/google/gemma-4-26b-a4b-it',
messages: [{ role: 'user', content: 'What is in this image?' }],
images: [{ data: 'QUJD', mimeType: 'image/png' }],
maxTokens: 256
});

const [modelArg, body] = mockAiRun.mock.calls[0];
expect(modelArg).toBe('@cf/google/gemma-4-26b-a4b-it');
expect(body.messages).toHaveLength(1);
const userMsg = body.messages[0];
expect(userMsg.role).toBe('user');
expect(Array.isArray(userMsg.content)).toBe(true);
expect(userMsg.content[0]).toEqual({ type: 'text', text: 'What is in this image?' });
expect(userMsg.content[1]).toEqual({
type: 'image_url',
image_url: { url: 'data:image/png;base64,QUJD' }
});
});

it('appends multiple images as separate image_url parts', async () => {
mockAiRun.mockResolvedValueOnce({
choices: [{ message: { content: 'Two tomatoes.' }, finish_reason: 'stop' }]
});

await provider.generateResponse({
model: '@cf/meta/llama-4-scout-17b-16e-instruct',
messages: [{ role: 'user', content: 'compare' }],
images: [
{ data: 'QQ==', mimeType: 'image/jpeg' },
{ data: 'Qg==', mimeType: 'image/jpeg' }
]
});

const [, body] = mockAiRun.mock.calls[0];
const content = body.messages[body.messages.length - 1].content;
expect(content.filter((p: { type: string }) => p.type === 'image_url')).toHaveLength(2);
});

it('accepts pre-formed data: URLs via image.url', async () => {
mockAiRun.mockResolvedValueOnce({
choices: [{ message: { content: 'ok' }, finish_reason: 'stop' }]
});

await provider.generateResponse({
model: '@cf/meta/llama-3.2-11b-vision-instruct',
messages: [{ role: 'user', content: 'x' }],
images: [{ url: 'data:image/webp;base64,ZEFUQQ==' }]
});

const [, body] = mockAiRun.mock.calls[0];
const imagePart = body.messages[0].content[1];
expect(imagePart.image_url.url).toBe('data:image/webp;base64,ZEFUQQ==');
});

it('rejects HTTP image URLs (requires base64 bytes)', async () => {
await expect(
provider.generateResponse({
model: '@cf/google/gemma-4-26b-a4b-it',
messages: [{ role: 'user', content: 'x' }],
images: [{ url: 'https://example.com/img.jpg' }]
})
).rejects.toThrow(/HTTP image URLs are not supported/);
});

it('rejects images on non-vision models with a helpful error', async () => {
await expect(
provider.generateResponse({
model: '@cf/meta/llama-3.1-8b-instruct',
messages: [{ role: 'user', content: 'x' }],
images: [{ data: 'QUJD', mimeType: 'image/png' }]
})
).rejects.toThrow(/does not support image input/);
});
});
});
1 change: 1 addition & 0 deletions src/factory.ts
@@ -1105,6 +1105,7 @@ export class LLMProviderFactory {
if (this.config.defaultVisionModel) return this.config.defaultVisionModel;
if (this.providers.has('anthropic')) return 'claude-haiku-4-5-20251001';
if (this.providers.has('openai')) return 'gpt-4o-mini';
if (this.providers.has('cloudflare')) return '@cf/google/gemma-4-26b-a4b-it';
return undefined;
}

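With this fallback, a CF-only deployment can use `analyzeImage()` without Anthropic or OpenAI keys. A sketch under assumptions: the factory's constructor config keys and the `analyzeImage` argument shape are hypothetical, inferred from the changelog rather than shown in this diff:

```ts
import { LLMProviderFactory } from '@stackbilt/llm-providers';

declare const env: { AI: Ai };       // Workers AI binding from wrangler.toml
declare const base64Png: string;     // image bytes, already base64-encoded

// Hypothetical config shape: only Cloudflare configured, no defaultVisionModel.
const factory = new LLMProviderFactory({
  cloudflare: { ai: env.AI }
});

// getDefaultVisionModel() checks defaultVisionModel, then anthropic, then openai,
// and now falls back to '@cf/google/gemma-4-26b-a4b-it' for cloudflare-only setups.
const analysis = await factory.analyzeImage({
  prompt: 'Describe this image.',    // argument names here are assumptions
  images: [{ data: base64Png, mimeType: 'image/png' }]
});
```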
104 changes: 102 additions & 2 deletions src/providers/cloudflare.ts
@@ -6,6 +6,7 @@
import type {
LLMRequest,
LLMResponse,
LLMImageInput,
CloudflareConfig,
ModelCapabilities,
TokenUsage,
@@ -17,9 +18,15 @@ import {
ModelNotFoundError
} from '../errors';

interface CloudflareContentPart {
type: 'text' | 'image_url';
text?: string;
image_url?: { url: string };
}

interface CloudflareMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
- content: string | null;
+ content: string | null | CloudflareContentPart[];
tool_calls?: ToolCall[];
tool_call_id?: string;
}
@@ -94,11 +101,15 @@ export class CloudflareProvider extends BaseProvider {
'@cf/qwen/qwen1.5-0.5b-chat',
'@cf/qwen/qwen1.5-1.8b-chat',
'@cf/qwen/qwen1.5-14b-chat-awq',
- '@cf/qwen/qwen1.5-7b-chat-awq'
+ '@cf/qwen/qwen1.5-7b-chat-awq',
+ '@cf/google/gemma-4-26b-a4b-it',
+ '@cf/meta/llama-4-scout-17b-16e-instruct',
+ '@cf/meta/llama-3.2-11b-vision-instruct'
];
supportsStreaming = true;
supportsTools = true;
supportsBatching = true;
supportsVision = true;

private ai: Ai;
private accountId?: string;
@@ -307,6 +318,38 @@
inputTokenCost: 0.0000001,
outputTokenCost: 0.0000001,
description: 'Qwen 1.5 7B - Optimized performance'
},
'@cf/google/gemma-4-26b-a4b-it': {
maxContextLength: 256000,
supportsStreaming: true,
supportsTools: true,
toolCalling: true,
supportsVision: true,
supportsBatching: true,
inputTokenCost: 0.0000001,
outputTokenCost: 0.0000003,
description: 'Gemma 4 26B — vision + tools + reasoning, 256K context'
},
'@cf/meta/llama-4-scout-17b-16e-instruct': {
maxContextLength: 131000,
supportsStreaming: true,
supportsTools: true,
toolCalling: true,
supportsVision: true,
supportsBatching: true,
inputTokenCost: 0.0000003,
outputTokenCost: 0.0000009,
description: 'Llama 4 Scout 17B — natively multimodal, tool calling'
},
'@cf/meta/llama-3.2-11b-vision-instruct': {
maxContextLength: 128000,
supportsStreaming: true,
supportsTools: false,
supportsVision: true,
supportsBatching: true,
inputTokenCost: 0.0000005,
outputTokenCost: 0.0000005,
description: 'Llama 3.2 11B Vision — image understanding'
}
};
}
@@ -326,6 +369,14 @@
);
}

const hasImages = (request.images?.length ?? 0) > 0;
if (hasImages && !capabilities?.supportsVision) {
throw new ConfigurationError(
this.name,
`Model '${model}' does not support image input. Use a vision-capable model like @cf/google/gemma-4-26b-a4b-it, @cf/meta/llama-4-scout-17b-16e-instruct, or @cf/meta/llama-3.2-11b-vision-instruct.`
);
}

const messages: CloudflareMessage[] = [];
const jsonMode = request.response_format?.type === 'json_object';
const jsonInstruction = '\n\nYou must respond with valid JSON only. No markdown fences, no commentary, no text outside the JSON.';
@@ -381,6 +432,10 @@
}
}

if (hasImages) {
this.attachImagesToLastUserMessage(messages, request.images!, model);
}

const cloudflareRequest: CloudflareRequest = {
messages,
temperature: request.temperature,
@@ -402,6 +457,51 @@
return cloudflareRequest;
}

private attachImagesToLastUserMessage(
messages: CloudflareMessage[],
images: NonNullable<LLMRequest['images']>,
model: string
): void {
const lastUserIndex = (() => {
for (let i = messages.length - 1; i >= 0; i--) {
if (messages[i].role === 'user') return i;
}
return -1;
})();

if (lastUserIndex === -1) {
throw new ConfigurationError(
this.name,
`Vision request must include at least one user message (model: ${model})`
);
}

const existing = messages[lastUserIndex].content;
const text = typeof existing === 'string' ? existing : '';

const parts: CloudflareContentPart[] = [{ type: 'text', text }];
for (const image of images) {
const url = this.buildImageDataUrl(image, model);
parts.push({ type: 'image_url', image_url: { url } });
}

messages[lastUserIndex].content = parts;
}

private buildImageDataUrl(image: LLMImageInput, model: string): string {
if (image.data) {
const mime = image.mimeType ?? 'image/jpeg';
return `data:${mime};base64,${image.data}`;
}
if (image.url?.startsWith('data:')) {
return image.url;
}
throw new ConfigurationError(
this.name,
`Cloudflare vision models (${model}) require base64 image data or a data: URL. HTTP image URLs are not supported — fetch the image and pass bytes in image.data.`
);
}

private formatResponse(
result: WorkersAIResult,
model: string,
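Since the CF path rejects HTTP image URLs by design, callers fetch the bytes themselves and pass base64. A sketch of that conversion in the Workers runtime; the helper itself is hypothetical, and only `generateResponse` and the `images` shape come from this diff:

```ts
import type { CloudflareProvider } from '@stackbilt/llm-providers';

// Hypothetical helper: fetch an HTTP image and hand the provider base64 bytes.
async function describeRemoteImage(provider: CloudflareProvider, imageUrl: string) {
  const res = await fetch(imageUrl);
  if (!res.ok) throw new Error(`Image fetch failed: ${res.status}`);

  // Encode the body as base64 (btoa is available in the Workers runtime).
  const bytes = new Uint8Array(await res.arrayBuffer());
  let binary = '';
  for (const b of bytes) binary += String.fromCharCode(b);

  return provider.generateResponse({
    model: '@cf/meta/llama-3.2-11b-vision-instruct',
    messages: [{ role: 'user', content: 'Describe this image.' }],
    images: [{ data: btoa(binary), mimeType: res.headers.get('content-type') ?? 'image/jpeg' }]
  });
}
```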