Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/agent-core/src/tools/builtin/web/fetch-url.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Fetch content from a URL. Returns the main text content extracted from the page. Use this when you need to read a specific web page.
Fetch content from a URL. Returns the main text content extracted from the page, or the image if the URL points to an image file. Use this when you need to read a specific web page or view an image from the web.

Only public `http`/`https` URLs are supported. Requests to private, loopback, or link-local addresses are refused, and responses larger than 10 MiB are rejected.
43 changes: 39 additions & 4 deletions packages/agent-core/src/tools/builtin/web/fetch-url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
* should not be registered (not exposed to the LLM).
*/

import type { ContentPart } from '@moonshot-ai/kosong';
import { z } from 'zod';

import type { BuiltinTool } from '../../../agent/tool';
import { ToolAccesses } from '../../../loop/tool-access';
import type { ExecutableToolContext, ExecutableToolResult, ToolExecution } from '../../../loop/types';
import { sniffImageDimensions } from '../../support/file-type';

Check failure on line 15 in packages/agent-core/src/tools/builtin/web/fetch-url.ts

View workflow job for this annotation

GitHub Actions / lint

eslint(no-unused-vars)

Identifier 'sniffImageDimensions' is imported but never used.
import { toInputJsonSchema } from '../../support/input-schema';
import { literalRulePattern, matchesGlobRuleSubject } from '../../support/rule-match';
import { ToolResultBuilder } from '../../support/result-builder';
Expand All @@ -26,13 +28,24 @@
* - `extracted` — the body was an HTML page; only the main article text
* was extracted and returned.
*/
export type UrlFetchKind = 'passthrough' | 'extracted';
export type UrlFetchKind = 'passthrough' | 'extracted' | 'image';

/**
 * Binary image payload carried by a fetch result whose kind is `'image'`.
 */
export interface UrlFetchImage {
/** Base64-encoded image bytes (no `data:` prefix; callers build the data URL themselves). */
data: string;
/** Image MIME type (e.g. image/png). */
mimeType: string;
/** Original pixel dimensions, or `null` when they could not be detected. */
dimensions: { width: number; height: number } | null;
}

export interface UrlFetchResult {
/** The text handed to the LLM. */
/** The text handed to the LLM (for text/HTML responses). */
content: string;
/** Whether `content` is a verbatim passthrough or extracted main text. */
/** Whether content is verbatim, extracted, or this is an image response. */
kind: UrlFetchKind;
/** Image data when kind === 'image'. */
image?: UrlFetchImage | undefined;
}

export interface UrlFetcher {
Expand Down Expand Up @@ -89,7 +102,29 @@
}: ExecutableToolContext,
): Promise<ExecutableToolResult> {
try {
const { content, kind } = await this.fetcher.fetch(args.url, { toolCallId });
const { content, kind, image } = await this.fetcher.fetch(args.url, { toolCallId });

if (kind === 'image' && image !== undefined) {
const output: ContentPart[] = [
{
type: 'text',
text: `<system>Fetched image from ${args.url}. Mime type: ${image.mimeType}. Original dimensions: ${image.dimensions ? `${image.dimensions.width}x${image.dimensions.height}` : 'unknown'} pixels.</system>`,
},
{
type: 'text',
text: `<image url="${args.url}">`,
},
{
type: 'image_url',
imageUrl: { url: `data:${image.mimeType};base64,${image.data}` },
Comment on lines +118 to +119

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Gate fetched images on image_in capability

I checked the built-in tool registration: ReadMediaFile is gated on modelCapabilities.image_in || modelCapabilities.video_in in packages/agent-core/src/agent/tool/index.ts, while FetchURL is still registered unconditionally whenever a urlFetcher exists. With a text-only model (image_in: false), fetching any image/* URL now appends this image_url part to the conversation, so the next provider request can be rejected by models that do not accept image input instead of returning an actionable tool error. Please gate this image result on the current model capability or degrade it to a text-only/error result for non-vision models.

Useful? React with 👍 / 👎.

},
{
type: 'text',
text: '</image>',
},
];
return { output, isError: false };
}

if (!content) {
return {
Expand Down
22 changes: 21 additions & 1 deletion packages/agent-core/src/tools/providers/local-fetch-url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { Readability } from '@mozilla/readability';
import { parseHTML as rawParseHTML } from 'linkedom';

import { HttpFetchError, type UrlFetcher, type UrlFetchResult } from '../builtin';
import { sniffImageDimensions } from '../support/file-type';

// Readability's .d.ts references the global `Document` type, but this
// package compiles with `lib: ES2023` (no DOM). Extracting the
Expand Down Expand Up @@ -172,6 +173,26 @@ export class LocalFetchURLProvider implements UrlFetcher {
}
}

const contentType = (response.headers.get('content-type') ?? '').toLowerCase();

// Image responses — read binary data and return as base64.
if (contentType.startsWith('image/')) {
const arrayBuffer = await response.arrayBuffer();
const data = Buffer.from(arrayBuffer);
if (data.length > this.maxBytes) {
throw new Error(
`Image too large: ${String(data.length)} bytes exceeds maxBytes (${String(this.maxBytes)}).`,
);
}
const base64 = data.toString('base64');
const dimensions = sniffImageDimensions(data);
return {
content: '',
kind: 'image',
image: { data: base64, mimeType: contentType.split(';')[0]!.trim(), dimensions },
};
}

const body = await response.text();

// Servers may omit content-length — measure again defensively.
Expand All @@ -182,7 +203,6 @@ export class LocalFetchURLProvider implements UrlFetcher {
);
}

const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
if (contentType.startsWith('text/plain') || contentType.startsWith('text/markdown')) {
return { content: body, kind: 'passthrough' };
}
Expand Down
63 changes: 63 additions & 0 deletions packages/agent-core/test/tools/fetch-url.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,69 @@ describe('FetchURLTool', () => {
const message = (result as { message?: string }).message ?? '';
expect(message).toContain('full response body');
});

// An image result from the fetcher must surface as exactly four content
// parts: a <system> summary, an opening <image> tag, the data-URL image,
// and the closing </image> tag.
it('returns image_url ContentPart array when fetcher returns an image', async () => {
  const imagePayload = {
    data: 'aGVsbG8=',
    mimeType: 'image/png',
    dimensions: { width: 100, height: 200 },
  };
  const fetcher: UrlFetcher = {
    fetch: vi.fn().mockResolvedValue({ content: '', kind: 'image', image: imagePayload }),
  };
  const tool = new FetchURLTool(fetcher);

  const result = await executeTool(tool, {
    turnId: 't1',
    toolCallId: 'c_img',
    args: { url: 'https://example.com/chart.png' },
    signal,
  });

  expect(result.isError).toBe(false);
  expect(Array.isArray(result.output)).toBe(true);

  const parts = result.output as Array<{ type: string; text?: string; imageUrl?: { url: string } }>;
  expect(parts.length).toBe(4);
  const [summary, openTag, picture, closeTag] = parts;

  // Part 0: human-readable summary with MIME type and dimensions.
  expect(summary!.type).toBe('text');
  expect(summary!.text).toContain('Fetched image');
  expect(summary!.text).toContain('image/png');
  expect(summary!.text).toContain('100x200');

  // Part 1: opening tag naming the source URL.
  expect(openTag!.type).toBe('text');
  expect(openTag!.text).toBe('<image url="https://example.com/chart.png">');

  // Part 2: the image itself, inlined as a data URL.
  expect(picture!.type).toBe('image_url');
  expect(picture!.imageUrl!.url).toBe('data:image/png;base64,aGVsbG8=');

  // Part 3: closing tag.
  expect(closeTag!.type).toBe('text');
  expect(closeTag!.text).toBe('</image>');
});

// When the fetcher could not determine the image size, the summary part
// should report the dimensions as "unknown" rather than failing.
it('returns image without dimensions when fetcher returns null dimensions', async () => {
  const fetchResult = {
    content: '',
    kind: 'image',
    image: { data: 'aGVsbG8=', mimeType: 'image/webp', dimensions: null },
  };
  const fetcher: UrlFetcher = { fetch: vi.fn().mockResolvedValue(fetchResult) };
  const tool = new FetchURLTool(fetcher);

  const result = await executeTool(tool, {
    turnId: 't1',
    toolCallId: 'c_img2',
    args: { url: 'https://example.com/photo.webp' },
    signal,
  });

  expect(result.isError).toBe(false);
  const parts = result.output as Array<{ type: string; text?: string }>;
  const summary = parts[0]!;
  expect(summary.text).toContain('unknown');
});
});

describe('MoonshotFetchURLProvider', () => {
Expand Down
67 changes: 67 additions & 0 deletions packages/agent-core/test/tools/providers/local-fetch-url.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,35 @@ function htmlResponse(body: string, contentType: string): Response {
});
}

/** Wrap raw bytes in a 200 response that carries the given `content-type` header. */
function imageResponse(data: Buffer, contentType: string): Response {
  const headers = { 'content-type': contentType };
  const init = { status: 200, headers };
  return new Response(data, init);
}

/**
 * Build a minimal 2x3 PNG skeleton for dimension sniffing: the 8-byte
 * signature followed by an IHDR chunk and an empty IEND chunk. No pixel data
 * (IDAT) is included and both CRC fields are zeroed, so this is only suitable
 * for code that reads the header without validating checksums.
 */
function tinyPng(): Buffer {
  // Frame a payload as a PNG chunk: 4-byte big-endian length, 4-char type,
  // payload bytes, then a zeroed (fake) 4-byte CRC.
  const chunk = (type: string, payload: Buffer): Buffer => {
    const length = Buffer.alloc(4);
    length.writeUInt32BE(payload.length, 0);
    return Buffer.concat([length, Buffer.from(type), payload, Buffer.alloc(4)]);
  };

  const signature = Buffer.from('89504e470d0a1a0a', 'hex');

  // IHDR payload is 13 bytes; compression, filter and interlace stay zero.
  const header = Buffer.alloc(13);
  header.writeUInt32BE(2, 0); // width
  header.writeUInt32BE(3, 4); // height
  header.writeUInt8(8, 8); // bitDepth
  header.writeUInt8(2, 9); // colorType

  return Buffer.concat([signature, chunk('IHDR', header), chunk('IEND', Buffer.alloc(0))]);
}

describe('LocalFetchURLProvider content kind', () => {
it('reports text/plain bodies as a verbatim passthrough', async () => {
const fetchImpl = vi
Expand Down Expand Up @@ -55,4 +84,42 @@ describe('LocalFetchURLProvider content kind', () => {
expect(result.kind).toBe('extracted');
expect(result.content).toContain('quick brown fox');
});

// A PNG response should come back as kind 'image' with the exact bytes
// base64-encoded and the sniffed 2x3 dimensions attached.
it('reports image responses as base64 with kind image and dimensions', async () => {
  const pngBytes = tinyPng();
  const fetchImpl = vi.fn<typeof fetch>().mockResolvedValue(imageResponse(pngBytes, 'image/png'));
  const provider = new LocalFetchURLProvider({ fetchImpl });

  const { kind, image } = await provider.fetch('https://example.com/img.png');

  expect(kind).toBe('image');
  expect(image).toBeDefined();
  expect(image!.mimeType).toBe('image/png');
  expect(image!.data).toBe(pngBytes.toString('base64'));
  expect(image!.dimensions).toEqual({ width: 2, height: 3 });
});

// A content-type that carries parameters must still be treated as an image,
// and the reported MIME type must have those parameters stripped.
it('reports image/jpeg responses as base64 with kind image', async () => {
  // Minimal JPEG assembled segment by segment: SOI, APP0 (JFIF), SOF0, SOS, EOI.
  const soi = [0xff, 0xd8];
  const app0 = [
    0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
    0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00,
  ];
  // SOF0 declares a 1x1 image.
  const sof0 = [0xff, 0xc0, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x11, 0x00];
  const sos = [0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00];
  const eoi = [0xff, 0xd9];
  const jpeg = Buffer.from([...soi, ...app0, ...sof0, ...sos, ...eoi]);

  const fetchImpl = vi
    .fn<typeof fetch>()
    .mockResolvedValue(imageResponse(jpeg, 'image/jpeg; charset=utf-8'));
  const provider = new LocalFetchURLProvider({ fetchImpl });

  const result = await provider.fetch('https://example.com/img.jpg');
  const image = result.image!;

  expect(result.kind).toBe('image');
  expect(image.mimeType).toBe('image/jpeg');
  expect(image.dimensions).toEqual({ width: 1, height: 1 });
});
});
Loading