Skip to content

Commit aa94c61

Browse files
authored
Add read_url tool for web research (#719)
1 parent ae86981 commit aa94c61

17 files changed

Lines changed: 870 additions & 8 deletions

File tree

agents/context-pruner.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,10 @@ const definition: AgentDefinition = {
307307
? `web search for "${query}"`
308308
: 'web search'
309309
}
310+
case 'read_url': {
311+
const url = input.url as string | undefined
312+
return url ? `read URL: ${url}` : 'read a URL'
313+
}
310314
case 'gravity_index': {
311315
const query = input.query as string | undefined
312316
const action = input.action as string | undefined

agents/researcher/researcher-web.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,17 @@ const definition: SecretAgentDefinition = {
1616
},
1717
outputMode: 'last_message',
1818
includeMessageHistory: false,
19-
toolNames: ['web_search', 'run_terminal_command'],
19+
toolNames: ['web_search', 'read_url'],
2020
spawnableAgents: [],
2121

22-
systemPrompt: `You are an expert researcher who can search the web to find relevant information. Your goal is to answer the user's question from current search results and any useful source pages. Use web_search to get Serper JSON search results. Use run_terminal_command with tools like curl to fetch web pages that would help answer the user's question.`,
22+
systemPrompt: `You are an expert researcher who can search the web to find relevant information. Your goal is to answer the user's question from current search results and useful source pages. Use web_search to get Serper JSON search results. Use read_url to fetch and extract readable text from pages that would help answer the user's question.`,
2323
instructionsPrompt: `Provide comprehensive research on the user's prompt.
2424
2525
Use web_search to find current information. The tool returns JSON search results, so inspect the titles, links, snippets, answer boxes, and related results before deciding what to fetch next.
2626
27-
Use run_terminal_command to fetch any web page that would help answer the user's question. Prefer targeted, relevant pages from the search results. Avoid fetching pages that are unlikely to add useful evidence.
27+
Use read_url to fetch any web page that would help answer the user's question. Prefer targeted, relevant pages from the search results, especially official or primary sources. Avoid fetching pages that are unlikely to add useful evidence.
28+
29+
If read_url cannot handle a source, choose a different result or explain the limitation.
2830
2931
Then, write up a concise answer that includes key findings for the user's prompt and cites source URLs when useful.
3032
`.trim(),

agents/types/agent-definition.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ export type TerminalTools = 'run_terminal_command' | 'code_search'
345345
/**
346346
* Web and browser tools
347347
*/
348-
export type WebTools = 'web_search' | 'read_docs'
348+
export type WebTools = 'web_search' | 'read_docs' | 'read_url'
349349

350350
/**
351351
* Agent management tools

agents/types/tools.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export type ToolName =
1717
| 'read_docs'
1818
| 'read_files'
1919
| 'read_subtree'
20+
| 'read_url'
2021
| 'render_ui'
2122
| 'run_file_change_hooks'
2223
| 'run_terminal_command'
@@ -51,6 +52,7 @@ export interface ToolParamsMap {
5152
read_docs: ReadDocsParams
5253
read_files: ReadFilesParams
5354
read_subtree: ReadSubtreeParams
55+
read_url: ReadUrlParams
5456
render_ui: RenderUiParams
5557
run_file_change_hooks: RunFileChangeHooksParams
5658
run_terminal_command: RunTerminalCommandParams
@@ -276,6 +278,16 @@ export interface ReadSubtreeParams {
276278
maxTokens?: number
277279
}
278280

281+
/**
 * Parameters for the `read_url` tool, which fetches a URL and extracts
 * readable text from the page.
 */
export interface ReadUrlParams {
  /** Full URL of the page to fetch; it must start with http:// or https://. */
  url: string
  /**
   * Upper bound on the number of extracted text characters returned.
   * Defaults to 20000. The tool's input schema accepts integers from
   * 1000 to 50000.
   */
  max_chars?: number
}
290+
279291
/**
280292
* Render a small interactive UI widget in the Codebuff CLI. Currently supports a button that opens a link.
281293
*/

common/src/templates/initial-agents-dir/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ export default {
132132
### Web & Research
133133

134134
- **`web_search`**: Search the internet for information
135+
- **`read_url`**: Fetch a URL and extract readable page text
135136
- **`read_docs`**: Read technical documentation
136137
- **`browser_logs`**: Navigate and inspect web pages
137138

common/src/templates/initial-agents-dir/types/agent-definition.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ export type TerminalTools = 'run_terminal_command' | 'code_search'
345345
/**
346346
* Web and browser tools
347347
*/
348-
export type WebTools = 'web_search' | 'read_docs'
348+
export type WebTools = 'web_search' | 'read_docs' | 'read_url'
349349

350350
/**
351351
* Agent management tools

common/src/templates/initial-agents-dir/types/tools.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export type ToolName =
1717
| 'read_docs'
1818
| 'read_files'
1919
| 'read_subtree'
20+
| 'read_url'
2021
| 'render_ui'
2122
| 'run_file_change_hooks'
2223
| 'run_terminal_command'
@@ -51,6 +52,7 @@ export interface ToolParamsMap {
5152
read_docs: ReadDocsParams
5253
read_files: ReadFilesParams
5354
read_subtree: ReadSubtreeParams
55+
read_url: ReadUrlParams
5456
render_ui: RenderUiParams
5557
run_file_change_hooks: RunFileChangeHooksParams
5658
run_terminal_command: RunTerminalCommandParams
@@ -276,6 +278,16 @@ export interface ReadSubtreeParams {
276278
maxTokens?: number
277279
}
278280

281+
/**
 * Parameters for the `read_url` tool, which fetches a URL and extracts
 * readable text from the page.
 */
export interface ReadUrlParams {
  /** Full URL of the page to fetch; it must start with http:// or https://. */
  url: string
  /**
   * Upper bound on the number of extracted text characters returned.
   * Defaults to 20000. The tool's input schema accepts integers from
   * 1000 to 50000.
   */
  max_chars?: number
}
290+
279291
/**
280292
* Render a small interactive UI widget in the Codebuff CLI. Currently supports a button that opens a link.
281293
*/

common/src/tools/constants.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export const toolNames = [
3939
'read_docs',
4040
'read_files',
4141
'read_subtree',
42+
'read_url',
4243
'render_ui',
4344
'run_file_change_hooks',
4445
'run_terminal_command',
@@ -73,6 +74,7 @@ export const publishedTools = [
7374
'read_docs',
7475
'read_files',
7576
'read_subtree',
77+
'read_url',
7678
'render_ui',
7779
'run_file_change_hooks',
7880
'run_terminal_command',

common/src/tools/list.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import { proposeWriteFileParams } from './params/tool/propose-write-file'
1919
import { readDocsParams } from './params/tool/read-docs'
2020
import { readFilesParams } from './params/tool/read-files'
2121
import { readSubtreeParams } from './params/tool/read-subtree'
22+
import { readUrlParams } from './params/tool/read-url'
2223
import { renderUIParams } from './params/tool/render-ui'
2324
import { runFileChangeHooksParams } from './params/tool/run-file-change-hooks'
2425
import { runTerminalCommandParams } from './params/tool/run-terminal-command'
@@ -59,6 +60,7 @@ export const toolParams = {
5960
read_docs: readDocsParams,
6061
read_files: readFilesParams,
6162
read_subtree: readSubtreeParams,
63+
read_url: readUrlParams,
6264
render_ui: renderUIParams,
6365
run_file_change_hooks: runFileChangeHooksParams,
6466
run_terminal_command: runTerminalCommandParams,
@@ -131,6 +133,10 @@ export const clientToolCallSchema = z.discriminatedUnion('toolName', [
131133
toolName: z.literal('run_file_change_hooks'),
132134
input: toolParams.run_file_change_hooks.inputSchema,
133135
}),
136+
z.object({
137+
toolName: z.literal('read_url'),
138+
input: toolParams.read_url.inputSchema,
139+
}),
134140
z.object({
135141
toolName: z.literal('run_terminal_command'),
136142
input: toolParams.run_terminal_command.inputSchema.and(
common/src/tools/params/tool/read-url.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import z from 'zod/v4'
2+
3+
import { $getNativeToolCallExampleString, jsonToolResultSchema } from '../utils'
4+
5+
import type { $ToolParams } from '../../constants'
6+
7+
const toolName = 'read_url'
const endsAgentStep = true

/**
 * Reports whether `candidate` parses as a URL whose scheme is http or https.
 * Input that `new URL` rejects yields false instead of throwing.
 */
const isHttpOrHttpsUrl = (candidate: string): boolean => {
  let protocol: string
  try {
    protocol = new URL(candidate).protocol
  } catch {
    return false
  }
  return protocol === 'http:' || protocol === 'https:'
}

/** Address of the page to read; must be a URL using the http or https scheme. */
const urlField = z
  .url()
  .refine(isHttpOrHttpsUrl, 'URL must use http:// or https://')
  .describe(
    'The full http:// or https:// URL to fetch and extract readable text from.',
  )

/**
 * Optional cap on returned text: an integer between 1000 and 50000, with a
 * declared default of 20000.
 */
const maxCharsField = z
  .number()
  .int()
  .min(1_000)
  .max(50_000)
  .default(20_000)
  .optional()
  .describe(
    'Maximum number of extracted text characters to return. Defaults to 20000.',
  )

/** Input accepted by the read_url tool: a required `url` and an optional `max_chars`. */
const inputSchema = z
  .object({
    url: urlField,
    max_chars: maxCharsField,
  })
  .describe('Fetch a URL and extract readable text from the page.')
38+
39+
/** Example read_url invocation, rendered by the shared example-string helper. */
const exampleToolCall = $getNativeToolCallExampleString({
  toolName,
  inputSchema,
  input: {
    url: 'https://react.dev/reference/react/useActionState',
    max_chars: 12000,
  },
  endsAgentStep,
})

/**
 * Usage guidance for the read_url tool: purpose, when to use it, what to do
 * instead of curl, and a rendered example call. Paragraphs are separated by a
 * blank line and the final string is trimmed.
 */
const description = [
  'Purpose: Fetch a URL returned by web_search and extract the readable page text so you can answer with source-backed evidence.',
  'Use this after web_search when snippets are not enough. Prefer authoritative, relevant pages from the search results. The tool follows redirects, extracts titles and metadata, strips scripts/styles/navigation boilerplate from HTML, and returns normalized readable text.',
  'Do not use run_terminal_command with curl just to inspect web pages; use read_url instead. If read_url reports unsupported content or extraction failure, then choose a different search result or explain the limitation.',
  `Example:\n${exampleToolCall}`,
]
  .join('\n\n')
  .trim()
57+
58+
/**
 * Output variant that carries page content: the requested and final URLs,
 * a numeric status, optional content type / title / description, the
 * extracted text, and a flag saying whether that text was truncated.
 */
const pageContentResult = z.object({
  url: z.string(),
  finalUrl: z.string(),
  status: z.number(),
  contentType: z.string().optional(),
  title: z.string().optional(),
  description: z.string().optional(),
  text: z.string(),
  truncated: z.boolean(),
})

/** Output variant that carries an error message and, optionally, the URL. */
const errorResult = z.object({
  url: z.string().optional(),
  errorMessage: z.string(),
})

/**
 * Tool definition for read_url: its name, whether it ends the agent step,
 * its description, and its input and output schemas.
 */
export const readUrlParams = {
  toolName,
  endsAgentStep,
  description,
  inputSchema,
  outputSchema: jsonToolResultSchema(
    z.union([pageContentResult, errorResult]),
  ),
} satisfies $ToolParams

0 commit comments

Comments
 (0)