From 739da290054e0c3f8e1bc76f4a5822aa8c969440 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Wed, 17 Jun 2026 15:07:02 -0500 Subject: [PATCH 01/11] feat: add file upload/download support to browserless_agent --- CHANGELOG.md | 3 + src/@types/types.d.ts | 3 +- src/index.ts | 62 ++++------- src/lib/download-store.ts | 86 +++++++++++++++ src/lib/http-auth.ts | 60 +++++++++++ src/resources/downloads.ts | 32 ++++++ src/resources/upload-route.ts | 82 ++++++++++++++ src/skills/file-transfers.md | 70 ++++++++++++ src/skills/index.ts | 12 +++ src/skills/system-prompt.ts | 13 ++- src/tools/agent.ts | 194 ++++++++++++++++++++++++++++++++++ src/tools/schemas.ts | 64 +++++++++++ test/lib/http-auth.spec.ts | 54 ++++++++++ test/skills/skills.spec.ts | 33 +++++- test/tools/agent.spec.ts | 162 ++++++++++++++++++++++++++++ 15 files changed, 883 insertions(+), 47 deletions(-) create mode 100644 src/lib/download-store.ts create mode 100644 src/lib/http-auth.ts create mode 100644 src/resources/downloads.ts create mode 100644 src/resources/upload-route.ts create mode 100644 src/skills/file-transfers.md create mode 100644 test/lib/http-auth.spec.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a76921..53b18a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ ## Latest +- Add file upload/download support to `browserless_agent` via the `uploadFile` and `getDownloads` commands, plus a `file-transfers` skill. Downloads are persisted to the MCP server's filesystem and returned as handles — a path in stdio mode, a `browserless-download://` resource link in HTTP mode — so large base64 payloads never pass through the conversation. `uploadFile` accepts a `handle` (re-upload a downloaded file in either transport), a local `path` (stdio), or base64 `content`. Honors the server-side 10MB/50MB transfer cap. 
+- Add a `POST /upload` HTTP endpoint (httpStream transport) for staging a local file into the temp store out-of-band: `curl -F file=@path "/upload?token="` returns a handle for `uploadFile`, so HTTP-mode uploads never base64 through the conversation. Token-gated (same rules as the MCP surface); staged files share the 15-minute TTL store. + ## v1.6.1 Drop vestigial mcp-proxy postinstall patch that broke `npm install` in consumers diff --git a/src/@types/types.d.ts b/src/@types/types.d.ts index c375faf..c10e622 100644 --- a/src/@types/types.d.ts +++ b/src/@types/types.d.ts @@ -217,7 +217,8 @@ export type SkillId = | 'screenshots' | 'tabs' | 'autonomous-login' - | 'auth-profile'; + | 'auth-profile' + | 'file-transfers'; export interface DetectContext { snapshot?: SnapshotResult; diff --git a/src/index.ts b/src/index.ts index 7748da5..180c4e6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -17,13 +17,13 @@ import { registerCrawlTool } from './tools/crawl.js'; import { registerPerformanceTool } from './tools/performance.js'; import { registerApiDocsResource } from './resources/api-docs.js'; import { registerStatusResource } from './resources/status.js'; +import { registerDownloadResources } from './resources/downloads.js'; +import { registerUploadRoute } from './resources/upload-route.js'; import { registerScrapeUrlPrompt } from './prompts/scrape-url.js'; import { registerExtractContentPrompt } from './prompts/extract-content.js'; import { AnalyticsHelper } from './lib/analytics.js'; -import { - resolveApiKey, - installSupabaseTokenTtlPatch, -} from './lib/account-resolver.js'; +import { installSupabaseTokenTtlPatch } from './lib/account-resolver.js'; +import { resolveBrowserlessAuth } from './lib/http-auth.js'; import { BoundedEventStore } from './lib/bounded-event-store.js'; import { RedisOAuthProxy } from './lib/redis-oauth-proxy.js'; import { Redis } from 'ioredis'; @@ -107,45 +107,17 @@ const hybridAuthenticate = config.transport === 'httpStream' ? 
async (request: IncomingMessage) => { const params = new URLSearchParams(request.url?.split('?')[1] ?? ''); - const authHeader = request.headers.authorization as string | undefined; - const headerToken = authHeader?.startsWith('Bearer ') - ? authHeader.slice(7) - : authHeader; - - const apiUrl = - (request.headers['x-browserless-api-url'] as string) ?? - params.get('browserlessUrl') ?? - config.browserlessApiUrl; - - // JWTs have 3 dot-separated base64url segments; plain API keys do not. - const isJwt = headerToken ? headerToken.split('.').length === 3 : false; - - // 1. Authorization header with plain API key - if (headerToken && !isJwt) { - return { token: headerToken, apiUrl } as BrowserlessSession; - } - - // 2. ?token= query param - const directToken = params.get('token') || undefined; - if (directToken) { - return { token: directToken, apiUrl } as BrowserlessSession; - } - - // 3. Authorization header with JWT → decode Supabase token directly - if (isJwt && headerToken) { - const { apiKey } = await resolveApiKey( - config.supabaseUrl, - config.supabaseServiceRoleKey, - headerToken, - ); - return { token: apiKey, apiUrl } as BrowserlessSession; - } - - throw new Error( - 'No Browserless API token provided. 
' + - 'Pass it as Authorization: Bearer header, ' + - '?token= query parameter, or authenticate via OAuth.', - ); + return (await resolveBrowserlessAuth( + { + authHeader: request.headers.authorization as string | undefined, + tokenQuery: params.get('token') || undefined, + apiUrlHeader: request.headers['x-browserless-api-url'] as + | string + | undefined, + browserlessUrlQuery: params.get('browserlessUrl') || undefined, + }, + config, + )) as BrowserlessSession; } : undefined; @@ -167,6 +139,7 @@ registerCrawlTool(server, config, analytics); registerPerformanceTool(server, config, analytics); registerApiDocsResource(server, config); registerStatusResource(server, config); +registerDownloadResources(server); registerScrapeUrlPrompt(server); registerExtractContentPrompt(server); @@ -190,6 +163,9 @@ if (config.transport === 'httpStream') { stateless: false, }, }); + // Out-of-band file staging for uploads (the LLM curls a file here and gets a + // handle, instead of base64-ing it through the conversation). httpStream only. + registerUploadRoute(server, config); console.error( `[browserless-mcp] HTTP Streamable server listening on port ${config.port}`, ); diff --git a/src/lib/download-store.ts b/src/lib/download-store.ts new file mode 100644 index 0000000..23f91cc --- /dev/null +++ b/src/lib/download-store.ts @@ -0,0 +1,86 @@ +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { basename, join } from 'node:path'; + +/** + * A captured download persisted to the MCP server's filesystem. The base64 + * payload never re-enters the model's context: getDownloads returns a handle + * (a path in stdio mode, a `browserless-download://` URI in HTTP mode) and the + * bytes are read back from disk only when actually needed — by a resource read + * (HTTP) or an uploadFile that references the handle. 
+ */ +export interface StoredDownload { + id: string; + path: string; + filename: string; + mimeType: string; + size: number; +} + +export const DOWNLOAD_URI_SCHEME = 'browserless-download'; + +// Temp files are short-lived: a stored file is dropped (registry entry + bytes +// on disk) TTL_MS after it was written — reads do not extend it. Re-download/re-stage if it's needed later. +const TTL_MS = 15 * 60 * 1000; + +const store = new Map<string, StoredDownload>(); +let counter = 0; + +// Where captured downloads land on the MCP server. Defaults to a temp dir; +// override with BROWSERLESS_DOWNLOAD_DIR (e.g. a stable downloads folder in +// local/stdio setups). Files there are still removed when their TTL expires. +const downloadsDir = (): string => + process.env.BROWSERLESS_DOWNLOAD_DIR || + join(tmpdir(), 'browserless-mcp-downloads'); + +/** Build the resource URI for a stored download id. */ +export const downloadUri = (id: string): string => + `${DOWNLOAD_URI_SCHEME}://${id}`; + +/** Persist bytes to disk as `{id}-{basename}` and register them under a fresh handle that expires after TTL_MS. */ +export const storeDownload = async ( + filename: string, + mimeType: string, + data: Buffer, +): Promise<StoredDownload> => { + const dir = downloadsDir(); + await mkdir(dir, { recursive: true }); + counter += 1; + const id = `${Date.now().toString(36)}-${counter}`; + const safe = basename(filename) || 'download'; + // Prefix with the id so downloads that share a filename don't collide. + const path = join(dir, `${id}-${safe}`); + await writeFile(path, data); + const record: StoredDownload = { + id, + path, + filename: safe, + mimeType, + size: data.byteLength, + }; + store.set(id, record); + + // Expire after the TTL so temp files don't accumulate. unref() keeps the + // timer from holding the process open. + const timer = setTimeout(() => { + store.delete(id); + void rm(path, { force: true }).catch(() => {}); + }, TTL_MS); + timer.unref?.(); + + return record; +}; + +/** + * Resolve a handle to a stored download. 
Accepts a raw id, a + * `browserless-download://` URI, or (for convenience) the absolute path of + * a previously stored download. + */ +export const getDownload = (handle: string): StoredDownload | undefined => { + if (handle.startsWith(`${DOWNLOAD_URI_SCHEME}://`)) { + return store.get(handle.slice(`${DOWNLOAD_URI_SCHEME}://`.length)); + } + const byId = store.get(handle); + if (byId) return byId; + return [...store.values()].find((r) => r.path === handle); +}; diff --git a/src/lib/http-auth.ts b/src/lib/http-auth.ts new file mode 100644 index 0000000..0f32a07 --- /dev/null +++ b/src/lib/http-auth.ts @@ -0,0 +1,60 @@ +import { resolveApiKey } from './account-resolver.js'; +import type { McpConfig } from '../@types/types.js'; + +export interface ResolvedBrowserlessAuth { + token: string; + apiUrl: string; +} + +export interface AuthInput { + authHeader?: string; + tokenQuery?: string; + apiUrlHeader?: string; + browserlessUrlQuery?: string; +} + +/** + * Resolve a Browserless API token from an inbound HTTP request, in order: + * (1) Authorization header with a plain API key, (2) `?token=` query param, + * (3) Authorization header with a Supabase JWT → resolved via PostgREST. + * Throws when none is present/valid. Shared by the FastMCP `authenticate` + * callback and the custom `/upload` route so both gate on the same rules. + */ +export const resolveBrowserlessAuth = async ( + input: AuthInput, + config: Pick< + McpConfig, + 'browserlessApiUrl' | 'supabaseUrl' | 'supabaseServiceRoleKey' + >, +): Promise<ResolvedBrowserlessAuth> => { + const headerToken = input.authHeader?.startsWith('Bearer ') + ? input.authHeader.slice(7) + : input.authHeader; + + const apiUrl = + input.apiUrlHeader ?? input.browserlessUrlQuery ?? config.browserlessApiUrl; + + // JWTs have 3 dot-separated base64url segments; plain API keys do not. + const isJwt = headerToken ? 
headerToken.split('.').length === 3 : false; + + if (headerToken && !isJwt) { + return { token: headerToken, apiUrl }; + } + if (input.tokenQuery) { + return { token: input.tokenQuery, apiUrl }; + } + if (isJwt && headerToken) { + const { apiKey } = await resolveApiKey( + config.supabaseUrl, + config.supabaseServiceRoleKey, + headerToken, + ); + return { token: apiKey, apiUrl }; + } + + throw new Error( + 'No Browserless API token provided. ' + + 'Pass it as Authorization: Bearer <token> header, ' + + '?token=<token> query parameter, or authenticate via OAuth.', + ); +}; diff --git a/src/resources/downloads.ts b/src/resources/downloads.ts new file mode 100644 index 0000000..9217cb2 --- /dev/null +++ b/src/resources/downloads.ts @@ -0,0 +1,32 @@ +import { FastMCP, UserError } from 'fastmcp'; +import { readFile } from 'node:fs/promises'; +import { getDownload } from '../lib/download-store.js'; + +/** + * Exposes captured downloads as readable MCP resources. getDownloads returns a + * `browserless-download://` link (HTTP mode); the client reads the bytes on + * demand here — so the base64 payload stays out of the model's context until a + * consumer genuinely asks for it. 
+ */ +export function registerDownloadResources(server: FastMCP): void { + server.addResourceTemplate({ + uriTemplate: 'browserless-download://{id}', + name: 'Browserless Download', + mimeType: 'application/octet-stream', + arguments: [ + { + name: 'id', + description: 'The download handle id returned by getDownloads.', + required: true, + }, + ], + async load({ id }) { + const record = getDownload(id); + if (!record) { + throw new UserError(`Unknown download handle: ${id}`); + } + const data = await readFile(record.path); + return { blob: data.toString('base64'), mimeType: record.mimeType }; + }, + }); +} diff --git a/src/resources/upload-route.ts b/src/resources/upload-route.ts new file mode 100644 index 0000000..3b2f7d4 --- /dev/null +++ b/src/resources/upload-route.ts @@ -0,0 +1,82 @@ +import type { FastMCP } from 'fastmcp'; +import { downloadUri, storeDownload } from '../lib/download-store.js'; +import { resolveBrowserlessAuth } from '../lib/http-auth.js'; +import type { McpConfig } from '../@types/types.js'; + +// Hard ceiling on a single staged upload (mirrors the transfer cap). +const MAX_UPLOAD_BYTES = 50 * 1024 * 1024; + +/** + * Registers `POST /upload` on the HTTP-stream server. Clients (e.g. an LLM with + * shell access) push a file's bytes here once — over plain HTTP, NOT through the + * conversation — and get back a handle they pass to the agent's `uploadFile`: + * + * curl -s -F file=@/path/to/file "<mcp-url>/upload?token=<token>" + * → { "ok": true, "handle": "browserless-download://<id>", ... } + * + * Requires the same Browserless token as the MCP surface (?token= or + * Authorization: Bearer). The handle resolves against the shared temp-file + * store (15-min TTL), so the + * base64 payload never enters the model's context. Only meaningful for the + * httpStream transport; in stdio mode `uploadFile { path }` reads files directly. 
+ */ +export function registerUploadRoute(server: FastMCP, config: McpConfig): void { + const app = server.getApp(); + + app.post('/upload', async (c) => { + // Raw Hono routes bypass FastMCP's authenticate, so gate the route on the + // same Browserless token rules as the MCP surface — no anonymous drops. + try { + await resolveBrowserlessAuth( + { + authHeader: c.req.header('authorization'), + tokenQuery: c.req.query('token'), + apiUrlHeader: c.req.header('x-browserless-api-url'), + browserlessUrlQuery: c.req.query('browserlessUrl'), + }, + config, + ); + } catch { + return c.json({ ok: false, error: 'Unauthorized' }, 401); + } + + let file: unknown; + try { + const body = await c.req.parseBody(); + file = body.file; + } catch { + return c.json( + { ok: false, error: 'Expected multipart/form-data with a "file" field' }, + 400, + ); + } + + if (!(file instanceof File)) { + return c.json( + { ok: false, error: 'Missing multipart "file" field (use -F file=@path)' }, + 400, + ); + } + + // Check the reported size first so an oversized file is never copied into a Buffer. + if (file.size > MAX_UPLOAD_BYTES) { + return c.json( + { ok: false, error: 'FileTooLarge', maxBytes: MAX_UPLOAD_BYTES }, + 413, + ); + } + const buf = Buffer.from(await file.arrayBuffer()); + const record = await storeDownload( + file.name || 'upload', + file.type || 'application/octet-stream', + buf, + ); + return c.json({ + ok: true, + handle: downloadUri(record.id), + filename: record.filename, + mimeType: record.mimeType, + size: record.size, + }); + }); +} diff --git a/src/skills/file-transfers.md b/src/skills/file-transfers.md new file mode 100644 index 0000000..dead597 --- /dev/null +++ b/src/skills/file-transfers.md @@ -0,0 +1,70 @@ +# File Uploads & Downloads + +Transferring files to/from the browser. Two methods: `uploadFile` (attach files to an `<input type="file">`) and `getDownloads` (retrieve files Chrome downloaded). + +**Key idea — never move bytes through this conversation.** Large files as base64 blow up the context. 
So downloads come back as a *handle* (a path or a `browserless-download://` URI), and uploads take that handle (or a local path) instead of base64. The MCP server reads/writes the actual bytes on disk; you only pass small references. Only fall back to base64 `content` when you genuinely have raw bytes and no handle. + +## Downloading + +Downloads are captured automatically once the session starts. Trigger the download (click a link/button), then drain the buffer with `getDownloads`: + +```json +{ + "commands": [ + { "method": "click", "params": { "selector": "a#export-csv" } }, + { "method": "getDownloads" } + ] +} +``` + +- Downloads complete asynchronously. If `getDownloads` returns nothing, the file isn't finished — `waitForTimeout` and call `getDownloads` again. +- `getDownloads` **drains** the buffer: each completed file is returned once, then cleared. + +**Local (stdio) mode:** files are written to disk (`BROWSERLESS_DOWNLOAD_DIR`, default a temp dir). The response lists the saved **path** for each file — hand that path straight back to `uploadFile`. Bytes never enter your context. + +**Remote (HTTP) mode:** each file comes back as a `resource_link` with a `browserless-download://` URI. Read it on demand via the MCP resource (`resources/read`) to get the bytes, or pass the URI back as an upload `handle`. Still no base64 in context. + +## Uploading + +```json +{ + "method": "uploadFile", + "params": { + "selector": "input[type=file]", + "files": [ { "handle": "browserless-download://abc-1", "name": "report.pdf" } ] + } +} +``` + +Each file is resolved in this order — pick the first you have: + +- **`handle`** — a handle from a previous `getDownloads`, or from staging a local file (below). The server reads the stored file. Works in **both** transports. This is how you re-upload a file you just downloaded — zero bytes through the conversation. +- **`path`** — a local filesystem path. **stdio only** (HTTP can't read your filesystem). 
The server reads and encodes it. +- **`content`** — base64 bytes. Last resort; avoid for large files. + +### Uploading a NEW local file in HTTP mode + +The server can't read your filesystem, so stage the file once over HTTP (bytes go via `curl`, never through the conversation), then use the returned handle: + +```bash +curl -s -F file=@"/path/to/file.png" "<mcp-url>/upload?token=<token>" +# → { "ok": true, "handle": "browserless-download://abc-1", "filename": "file.png", ... } +``` + +The `/upload` route requires your Browserless token (`?token=` or `Authorization: Bearer`). The `uploadFile` path-rejection error gives you the exact command with the token filled in. + +```json +{ "method": "uploadFile", "params": { "selector": "input[type=file]", "files": [ { "handle": "browserless-download://abc-1" } ] } } +``` + +Staged files share the download store (15-minute TTL). **Never** base64 a file into `content` by hand — that's what staging avoids. + +Other params: +- `selector` — the file input. If hidden behind a styled button, the input still exists in the DOM; target it directly (use a `< ` deep selector for shadow DOM). +- `name` / `mimeType` — optional; default from the handle/path, mimeType inferred from the extension. +- Triggers native `input`/`change` events, so frameworks (React, etc.) see the file. +- Returns `{ "ok": true }`, or `{ "ok": false, "error": "SelectorNotFound" | "InvalidTarget" | "FileTooLarge" }`. + +## Size limits + +Uploads and downloads are capped (server default 10MB, hard max 50MB). Oversized downloads report `error: "FileTooLarge"` (metadata, no data); oversized uploads return `ok: false, error: "FileTooLarge"`. 
diff --git a/src/skills/index.ts b/src/skills/index.ts index 1632a70..47048ec 100644 --- a/src/skills/index.ts +++ b/src/skills/index.ts @@ -35,6 +35,8 @@ const LOGIN_NUDGE_RE = const TAB_ERROR_CODES = ['TAB_NOT_FOUND', 'TAB_CLOSED', 'TAB_LIMIT_EXCEEDED']; const TAB_COMMAND_METHODS = ['getTabs', 'switchTab', 'createTab', 'closeTab']; +const FILE_TRANSFER_METHODS = ['uploadFile', 'getDownloads']; + const evalPredicate = (p: Predicate, ctx: DetectContext): boolean => { switch (p.kind) { case 'snapshot.has-element': { @@ -174,6 +176,16 @@ const SKILL_SPECS: SkillSpec[] = [ path: 'src/skills/auth-profile.md', triggers: [], }, + { + id: 'file-transfers', + path: 'src/skills/file-transfers.md', + triggers: [ + // A file input on the page — uploads are likely next. + [{ kind: 'snapshot.has-input-type', type: 'file' }], + // The model issued an upload/download command. + [{ kind: 'command.method', methods: FILE_TRANSFER_METHODS }], + ], + }, { id: 'captchas', path: 'src/skills/captchas.md', diff --git a/src/skills/system-prompt.ts b/src/skills/system-prompt.ts index e79b640..84ab3f8 100644 --- a/src/skills/system-prompt.ts +++ b/src/skills/system-prompt.ts @@ -66,6 +66,16 @@ Only click when href is \`javascript:\` / \`#\` / missing. 3. **evaluate** { content } — JS (IIFE): \`(() => { return ... })()\` 4. **html** { selector } — raw HTML +## Files (upload / download) +**NEVER read a file's bytes or base64 into this conversation, and NEVER split/reassemble/inline base64 by hand.** That is the wrong tool and will stall. +- **Upload a local file (stdio)**: \`uploadFile { selector, files: [{ path }] }\` — the server reads + encodes it. +- **Upload a local file (HTTP)**: the server can't read your disk. Stage it once over HTTP, then use the handle: + \`curl -s -F file=@"/path/to/file" "/upload?token="\` → returns \`{ "handle": "browserless-download://…" }\` → \`uploadFile { files: [{ handle }] }\`. 
(The path-rejection error gives you the exact command with your token + URL filled in.) +- **Re-upload something from \`getDownloads\`**: pass its \`handle\` (works in both modes). +- **Download**: trigger it (click/goto), then \`getDownloads\` — returns a path (stdio) or \`browserless-download://\` handle (HTTP), never the bytes. Reuse that path/handle directly in \`uploadFile\`. +- base64 \`content\` is a LAST RESORT — tiny inline data only. +- Full recipe: \`file-transfers\` skill. + ## Batching — Maximize Per Call Plan ALL actions from snapshot before next snapshot. @@ -126,4 +136,5 @@ Available skills: - **screenshots** — when to screenshot vs. snapshot, scope and format choices - **tabs** — multi-tab workflows, peek-without-switching - **autonomous-login** — load before authenticating: when the user asked you to log in, when a wall blocks the task, or as soon as a password input appears. Covers the don't-login-by-default posture, contextual credential matching, MFA/captcha branches, and the required final JSON response shape. -- **captchas** — the \`solve\` command, response semantics, escalation path (Cloud-only)`; +- **captchas** — the \`solve\` command, response semantics, escalation path (Cloud-only) +- **file-transfers** — \`uploadFile\` / \`getDownloads\`, stdio-path vs. base64 content, size caps`; diff --git a/src/tools/agent.ts b/src/tools/agent.ts index e6876f2..6526b52 100644 --- a/src/tools/agent.ts +++ b/src/tools/agent.ts @@ -1,6 +1,13 @@ import { FastMCP, UserError } from 'fastmcp'; import type { Content } from 'fastmcp'; +import { readFile } from 'node:fs/promises'; +import { basename } from 'node:path'; import { z } from 'zod'; +import { + downloadUri, + getDownload, + storeDownload, +} from '../lib/download-store.js'; import { getOrCreateSession, send, @@ -102,6 +109,170 @@ export const formatScreenshotContent = ( return content; }; +// Hard ceiling mirrored from the enterprise side (MAX_FILE_TRANSFER_MB cap). 
+// The server enforces its own (possibly lower) limit; this just stops the MCP +// from base64-encoding/shipping an oversized local upload before it ever hits the wire. +const FILE_TRANSFER_MAX_BYTES = 50 * 1024 * 1024; + +type DownloadEntry = { + filename?: string; + mimeType?: string; + size?: number; + data?: string; + error?: string; + maxBytes?: number; + message?: string; +}; + +// Resolve each uploadFile entry to base64 `content` before it hits the wire, +// so the model never has to emit a multi-MB base64 string itself: +// - `content` (base64): used as-is. +// - `handle`: a download handle/URI/path from a prior getDownloads — the MCP +// server reads the stored file. Works in both transports (server-side). +// - `path`: a local filesystem path — stdio only (HTTP can't read the client +// filesystem). +export const normalizeUploadCommand = async ( + cmd: { method: string; params: Record<string, unknown> }, + transport: McpConfig['transport'], + mcpBaseUrl?: string, + token?: string, +): Promise<void> => { + if (cmd.method !== 'uploadFile') return; + const files = cmd.params.files; + if (!Array.isArray(files)) return; + for (const file of files) { + if (!file || typeof file !== 'object') continue; + const f = file as Record<string, unknown>; + if (typeof f.content === 'string' && f.content) continue; + + let buf: Buffer; + let defaultName: string; + + if (typeof f.handle === 'string' && f.handle) { + const record = getDownload(f.handle); + if (!record) { + throw new UserError( + `Unknown upload handle "${f.handle}". Pass a handle returned by ` + + `getDownloads, or supply base64 "content".`, + ); + } + buf = await readFile(record.path); + defaultName = record.filename; + delete f.handle; + } else if (typeof f.path === 'string' && f.path) { + if (transport !== 'stdio') { + const base = mcpBaseUrl ?? ''; + const tokenQ = `?token=${encodeURIComponent(token ?? '')}`; + throw new UserError( + 'uploadFile "path" is not available in HTTP mode (the server can\'t ' + + 'read your filesystem). 
Stage the file once over HTTP, then pass the ' + + 'returned handle — do NOT base64 it through the conversation:\n' + + ` curl -s -F file=@"${f.path}" "${base}/upload${tokenQ}"\n` + + 'then: uploadFile { files: [{ handle: "<handle>" }] }', + ); + } + const path = f.path; + buf = await readFile(path).catch((e: unknown) => { + throw new UserError( + `Failed to read upload file "${path}": ` + + (e instanceof Error ? e.message : String(e)), + ); + }); + defaultName = basename(path); + delete f.path; + } else { + continue; + } + + if (buf.byteLength > FILE_TRANSFER_MAX_BYTES) { + throw new UserError( + `Upload file "${defaultName}" is ${buf.byteLength} bytes, over the ` + + `50MB limit.`, + ); + } + f.content = buf.toString('base64'); + if (!f.name) f.name = defaultName; + } +}; + +const describeFailedDownload = (d: DownloadEntry): string => + `${d.filename ?? 'unknown'}: ${d.error ?? 'no data'}` + + (d.maxBytes ? ` (max ${d.maxBytes} bytes)` : ''); + +// Persist a download to the server's filesystem (out of the model's context) +// and return its handle. Returns null for failed/empty entries. +const persistDownload = async ( + d: DownloadEntry, +): Promise<Awaited<ReturnType<typeof storeDownload>> | null> => { + if (d.error || !d.data || !d.filename) return null; + return storeDownload( + d.filename, + d.mimeType ?? 'application/octet-stream', + Buffer.from(d.data, 'base64'), + ); +}; + +// stdio: files live on the same machine, so the handle is the on-disk path. The +// model gets paths it can hand straight to uploadFile — no base64 in context. 
+export const formatDownloadsStdio = async ( + downloads: DownloadEntry[], + prefix: string, + skills: string, +): Promise<Content[]> => { + const lines: string[] = []; + for (const d of downloads) { + const record = await persistDownload(d); + if (!record) { + lines.push(`- ${describeFailedDownload(d)}`); + continue; + } + lines.push( + `- ${record.path} (${record.mimeType}, ${record.size} bytes) — ` + + `reuse as uploadFile { path: "${record.path}" }`, + ); + } + const text = downloads.length + ? `${prefix}Saved ${downloads.length} download(s):\n${lines.join('\n')}` + : `${prefix}No new downloads.`; + const content: Content[] = [{ type: 'text', text }]; + if (skills) content.push({ type: 'text', text: skills }); + return content; +}; + +// httpStream: no shared disk. Return a resource_link per file (a small handle, +// not the bytes) — the client reads it on demand via resources/read, and the +// same handle can be passed back to uploadFile. The base64 never enters context. +export const formatDownloadsHttp = async ( + downloads: DownloadEntry[], + prefix: string, + skills: string, +): Promise<Content[]> => { + const content: Content[] = [ + { + type: 'text', + text: downloads.length + ? `${prefix}${downloads.length} download(s) — read via the resource ` + + `link, or reuse the URI as uploadFile { handle }:` + : `${prefix}No new downloads.`, + }, + ]; + for (const d of downloads) { + const record = await persistDownload(d); + if (!record) { + content.push({ type: 'text', text: describeFailedDownload(d) }); + continue; + } + content.push({ + type: 'resource_link', + uri: downloadUri(record.id), + name: record.filename, + mimeType: record.mimeType, + }); + } + if (skills) content.push({ type: 'text', text: skills }); + return content; +}; + // Zod parses params at the tool boundary, so this only needs to supply the {} // default when the field was omitted — the schema never delivers a string, // array, or null here. 
@@ -403,6 +574,19 @@ export function registerAgentTools( ]; } + // Downloads: branch on transport. stdio writes files to disk and + // returns paths; httpStream returns resource_link handles (not the bytes). Either + // way the base64 stays out of the model's text context. + if (last.method === 'getDownloads') { + const downloads = + (lastResult?.downloads as DownloadEntry[] | undefined) ?? []; + const skills = triggered.length > 0 ? renderSkills(triggered) : ''; + const prefix = batchPrefix + (closedSuffix ? `${closedSuffix}\n\n` : ''); + return config.transport === 'stdio' + ? await formatDownloadsStdio(downloads, prefix, skills) + : await formatDownloadsHttp(downloads, prefix, skills); + } + // Screenshot: return as image content block (vision input ≈ 1.5K tokens // vs. ~67K tokens if we dumped the base64 inline as text). if (last.method === 'screenshot') { @@ -428,6 +612,16 @@ export function registerAgentTools( }; try { + // Resolve any local upload paths to base64 once, before the (possibly + // retried) send loop runs. + for (const cmd of commands) { + await normalizeUploadCommand( + cmd, + config.transport, + config.mcpBaseUrl, + token, + ); + } const result = await runCommands(false); sendAnalytics(true); return result; diff --git a/src/tools/schemas.ts b/src/tools/schemas.ts index 2818106..1d03b95 100644 --- a/src/tools/schemas.ts +++ b/src/tools/schemas.ts @@ -455,6 +455,68 @@ const SolveCommandSchema = z.object({ .default({}), }); +const UploadFileCommandSchema = z.object({ + method: z.literal('uploadFile'), + params: z.object({ + selector: z + .string() + .describe('CSS selector of the element'), + files: z + .array( + z.object({ + content: z + .string() + .optional() + .describe( + 'Base64-encoded file content. LAST RESORT — only for tiny data ' + + 'you already hold inline. 
Do NOT read a file into the ' + + 'conversation, and never split/reassemble base64 by hand: use ' + + '`path` (stdio) or `handle` so the server moves the bytes.', + ), + handle: z + .string() + .optional() + .describe( + 'A download handle from a prior getDownloads (a path in stdio ' + + 'mode, a `browserless-download://` URI in HTTP mode). The MCP ' + + 'server reads the stored file — works in both transports and ' + + 'keeps the bytes out of the conversation. Use this to re-upload ' + + 'a file you just downloaded.', + ), + path: z + .string() + .optional() + .describe( + 'Local filesystem path to read and upload. stdio (local) mode ' + + 'only — the MCP server reads and base64-encodes it. In HTTP ' + + 'mode use `handle` or `content` instead.', + ), + name: z + .string() + .optional() + .describe( + 'Filename reported to the page. Defaults to the basename of ' + + '`path`, else "file".', + ), + mimeType: z + .string() + .optional() + .describe('MIME type; inferred from the extension when omitted.'), + }), + ) + .min(1) + .describe( + 'Files to attach. 
Combined decoded size is capped (server default ' + + '10MB, hard max 50MB).', + ), + }), +}); + +const GetDownloadsCommandSchema = z.object({ + method: z.literal('getDownloads'), + params: z.object({}).optional().default({}), +}); + const CloseCommandSchema = z.object({ method: z.literal('close'), params: z.object({}).optional().default({}), @@ -489,6 +551,8 @@ const specificCommandSchemas = [ LiveURLCommandSchema, SolveCommandSchema, ScreenshotCommandSchema, + UploadFileCommandSchema, + GetDownloadsCommandSchema, CloseCommandSchema, ] as const; diff --git a/test/lib/http-auth.spec.ts b/test/lib/http-auth.spec.ts new file mode 100644 index 0000000..d01708a --- /dev/null +++ b/test/lib/http-auth.spec.ts @@ -0,0 +1,54 @@ +import { expect } from 'chai'; +import { resolveBrowserlessAuth } from '../../src/lib/http-auth.js'; + +const config = { + browserlessApiUrl: 'https://api.example.com', + supabaseUrl: 'https://supabase.example.com', + supabaseServiceRoleKey: 'service-role', +}; + +describe('resolveBrowserlessAuth', () => { + it('accepts a plain API key from the Authorization header', async () => { + const auth = await resolveBrowserlessAuth( + { authHeader: 'Bearer plain-token' }, + config, + ); + expect(auth.token).to.equal('plain-token'); + expect(auth.apiUrl).to.equal('https://api.example.com'); + }); + + it('accepts a bare (non-Bearer) Authorization header', async () => { + const auth = await resolveBrowserlessAuth( + { authHeader: 'plain-token' }, + config, + ); + expect(auth.token).to.equal('plain-token'); + }); + + it('accepts a ?token= query param', async () => { + const auth = await resolveBrowserlessAuth( + { tokenQuery: 'query-token' }, + config, + ); + expect(auth.token).to.equal('query-token'); + }); + + it('honors an explicit api url override', async () => { + const auth = await resolveBrowserlessAuth( + { tokenQuery: 't', apiUrlHeader: 'https://eu.example.com' }, + config, + ); + expect(auth.apiUrl).to.equal('https://eu.example.com'); + }); + + 
it('throws when no token is present', async () => { + let threw = false; + try { + await resolveBrowserlessAuth({}, config); + } catch (e) { + threw = true; + expect((e as Error).message).to.match(/No Browserless API token/); + } + expect(threw).to.be.true; + }); +}); diff --git a/test/skills/skills.spec.ts b/test/skills/skills.spec.ts index 2092833..6bef5fe 100644 --- a/test/skills/skills.spec.ts +++ b/test/skills/skills.spec.ts @@ -36,8 +36,8 @@ const CLOUD = 'https://production.browserless.io'; const SELF_HOSTED = 'https://browserless.example.com'; describe('skills/registry', () => { - it('loads all ten skill bodies', () => { - expect(skillsRegistry).to.have.lengthOf(10); + it('loads all eleven skill bodies', () => { + expect(skillsRegistry).to.have.lengthOf(11); const ids = skillsRegistry.map((s) => s.id); expect(ids).to.have.members([ 'shadow-dom', @@ -50,6 +50,7 @@ describe('skills/registry', () => { 'tabs', 'autonomous-login', 'auth-profile', + 'file-transfers', ]); for (const skill of skillsRegistry) { expect(skill.body, `${skill.id} body`).to.be.a('string').and.not.empty; @@ -171,6 +172,34 @@ describe('skills/detectSkills - modals', () => { }); }); +describe('skills/detectSkills - file-transfers', () => { + it('fires when a file input is present', () => { + const ctx = { + snapshot: snapshot([el({ type: 'file', selector: 'input[type=file]' })]), + }; + expect(detectSkills(ctx, createSkillState())).to.include('file-transfers'); + }); + + it('fires on an uploadFile command', () => { + const ctx = { + cmd: { method: 'uploadFile', params: { selector: 'input' } }, + }; + expect(detectSkills(ctx, createSkillState())).to.include('file-transfers'); + }); + + it('fires on a getDownloads command', () => { + const ctx = { cmd: { method: 'getDownloads', params: {} } }; + expect(detectSkills(ctx, createSkillState())).to.include('file-transfers'); + }); + + it('does not fire without a file input or transfer command', () => { + const ctx = { snapshot: snapshot([el({ 
role: 'button', name: 'OK' })]) }; + expect(detectSkills(ctx, createSkillState())).to.not.include( + 'file-transfers', + ); + }); +}); + describe('skills/detectSkills - captchas', () => { it('fires on a Cloudflare challenge URL when on cloud', () => { const ctx = { diff --git a/test/tools/agent.spec.ts b/test/tools/agent.spec.ts index bac8dfb..eda55a8 100644 --- a/test/tools/agent.spec.ts +++ b/test/tools/agent.spec.ts @@ -5,12 +5,19 @@ import type { Content } from 'fastmcp'; import { buildCrossOriginNotice, formatConnectError, + formatDownloadsHttp, + formatDownloadsStdio, formatErrorMessage, formatScreenshotContent, formatSnapshot, + normalizeUploadCommand, registerAgentTools, sanitizeUpgradeBody, } from '../../src/tools/agent.js'; +import { mkdtemp, readFile as fsReadFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { downloadUri, storeDownload } from '../../src/lib/download-store.js'; import { ProfileNotFoundError, UpgradeError, @@ -201,6 +208,161 @@ describe('formatScreenshotContent', () => { }); }); +describe('normalizeUploadCommand', () => { + it('reads a local path into base64 content (stdio)', async () => { + const dir = await mkdtemp(join(tmpdir(), 'mcp-upload-')); + const path = join(dir, 'hello.txt'); + await writeFile(path, 'Hello World!'); + + const cmd = { + method: 'uploadFile', + params: { selector: 'input', files: [{ path }] }, + }; + await normalizeUploadCommand(cmd, 'stdio'); + + const file = (cmd.params.files as Record[])[0]; + expect(file.path).to.be.undefined; + expect(file.name).to.equal('hello.txt'); + expect(Buffer.from(file.content as string, 'base64').toString()).to.equal( + 'Hello World!', + ); + }); + + it('rejects a local path in httpStream mode with a staging recipe', async () => { + const cmd = { + method: 'uploadFile', + params: { selector: 'input', files: [{ path: '/etc/hosts' }] }, + }; + let threw = false; + try { + await normalizeUploadCommand( + cmd, + 
'httpStream', + 'https://mcp.example.com', + 'tok-123', + ); + } catch (e) { + threw = true; + const msg = (e as Error).message; + expect(msg).to.match(/not available in HTTP mode/); + expect(msg).to.include('curl -s -F file=@"/etc/hosts"'); + expect(msg).to.include('https://mcp.example.com/upload?token=tok-123'); + } + expect(threw, 'expected normalizeUploadCommand to throw').to.be.true; + }); + + it('leaves base64 content and non-upload commands untouched', async () => { + const cmd = { + method: 'uploadFile', + params: { selector: 'input', files: [{ content: 'YWJj', name: 'a' }] }, + }; + await normalizeUploadCommand(cmd, 'httpStream'); + const file = (cmd.params.files as Record[])[0]; + expect(file.content).to.equal('YWJj'); + + const other = { method: 'click', params: { selector: 'a' } }; + await normalizeUploadCommand(other, 'stdio'); + expect(other.params.selector).to.equal('a'); + }); + + it('resolves a download handle to base64 content (any transport)', async () => { + const record = await storeDownload( + 'grabbed.bin', + 'application/octet-stream', + Buffer.from('Hello World!'), + ); + const cmd = { + method: 'uploadFile', + params: { + selector: 'input', + files: [{ handle: downloadUri(record.id) }], + }, + }; + await normalizeUploadCommand(cmd, 'httpStream'); + const file = (cmd.params.files as Record[])[0]; + expect(file.handle).to.be.undefined; + expect(file.name).to.equal('grabbed.bin'); + expect(Buffer.from(file.content as string, 'base64').toString()).to.equal( + 'Hello World!', + ); + }); + + it('throws on an unknown upload handle', async () => { + const cmd = { + method: 'uploadFile', + params: { selector: 'input', files: [{ handle: 'nope://missing' }] }, + }; + let threw = false; + try { + await normalizeUploadCommand(cmd, 'stdio'); + } catch (e) { + threw = true; + expect((e as Error).message).to.match(/Unknown upload handle/); + } + expect(threw).to.be.true; + }); +}); + +describe('formatDownloadsHttp', () => { + it('returns a resource_link 
handle, never the base64 bytes', async () => { + const content = await formatDownloadsHttp( + [{ filename: 'report.csv', mimeType: 'text/csv', size: 3, data: 'YWJj' }], + '', + '', + ); + const link = content.find((c) => c.type === 'resource_link') as Extract< + Content, + { type: 'resource_link' } + >; + expect(link).to.exist; + expect(link.uri).to.match(/^browserless-download:\/\//); + expect(link.name).to.equal('report.csv'); + expect(link.mimeType).to.equal('text/csv'); + // The base64 must not appear anywhere in the returned content. + expect(JSON.stringify(content)).to.not.include('YWJj'); + }); + + it('degrades oversized/failed downloads to a text note', async () => { + const content = await formatDownloadsHttp( + [{ filename: 'big.bin', error: 'FileTooLarge', maxBytes: 1048576 }], + '', + '', + ); + expect(content.some((c) => c.type === 'resource_link')).to.be.false; + const note = content[content.length - 1] as Extract< + Content, + { type: 'text' } + >; + expect(note.text).to.match(/big\.bin: FileTooLarge/); + }); +}); + +describe('formatDownloadsStdio', () => { + it('writes the file to disk and reports a reusable path, no base64', async () => { + const dir = await mkdtemp(join(tmpdir(), 'mcp-dl-')); + const prev = process.env.BROWSERLESS_DOWNLOAD_DIR; + process.env.BROWSERLESS_DOWNLOAD_DIR = dir; + try { + const content = await formatDownloadsStdio( + [{ filename: 'report.csv', mimeType: 'text/csv', size: 3, data: 'YWJj' }], + '', + '', + ); + const text = (content[0] as Extract).text; + expect(text).to.include('report.csv'); + expect(text).to.include(dir); + expect(text).to.not.include('YWJj'); + // The reported path points at the written bytes. 
+ const reported = text.split('- ')[1].split(' (')[0]; + const written = await fsReadFile(reported); + expect(written.toString()).to.equal('abc'); + } finally { + if (prev === undefined) delete process.env.BROWSERLESS_DOWNLOAD_DIR; + else process.env.BROWSERLESS_DOWNLOAD_DIR = prev; + } + }); +}); + describe('formatSnapshot', () => { const baseSnap = ( overrides: Partial = {}, From 32560e982924d2155ccd312977f3fb5c5afdc33a Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Wed, 17 Jun 2026 15:07:02 -0500 Subject: [PATCH 02/11] chore: prettier --- CHANGELOG.md | 4 ++-- src/resources/upload-route.ts | 10 ++++++++-- src/skills/file-transfers.md | 15 ++++++++++++--- src/tools/agent.ts | 3 ++- test/tools/agent.spec.ts | 9 ++++++++- 5 files changed, 32 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53b18a6..914f215 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,9 @@ ## [1.6.2](https://github.com/browserless/browserless-mcp/compare/v1.6.1...v1.6.2) (2026-06-08) - ### Bug Fixes -* drop stale COPY patches/ from Dockerfile ([#109](https://github.com/browserless/browserless-mcp/issues/109)) ([976e38d](https://github.com/browserless/browserless-mcp/commit/976e38d4b79643d60485a01cdee0c16486b17afd)) +- drop stale COPY patches/ from Dockerfile ([#109](https://github.com/browserless/browserless-mcp/issues/109)) ([976e38d](https://github.com/browserless/browserless-mcp/commit/976e38d4b79643d60485a01cdee0c16486b17afd)) ## Latest @@ -13,6 +12,7 @@ - Add a `POST /upload` HTTP endpoint (httpStream transport) for staging a local file into the temp store out-of-band: `curl -F file=@path "<mcp-url>/upload?token=<token>"` returns a handle for `uploadFile`, so HTTP-mode uploads never base64 through the conversation. Token-gated (same rules as the MCP surface); staged files share the 15-minute TTL store. 
## v1.6.1 + Drop vestigial mcp-proxy postinstall patch that broke `npm install` in consumers - Dependency updates diff --git a/src/resources/upload-route.ts b/src/resources/upload-route.ts index 3b2f7d4..192300f 100644 --- a/src/resources/upload-route.ts +++ b/src/resources/upload-route.ts @@ -46,14 +46,20 @@ export function registerUploadRoute(server: FastMCP, config: McpConfig): void { file = body.file; } catch { return c.json( - { ok: false, error: 'Expected multipart/form-data with a "file" field' }, + { + ok: false, + error: 'Expected multipart/form-data with a "file" field', + }, 400, ); } if (!(file instanceof File)) { return c.json( - { ok: false, error: 'Missing multipart "file" field (use -F file=@path)' }, + { + ok: false, + error: 'Missing multipart "file" field (use -F file=@path)', + }, 400, ); } diff --git a/src/skills/file-transfers.md b/src/skills/file-transfers.md index dead597..396b02d 100644 --- a/src/skills/file-transfers.md +++ b/src/skills/file-transfers.md @@ -2,7 +2,7 @@ Transferring files to/from the browser. Two methods: `uploadFile` (attach files to an `<input type="file">`) and `getDownloads` (retrieve files Chrome downloaded). -**Key idea — never move bytes through this conversation.** Large files as base64 blow up the context. So downloads come back as a *handle* (a path or a `browserless-download://` URI), and uploads take that handle (or a local path) instead of base64. The MCP server reads/writes the actual bytes on disk; you only pass small references. Only fall back to base64 `content` when you genuinely have raw bytes and no handle. +**Key idea — never move bytes through this conversation.** Large files as base64 blow up the context. So downloads come back as a _handle_ (a path or a `browserless-download://` URI), and uploads take that handle (or a local path) instead of base64. The MCP server reads/writes the actual bytes on disk; you only pass small references. Only fall back to base64 `content` when you genuinely have raw bytes and no handle. 
## Downloading @@ -31,7 +31,9 @@ Downloads are captured automatically once the session starts. Trigger the downlo "method": "uploadFile", "params": { "selector": "input[type=file]", - "files": [ { "handle": "browserless-download://abc-1", "name": "report.pdf" } ] + "files": [ + { "handle": "browserless-download://abc-1", "name": "report.pdf" } + ] } } ``` @@ -54,12 +56,19 @@ curl -s -F file=@"/path/to/file.png" "/upload?token=