diff --git a/README.md b/README.md index 1ffbafa00..2bd471ba5 100644 --- a/README.md +++ b/README.md @@ -675,6 +675,23 @@ The Chrome DevTools MCP server supports the following configuration option: - **Type:** boolean - **Default:** `true` +- **`--screenshotFormat`/ `--screenshot-format`** + Override the default output format used by take_screenshot when the caller does not specify one. JPEG and WebP are ~3-5x smaller than PNG, which helps reduce context size in AI conversations. Unset preserves the existing default ("png"). + - **Type:** string + - **Choices:** `jpeg`, `png`, `webp` + +- **`--screenshotQuality`/ `--screenshot-quality`** + Override the default compression quality (0-100) used by take_screenshot for JPEG and WebP when the caller does not specify one. Lower values mean smaller files. Ignored for PNG. Unset preserves the Puppeteer default. + - **Type:** number + +- **`--screenshotMaxWidth`/ `--screenshot-max-width`** + Maximum width in pixels for screenshots. If the captured image is wider, it is downscaled (preserving aspect ratio) before being returned. Reduces context size in AI conversations. Unset means no resize. + - **Type:** number + +- **`--screenshotMaxHeight`/ `--screenshot-max-height`** + Maximum height in pixels for screenshots. If the captured image is taller, it is downscaled (preserving aspect ratio) before being returned. Can be combined with --screenshot-max-width; the smaller scale factor wins. Unset means no resize. + - **Type:** number + - **`--slim`** Exposes a "slim" set of 3 tools covering navigation, script execution and screenshots only. Useful for basic browser tasks. - **Type:** boolean diff --git a/src/bin/chrome-devtools-mcp-cli-options.ts b/src/bin/chrome-devtools-mcp-cli-options.ts index f510744d3..b5614a17a 100644 --- a/src/bin/chrome-devtools-mcp-cli-options.ts +++ b/src/bin/chrome-devtools-mcp-cli-options.ts @@ -264,6 +264,60 @@ export const cliOptions = { hidden: true, describe: 'Include watchdog PID in Clearcut request headers (for testing).', }, + screenshotFormat: { + type: 'string', + description: + 'Override the default output format used by take_screenshot when the caller does not specify one. JPEG and WebP are ~3-5x smaller than PNG, which helps reduce context size in AI conversations. Unset preserves the existing default ("png").', + choices: ['jpeg', 'png', 'webp'] as const, + }, + screenshotQuality: { + type: 'number', + description: + 'Override the default compression quality (0-100) used by take_screenshot for JPEG and WebP when the caller does not specify one. Lower values mean smaller files. Ignored for PNG. Unset preserves the Puppeteer default.', + coerce: (value: number | undefined) => { + if (value === undefined) { + return; + } + if (!Number.isInteger(value) || value < 0 || value > 100) { + throw new Error( + `Invalid screenshotQuality ${value}. Expected an integer between 0 and 100.`, + ); + } + return value; + }, + }, + screenshotMaxWidth: { + type: 'number', + description: + 'Maximum width in pixels for screenshots. If the captured image is wider, it is downscaled (preserving aspect ratio) before being returned. Reduces context size in AI conversations. Unset means no resize.', + coerce: (value: number | undefined) => { + if (value === undefined) { + return; + } + if (!Number.isInteger(value) || value <= 0) { + throw new Error( + `Invalid screenshotMaxWidth ${value}. Expected a positive integer.`, + ); + } + return value; + }, + }, + screenshotMaxHeight: { + type: 'number', + description: + 'Maximum height in pixels for screenshots. If the captured image is taller, it is downscaled (preserving aspect ratio) before being returned. Can be combined with --screenshot-max-width; the smaller scale factor wins. Unset means no resize.', + coerce: (value: number | undefined) => { + if (value === undefined) { + return; + } + if (!Number.isInteger(value) || value <= 0) { + throw new Error( + `Invalid screenshotMaxHeight ${value}. Expected a positive integer.`, + ); + } + return value; + }, + }, slim: { type: 'boolean', describe: diff --git a/src/telemetry/flag_usage_metrics.json b/src/telemetry/flag_usage_metrics.json index 9982b1838..21fbd6796 100644 --- a/src/telemetry/flag_usage_metrics.json +++ b/src/telemetry/flag_usage_metrics.json @@ -295,5 +295,31 @@ { "name": "category_experimental_third_party", "flagType": "boolean" + }, + { + "name": "screenshot_format_present", + "flagType": "boolean" + }, + { + "name": "screenshot_format", + "flagType": "enum", + "choices": [ + "SCREENSHOT_FORMAT_UNSPECIFIED", + "SCREENSHOT_FORMAT_JPEG", + "SCREENSHOT_FORMAT_PNG", + "SCREENSHOT_FORMAT_WEBP" + ] + }, + { + "name": "screenshot_quality_present", + "flagType": "boolean" + }, + { + "name": "screenshot_max_width_present", + "flagType": "boolean" + }, + { + "name": "screenshot_max_height_present", + "flagType": "boolean" } ] diff --git a/src/tools/screenshot.ts b/src/tools/screenshot.ts index 99abdfb59..9edaa6f01 100644 --- a/src/tools/screenshot.ts +++ b/src/tools/screenshot.ts @@ -5,107 +5,232 @@ */ import {zod} from '../third_party/index.js'; -import type {ElementHandle, Page} from '../third_party/index.js'; +import type { + BoundingBox, + ElementHandle, + Page, + ScreenshotClip, +} from '../third_party/index.js'; import {ToolCategory} from './categories.js'; import {definePageTool} from './ToolDefinition.js'; -export const screenshot = definePageTool({ - name: 'take_screenshot', - description: `Take a screenshot of the page or element.`, - annotations: { - category: ToolCategory.DEBUGGING, - // Not read-only due to filePath param. - readOnlyHint: false, - }, - schema: { - format: zod - .enum(['png', 'jpeg', 'webp']) - .default('png') - .describe('Type of format to save the screenshot as. Default is "png"'), - quality: zod - .number() - .min(0) - .max(100) - .optional() - .describe( - 'Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format.', - ), - uid: zod - .string() - .optional() - .describe( - 'The uid of an element on the page from the page content snapshot. If omitted, takes a page screenshot.', - ), - fullPage: zod - .boolean() - .optional() - .describe( - 'If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid.', +type ScreenshotFormat = 'png' | 'jpeg' | 'webp'; + +async function getSourceBox( + page: Page, + element: ElementHandle | undefined, + fullPage: boolean, +): Promise { + if (element) { + const box = await element.boundingBox(); + return box ?? undefined; + } + if (fullPage) { + const dims = await page.evaluate(() => ({ + width: Math.max( + document.documentElement.scrollWidth, + document.body?.scrollWidth ?? 0, ), - filePath: zod - .string() - .optional() - .describe( - 'The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response.', + height: Math.max( + document.documentElement.scrollHeight, + document.body?.scrollHeight ?? 0, ), - }, - blockedByDialog: true, - verifyFilesSchema: ['filePath'], - handler: async (request, response, context) => { - if (request.params.uid && request.params.fullPage) { - throw new Error('Providing both "uid" and "fullPage" is not allowed.'); + })); + if (dims.width <= 0 || dims.height <= 0) { + return undefined; } + return {x: 0, y: 0, width: dims.width, height: dims.height}; + } + const viewport = page.viewport(); + if (!viewport) { + return undefined; + } + return {x: 0, y: 0, width: viewport.width, height: viewport.height}; +} - let pageOrHandle: Page | ElementHandle; - if (request.params.uid) { - pageOrHandle = await request.page.getElementByUid(request.params.uid); - } else { - pageOrHandle = request.page.pptrPage; - } +function computeDownscaleClip( + box: BoundingBox, + maxWidth: number | undefined, + maxHeight: number | undefined, +): ScreenshotClip | undefined { + const widthScale = + maxWidth !== undefined ? Math.min(1, maxWidth / box.width) : 1; + const heightScale = + maxHeight !== undefined ? Math.min(1, maxHeight / box.height) : 1; + const scale = Math.min(widthScale, heightScale); + if (scale >= 1) { + return undefined; + } + // Skip degenerate sub-pixel results. + if (Math.round(box.width * scale) < 1 || Math.round(box.height * scale) < 1) { + return undefined; + } + return { + x: box.x, + y: box.y, + width: box.width, + height: box.height, + scale, + }; +} - const format = request.params.format; - const quality = format === 'png' ? undefined : request.params.quality; +export const screenshot = definePageTool(args => { + const { + screenshotFormat, + screenshotQuality, + screenshotMaxWidth, + screenshotMaxHeight, + } = args ?? {}; - const screenshot = await pageOrHandle.screenshot({ - type: format, - fullPage: request.params.fullPage, - quality, - optimizeForSpeed: true, // Bonus: optimize encoding for speed - }); + const defaultFormat: ScreenshotFormat = screenshotFormat ?? 'png'; - if (request.params.uid) { - response.appendResponseLine( - `Took a screenshot of node with uid "${request.params.uid}".`, - ); - } else if (request.params.fullPage) { - response.appendResponseLine( - 'Took a screenshot of the full current page.', - ); - } else { - response.appendResponseLine( - "Took a screenshot of the current page's viewport.", - ); - } + return { + name: 'take_screenshot', + description: `Take a screenshot of the page or element.`, + annotations: { + category: ToolCategory.DEBUGGING, + // Not read-only due to filePath param. + readOnlyHint: false, + }, + schema: { + format: zod + .enum(['png', 'jpeg', 'webp']) + .default(defaultFormat) + .describe( + `Type of format to save the screenshot as. Default is "${defaultFormat}"`, + ), + quality: zod + .number() + .min(0) + .max(100) + .optional() + .describe( + 'Compression quality for JPEG and WebP formats (0-100). Higher values mean better quality but larger file sizes. Ignored for PNG format.', + ), + uid: zod + .string() + .optional() + .describe( + 'The uid of an element on the page from the page content snapshot. If omitted, takes a page screenshot.', + ), + fullPage: zod + .boolean() + .optional() + .describe( + 'If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid.', + ), + filePath: zod + .string() + .optional() + .describe( + 'The absolute path, or a path relative to the current working directory, to save the screenshot to instead of attaching it to the response.', + ), + }, + blockedByDialog: true, + verifyFilesSchema: ['filePath'], + handler: async (request, response, context) => { + if (request.params.uid && request.params.fullPage) { + throw new Error('Providing both "uid" and "fullPage" is not allowed.'); + } - if (request.params.filePath) { - const result = await context.saveFile( - screenshot, - request.params.filePath, - `.${format}`, - ); - response.appendResponseLine(`Saved screenshot to ${result.filename}.`); - } else if (screenshot.length >= 2_000_000) { - const {filepath} = await context.saveTemporaryFile( - screenshot, - `screenshot.${request.params.format}`, - ); - response.appendResponseLine(`Saved screenshot to ${filepath}.`); - } else { - response.attachImage({ - mimeType: `image/${request.params.format}`, - data: Buffer.from(screenshot).toString('base64'), - }); - } - }, + const page = request.page.pptrPage; + const element = request.params.uid + ? await request.page.getElementByUid(request.params.uid) + : undefined; + + const format = request.params.format; + const quality = + format === 'png' + ? undefined + : (request.params.quality ?? screenshotQuality); + const fullPage = request.params.fullPage ?? false; + + // Compute a downscale clip when --screenshot-max-width or + // --screenshot-max-height is set and the source exceeds either bound. + // The smaller scale factor wins so both bounds are respected while + // preserving aspect ratio. + let clip: ScreenshotClip | undefined; + if ( + screenshotMaxWidth !== undefined || + screenshotMaxHeight !== undefined + ) { + const box = await getSourceBox(page, element, fullPage); + if (box) { + clip = computeDownscaleClip( + box, + screenshotMaxWidth, + screenshotMaxHeight, + ); + } + } + + let screenshot: Uint8Array; + if (clip) { + // page.screenshot with clip lets the CDP scale param downscale the + // capture for viewport, full-page and element shots alike. We rely on + // Puppeteer's default of captureBeyondViewport=true when a clip is + // present so element/full-page captures below the fold still work. + screenshot = await page.screenshot({ + type: format, + quality, + optimizeForSpeed: true, + clip, + }); + } else if (element) { + screenshot = await element.screenshot({ + type: format, + quality, + optimizeForSpeed: true, + }); + } else { + screenshot = await page.screenshot({ + type: format, + fullPage, + quality, + optimizeForSpeed: true, + }); + } + + if (request.params.uid) { + response.appendResponseLine( + `Took a screenshot of node with uid "${request.params.uid}".`, + ); + } else if (fullPage) { + response.appendResponseLine( + 'Took a screenshot of the full current page.', + ); + } else { + response.appendResponseLine( + "Took a screenshot of the current page's viewport.", + ); + } + + // Narrow `format` at the point of use: in the factory form of + // definePageTool TS widens the Schema generic, which loses the literal + // union from zod.enum on request.params.format. + const extension: '.png' | '.jpeg' | '.webp' = + format === 'jpeg' ? '.jpeg' : format === 'webp' ? '.webp' : '.png'; + + if (request.params.filePath) { + const result = await context.saveFile( + screenshot, + request.params.filePath, + extension, + ); + response.appendResponseLine(`Saved screenshot to ${result.filename}.`); + } else if (screenshot.length >= 2_000_000) { + const {filepath} = await context.saveTemporaryFile( + screenshot, + `screenshot${extension}`, + ); + response.appendResponseLine(`Saved screenshot to ${filepath}.`); + } else { + response.attachImage({ + mimeType: `image/${format}`, + data: Buffer.from(screenshot).toString('base64'), + }); + } + }, + }; }); diff --git a/tests/tools/screenshot.test.ts b/tests/tools/screenshot.test.ts index b6aee504d..984278bef 100644 --- a/tests/tools/screenshot.test.ts +++ b/tests/tools/screenshot.test.ts @@ -10,11 +10,28 @@ import {tmpdir} from 'node:os'; import {join} from 'node:path'; import {describe, it} from 'node:test'; +import type {ParsedArguments} from '../../src/bin/chrome-devtools-mcp-cli-options.js'; import {TextSnapshot} from '../../src/TextSnapshot.js'; import {screenshot} from '../../src/tools/screenshot.js'; import {screenshots} from '../snapshot.js'; import {html, withMcpContext} from '../utils.js'; +const screenshotTool = screenshot({} as ParsedArguments); + +/** + * Reads the pixel width from a PNG buffer's IHDR chunk (bytes 16..19). + */ +function pngWidth(data: Buffer): number { + return data.readUInt32BE(16); +} + +/** + * Reads the pixel height from a PNG buffer's IHDR chunk (bytes 20..23). + */ +function pngHeight(data: Buffer): number { + return data.readUInt32BE(20); +} + describe('screenshot', () => { describe('browser_take_screenshot', () => { it('with default options', async () => { @@ -22,7 +39,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( {params: {format: 'png'}, page: context.getSelectedMcpPage()}, response, context, @@ -41,7 +58,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', quality: 0}, page: context.getSelectedMcpPage(), @@ -60,7 +77,7 @@ describe('screenshot', () => { }); it('with jpeg', async () => { await withMcpContext(async (response, context) => { - await screenshot.handler( + await screenshotTool.handler( {params: {format: 'jpeg'}, page: context.getSelectedMcpPage()}, response, context, @@ -76,7 +93,7 @@ describe('screenshot', () => { }); it('with webp', async () => { await withMcpContext(async (response, context) => { - await screenshot.handler( + await screenshotTool.handler( {params: {format: 'webp'}, page: context.getSelectedMcpPage()}, response, context, @@ -95,7 +112,7 @@ describe('screenshot', () => { const fixture = screenshots.viewportOverflow; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', fullPage: true}, page: context.getSelectedMcpPage(), @@ -130,7 +147,7 @@ describe('screenshot', () => { return el?.scrollIntoViewIfNeeded(); }); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', fullPage: true}, page: context.getSelectedMcpPage(), @@ -159,7 +176,7 @@ describe('screenshot', () => { context.getSelectedMcpPage().textSnapshot = await TextSnapshot.create( context.getSelectedMcpPage(), ); - await screenshot.handler( + await screenshotTool.handler( { params: { format: 'png', @@ -187,7 +204,7 @@ describe('screenshot', () => { const fixture = screenshots.basic; const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); - await screenshot.handler( + await screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(), @@ -231,7 +248,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(), @@ -258,7 +275,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(), @@ -275,6 +292,141 @@ describe('screenshot', () => { } }); + it('honors screenshotFormat default from CLI args', async () => { + const tool = screenshot({ + screenshotFormat: 'jpeg', + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const fixture = screenshots.basic; + const page = context.getSelectedPptrPage(); + await page.setContent(fixture.html); + // No explicit format passed: zod should apply the CLI-driven default. + await tool.handler( + { + params: {format: tool.schema.format.parse(undefined)}, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + + assert.equal(response.images.length, 1); + assert.equal(response.images[0].mimeType, 'image/jpeg'); + }); + }); + + it('keeps "png" as default format when no CLI override is set', async () => { + const tool = screenshot({} as ParsedArguments); + assert.equal(tool.schema.format.parse(undefined), 'png'); + }); + + it('downscales viewport screenshot when screenshotMaxWidth is set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 100, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + assert.equal(response.images.length, 1); + const buf = Buffer.from(response.images[0].data, 'base64'); + assert.equal(pngWidth(buf), 100); + // Aspect ratio preserved: 800x600 -> 100x75. + assert.equal(pngHeight(buf), 75); + }); + }); + + it('downscales using the smaller scale when both max-width and max-height are set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 400, + screenshotMaxHeight: 60, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + // height bound dictates: 60/600 = 0.1 -> 80x60. + assert.equal(pngHeight(buf), 60); + assert.equal(pngWidth(buf), 80); + }); + }); + + it('does not resize when source is smaller than the max bounds', async () => { + const tool = screenshot({ + screenshotMaxWidth: 4000, + screenshotMaxHeight: 4000, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent(html`
`); + + await tool.handler( + {params: {format: 'png'}, page: context.getSelectedMcpPage()}, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + assert.equal(pngWidth(buf), 800); + assert.equal(pngHeight(buf), 600); + }); + }); + + it('downscales full page screenshot when screenshotMaxWidth is set', async () => { + const tool = screenshot({ + screenshotMaxWidth: 200, + } as ParsedArguments); + await withMcpContext(async (response, context) => { + const page = context.getSelectedPptrPage(); + await page.setViewport({width: 800, height: 600}); + await page.setContent( + html`
`, + ); + + await tool.handler( + { + params: {format: 'png', fullPage: true}, + page: context.getSelectedMcpPage(), + }, + response, + context, + ); + + const buf = Buffer.from(response.images[0].data, 'base64'); + // Source is at least 1000x1500; scale = 200/1000 = 0.2 -> ~200x300. + // Allow ±2px to absorb sub-pixel rasterization rounding by Chrome. + assert.equal(pngWidth(buf), 200); + assert.ok( + Math.abs(pngHeight(buf) - 300) <= 2, + `expected height near 300, got ${pngHeight(buf)}`, + ); + }); + }); + it('with malformed filePath', async () => { await withMcpContext(async (response, context) => { // Use a platform-specific invalid character. @@ -286,7 +438,7 @@ describe('screenshot', () => { const page = context.getSelectedPptrPage(); await page.setContent(fixture.html); await assert.rejects( - screenshot.handler( + screenshotTool.handler( { params: {format: 'png', filePath}, page: context.getSelectedMcpPage(),