From a5df5b828543ab6aaa574a2059044eb5f45eb59b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Wed, 18 Mar 2026 14:46:32 +0100 Subject: [PATCH 01/14] feat: add `isRemote` flag and connect option types for remote browser support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Task 1: Type Definitions & LaunchContext `isRemote` Flag ## Goal Add the foundational types and the `isRemote` flag that all other remote browser tasks depend on. ## Dependencies None — this is the foundation task. ## Scope ### 1. Add `isRemote` to `LaunchContext` **File:** `packages/browser-pool/src/launch-context.ts` - Add `isRemote?: boolean` to the `LaunchContextOptions` interface (alongside `id`, `browserPlugin`, etc.) - Add a public readonly `isRemote: boolean` property to the `LaunchContext` class - Set it from constructor options, defaulting to `false` ### 2. Define connect option types on PlaywrightPlugin **File:** `packages/browser-pool/src/playwright/playwright-plugin.ts` Add the following types to the plugin file (or a co-located types file): ```typescript // Mirrors browserType.connectOverCDP(endpointURL, options) interface PlaywrightConnectOverCDPOptions { endpointURL: string; options?: Parameters<BrowserType['connectOverCDP']>[1]; } // Mirrors browserType.connect(wsEndpoint, options) interface PlaywrightConnectOptions { wsEndpoint: string; options?: Parameters<BrowserType['connect']>[1]; } ``` Use the existing `Parameters` utility type pattern (see how `SafeParameters` is used elsewhere in the codebase) — do NOT redefine Playwright's types manually. ### 3. Define connect option types on PuppeteerPlugin **File:** `packages/browser-pool/src/puppeteer/puppeteer-plugin.ts` ```typescript // Mirrors puppeteer.connect({ browserWSEndpoint, ...rest }) // Flat object matching Puppeteer's ConnectOptions type PuppeteerConnectOverCDPOptions = Parameters<typeof puppeteer.connect>[0]; ``` Use the `Parameters` pattern to extract the type from Puppeteer's `connect` method. ### 4. 
Add connect option fields to `BrowserPluginOptions` **File:** `packages/browser-pool/src/abstract-classes/browser-plugin.ts` This is a design choice — the PRD says connect options live on the plugin subclass, not on `LaunchContext`. Add the fields to the plugin options type so they flow through the constructor: - `PlaywrightPlugin` options should accept `connectOptions?` and `connectOverCDPOptions?` - `PuppeteerPlugin` options should accept `connectOverCDPOptions?` These can be added to subclass-specific option types rather than the base `BrowserPluginOptions`. ### 5. Add connect option fields to launcher-level interfaces **File:** `packages/playwright-crawler/src/internals/playwright-launcher.ts` Add to `PlaywrightLaunchContext`: ```typescript connectOptions?: PlaywrightConnectOptions; connectOverCDPOptions?: PlaywrightConnectOverCDPOptions; ``` **File:** `packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts` Add to `PuppeteerLaunchContext`: ```typescript connectOverCDPOptions?: PuppeteerConnectOverCDPOptions; ``` This enables IDE autocomplete when users configure `launchContext` on the crawler. ### 6. Export new types **File:** `packages/browser-pool/src/index.ts` Export the new connect option types so they're available to consumers. 
## Key Files | File | Change | |------|--------| | `packages/browser-pool/src/launch-context.ts` | Add `isRemote` option + property | | `packages/browser-pool/src/playwright/playwright-plugin.ts` | Add connect option types | | `packages/browser-pool/src/puppeteer/puppeteer-plugin.ts` | Add connect option type | | `packages/playwright-crawler/src/internals/playwright-launcher.ts` | Add connect options to `PlaywrightLaunchContext` | | `packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts` | Add connect options to `PuppeteerLaunchContext` | | `packages/browser-pool/src/index.ts` | Export new types | | `packages/browser-crawler/src/internals/browser-launcher.ts` | May need connect options on `BrowserLaunchContext` base | ## Acceptance Criteria - [x] `LaunchContext` has `isRemote` boolean property, defaults to `false` - [x] Connect option types are defined using library `Parameters` extraction (not manual redefinition) - [x] `PlaywrightLaunchContext` shows `connectOptions` and `connectOverCDPOptions` in IDE autocomplete - [x] `PuppeteerLaunchContext` shows `connectOverCDPOptions` in IDE autocomplete - [x] New types are exported from `@crawlee/browser-pool` - [x] TypeScript compiles with no errors Co-Authored-By: Claude Opus 4.6 --- packages/browser-pool/src/launch-context.ts | 9 ++++++ .../src/playwright/playwright-plugin.ts | 29 ++++++++++++++++++- .../src/puppeteer/puppeteer-plugin.ts | 20 ++++++++++++- .../src/internals/playwright-launcher.ts | 15 ++++++++++ .../src/internals/puppeteer-launcher.ts | 8 +++++ 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/packages/browser-pool/src/launch-context.ts b/packages/browser-pool/src/launch-context.ts index 9ae847634b51..47029904a8f9 100644 --- a/packages/browser-pool/src/launch-context.ts +++ b/packages/browser-pool/src/launch-context.ts @@ -57,6 +57,12 @@ export interface LaunchContextOptions< * This is useful when using HTTPS proxies with self-signed certificates. 
*/ ignoreProxyCertificate?: boolean; + /** + * Whether this launch context represents a connection to a remote browser + * rather than a locally launched one. + * @default false + */ + isRemote?: boolean; } export class LaunchContext< @@ -73,6 +79,7 @@ export class LaunchContext< browserPerProxy?: boolean; userDataDir: string; proxyTier?: number; + readonly isRemote: boolean; ignoreProxyCertificate?: boolean; private _proxyUrl?: string; @@ -92,6 +99,7 @@ export class LaunchContext< userDataDir = '', proxyTier, ignoreProxyCertificate, + isRemote, } = options; this.id = id; @@ -102,6 +110,7 @@ export class LaunchContext< this.userDataDir = userDataDir; this.proxyTier = proxyTier; this.ignoreProxyCertificate = ignoreProxyCertificate ?? false; + this.isRemote = isRemote ?? false; this._proxyUrl = proxyUrl; } diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index a48cf1fedfec..7cd4d33bea60 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -2,7 +2,7 @@ import fs from 'node:fs'; import type { Browser as PlaywrightBrowser, BrowserType } from 'playwright'; -import { BrowserPlugin } from '../abstract-classes/browser-plugin.js'; +import { BrowserPlugin, type BrowserPluginOptions } from '../abstract-classes/browser-plugin.js'; import { anonymizeProxySugar } from '../anonymize-proxy.js'; import type { createProxyServerForContainers } from '../container-proxy-server.js'; import type { LaunchContext } from '../launch-context.js'; @@ -11,6 +11,23 @@ import type { SafeParameters } from '../utils.js'; import { PlaywrightBrowser as PlaywrightBrowserWithPersistentContext } from './playwright-browser.js'; import { PlaywrightController } from './playwright-controller.js'; +/** + * Options for connecting to a remote browser via CDP. + * Mirrors `browserType.connectOverCDP(options)`. 
+ */ +export type PlaywrightConnectOverCDPOptions = Parameters[0]; + +/** + * Options for connecting to a remote browser via WebSocket. + * Mirrors `browserType.connect(options)`. + */ +export type PlaywrightConnectOptions = Parameters[0]; + +export interface PlaywrightPluginOptions extends BrowserPluginOptions[0]> { + connectOptions?: PlaywrightConnectOptions; + connectOverCDPOptions?: PlaywrightConnectOverCDPOptions; +} + export class PlaywrightPlugin extends BrowserPlugin< BrowserType, SafeParameters[0], @@ -19,6 +36,16 @@ export class PlaywrightPlugin extends BrowserPlugin< private _browserVersion?: string; _containerProxyServer?: Awaited>; + connectOptions?: PlaywrightConnectOptions; + connectOverCDPOptions?: PlaywrightConnectOverCDPOptions; + + constructor(library: BrowserType, options: PlaywrightPluginOptions = {}) { + const { connectOptions, connectOverCDPOptions, ...baseOptions } = options; + super(library, baseOptions); + this.connectOptions = connectOptions; + this.connectOverCDPOptions = connectOverCDPOptions; + } + protected async _launch(launchContext: LaunchContext): Promise { const { launchOptions, useIncognitoPages, userDataDir, proxyUrl } = launchContext; let browser: PlaywrightBrowser; diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index 91ea817d03a9..b1143d7efaa7 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -4,7 +4,7 @@ import type { Dictionary } from '@crawlee/types'; import type Puppeteer from 'puppeteer'; import type * as PuppeteerTypes from 'puppeteer'; -import { BrowserPlugin } from '../abstract-classes/browser-plugin.js'; +import { BrowserPlugin, type BrowserPluginOptions } from '../abstract-classes/browser-plugin.js'; import { anonymizeProxySugar } from '../anonymize-proxy.js'; import type { LaunchContext } from '../launch-context.js'; import { noop } from '../utils.js'; 
@@ -13,12 +13,30 @@ import { PuppeteerController } from './puppeteer-controller.js'; const PROXY_SERVER_ARG = '--proxy-server='; +/** + * Options for connecting to a remote browser via Puppeteer. + * Flat object matching Puppeteer's `ConnectOptions`. + */ +export type PuppeteerConnectOverCDPOptions = Parameters<(typeof Puppeteer)['connect']>[0]; + +export interface PuppeteerPluginOptions extends BrowserPluginOptions { + connectOverCDPOptions?: PuppeteerConnectOverCDPOptions; +} + export class PuppeteerPlugin extends BrowserPlugin< typeof Puppeteer, PuppeteerTypes.LaunchOptions, PuppeteerTypes.Browser, PuppeteerNewPageOptions > { + connectOverCDPOptions?: PuppeteerConnectOverCDPOptions; + + constructor(library: typeof Puppeteer, options: PuppeteerPluginOptions = {}) { + const { connectOverCDPOptions, ...baseOptions } = options; + super(library, baseOptions); + this.connectOverCDPOptions = connectOverCDPOptions; + } + protected async _launch( launchContext: LaunchContext< typeof Puppeteer, diff --git a/packages/playwright-crawler/src/internals/playwright-launcher.ts b/packages/playwright-crawler/src/internals/playwright-launcher.ts index a8bf13c86c50..5c93468d63f3 100644 --- a/packages/playwright-crawler/src/internals/playwright-launcher.ts +++ b/packages/playwright-crawler/src/internals/playwright-launcher.ts @@ -1,6 +1,7 @@ import type { BrowserLaunchContext } from '@crawlee/browser'; import { BrowserLauncher, Configuration } from '@crawlee/browser'; import { PlaywrightPlugin } from '@crawlee/browser-pool'; +import type { PlaywrightConnectOptions, PlaywrightConnectOverCDPOptions } from '@crawlee/browser-pool'; import ow from 'ow'; import type { Browser, BrowserType, LaunchOptions } from 'playwright'; @@ -70,6 +71,18 @@ export interface PlaywrightLaunchContext extends BrowserLaunchContext { ...BrowserLauncher.optionsShape, launcher: ow.optional.object, launchContextOptions: ow.optional.object, + connectOptions: ow.optional.object, + connectOverCDPOptions: 
ow.optional.object, }; /** diff --git a/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts b/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts index 3d46c30dd432..5e8333083e32 100644 --- a/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts +++ b/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts @@ -1,6 +1,7 @@ import type { BrowserLaunchContext } from '@crawlee/browser'; import { BrowserLauncher, Configuration } from '@crawlee/browser'; import { PuppeteerPlugin } from '@crawlee/browser-pool'; +import type { PuppeteerConnectOverCDPOptions } from '@crawlee/browser-pool'; import ow from 'ow'; import type { Browser } from 'puppeteer'; @@ -65,6 +66,12 @@ export interface PuppeteerLaunchContext extends BrowserLaunchContext protected static override optionsShape = { ...BrowserLauncher.optionsShape, launcher: ow.optional.object, + connectOverCDPOptions: ow.optional.object, }; /** From b012525db7e9d539bd14c65ecf925137385fa56e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Wed, 18 Mar 2026 15:18:31 +0100 Subject: [PATCH 02/14] feat: add PlaywrightPlugin remote connection routing via `connect()` and `connectOverCDP()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Task 2: PlaywrightPlugin Remote Connection Routing ## Goal Make `PlaywrightPlugin._launch()` branch to `connect()` or `connectOverCDP()` when remote connection options are present, instead of calling `launch()`. ## Dependencies - Task 1 (types and `isRemote` flag) ## Scope ### 1. 
Store connect options on the plugin instance **File:** `packages/browser-pool/src/playwright/playwright-plugin.ts` - Accept `connectOptions` and `connectOverCDPOptions` in the constructor options - Store them as instance properties - **Validation:** If both `connectOptions` AND `connectOverCDPOptions` are provided, throw an error immediately in the constructor: ``` Cannot set both 'connectOptions' and 'connectOverCDPOptions' — pick one protocol. ``` ### 2. Branch in `_launch()` for remote connections **File:** `packages/browser-pool/src/playwright/playwright-plugin.ts` In the existing `_launch()` method (currently lines 22-102), add branching logic **before** the existing local launch code: ```typescript protected async _launch(launchContext: LaunchContext<...>): Promise<Browser> { // Remote CDP connection if (this.connectOverCDPOptions) { const { endpointURL, options } = this.connectOverCDPOptions; const browser = await browserType.connectOverCDP(endpointURL, options); return browser; } // Remote Playwright WebSocket connection if (this.connectOptions) { const { wsEndpoint, options } = this.connectOptions; const browser = await browserType.connect(wsEndpoint, options); return browser; } // Existing local launch logic... } ``` **Reference:** See `StagehandPlugin._launch()` at `packages/stagehand-crawler/src/internals/stagehand-plugin.ts:102-107` for the CDP connection pattern: ```typescript const cdpUrl = await stagehand.connectURL(); const browser = await chromium.connectOverCDP(cdpUrl); ``` ### 3. Set `isRemote` on LaunchContext **File:** `packages/browser-pool/src/playwright/playwright-plugin.ts` In `createLaunchContext()` (or wherever the plugin creates the LaunchContext), pass `isRemote: true` when connect options are present. This can be done by overriding `createLaunchContext()` in the subclass, or by passing it through the options. 
Check how the base `BrowserPlugin.createLaunchContext()` works (at `packages/browser-pool/src/abstract-classes/browser-plugin.ts:149-174`) and determine the best insertion point. ## Key Design Decisions - **No new abstract method:** The routing happens inside `_launch()` via internal branching, not a new `_connect()` method. This keeps the abstract interface unchanged and doesn't affect custom plugins like StagehandPlugin. - **`browser.close()` for cleanup:** Remote browsers are closed the same way as local browsers — via `browser.close()`. No special disconnect handling. - **No proxy server setup for remote:** The remote branch skips the local proxy server setup that exists in the current `_launch()` code. ## Key Files | File | Change | |------|--------| | `packages/browser-pool/src/playwright/playwright-plugin.ts` | Constructor stores options, `_launch()` branches for remote | ## Acceptance Criteria - [x] `PlaywrightPlugin` accepts `connectOptions` in constructor and calls `browserType.connect()` with `wsEndpoint` and `options` - [x] `PlaywrightPlugin` accepts `connectOverCDPOptions` in constructor and calls `browserType.connectOverCDP()` with `endpointURL` and `options` - [x] Setting both `connectOptions` and `connectOverCDPOptions` throws an error - [x] `launchContext.isRemote` is `true` when connect options are present - [x] Remote branch skips local proxy server setup and persistent context logic - [x] TypeScript compiles with no errors Co-Authored-By: Claude Opus 4.6 --- .../src/playwright/playwright-plugin.ts | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index 7cd4d33bea60..c99f1680b2c2 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -2,7 +2,11 @@ import fs from 'node:fs'; import type { Browser as 
PlaywrightBrowser, BrowserType } from 'playwright'; -import { BrowserPlugin, type BrowserPluginOptions } from '../abstract-classes/browser-plugin.js'; +import { + BrowserPlugin, + type BrowserPluginOptions, + type CreateLaunchContextOptions, +} from '../abstract-classes/browser-plugin.js'; import { anonymizeProxySugar } from '../anonymize-proxy.js'; import type { createProxyServerForContainers } from '../container-proxy-server.js'; import type { LaunchContext } from '../launch-context.js'; @@ -41,12 +45,34 @@ export class PlaywrightPlugin extends BrowserPlugin< constructor(library: BrowserType, options: PlaywrightPluginOptions = {}) { const { connectOptions, connectOverCDPOptions, ...baseOptions } = options; + + if (connectOptions && connectOverCDPOptions) { + throw new Error("Cannot set both 'connectOptions' and 'connectOverCDPOptions' — pick one protocol."); + } + super(library, baseOptions); this.connectOptions = connectOptions; this.connectOverCDPOptions = connectOverCDPOptions; } + override createLaunchContext(options: CreateLaunchContextOptions = {}): LaunchContext { + return super.createLaunchContext({ + ...options, + isRemote: options.isRemote ?? 
!!(this.connectOptions || this.connectOverCDPOptions), + }); + } + protected async _launch(launchContext: LaunchContext): Promise { + // Remote CDP connection — skip all local launch/proxy logic + if (this.connectOverCDPOptions) { + return this.library.connectOverCDP(this.connectOverCDPOptions); + } + + // Remote Playwright WebSocket connection — skip all local launch/proxy logic + if (this.connectOptions) { + return this.library.connect(this.connectOptions); + } + const { launchOptions, useIncognitoPages, userDataDir, proxyUrl } = launchContext; let browser: PlaywrightBrowser; From 0e8381267e8a97a263899e326daca7754387bea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Wed, 18 Mar 2026 15:23:37 +0100 Subject: [PATCH 03/14] feat: add PuppeteerPlugin remote connection routing via `puppeteer.connect()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Task 3: PuppeteerPlugin Remote Connection Routing ## Goal Make `PuppeteerPlugin._launch()` branch to `puppeteer.connect()` when remote connection options (CDP) are present, instead of calling `puppeteer.launch()`. ## Dependencies - Task 1 (types and `isRemote` flag) ## Scope ### 1. Store connect options on the plugin instance **File:** `packages/browser-pool/src/puppeteer/puppeteer-plugin.ts` - Accept `connectOverCDPOptions` in the constructor options - Store as an instance property - Puppeteer only supports CDP — there is no `connectOptions` field (Playwright-only) ### 2. 
Branch in `_launch()` for remote connections **File:** `packages/browser-pool/src/puppeteer/puppeteer-plugin.ts` In the existing `_launch()` method (currently lines 22-203), add branching logic **before** the existing local launch code: ```typescript protected async _launch(launchContext: LaunchContext<...>): Promise<Browser> { // Remote CDP connection if (this.connectOverCDPOptions) { const browser = await puppeteer.connect(this.connectOverCDPOptions); // Wrap with the same Proxy handler for newPage() interception // (see existing code at lines 138-200) return wrappedBrowser; } // Existing local launch logic... } ``` **Important:** Puppeteer's `connect()` takes a flat options object: `puppeteer.connect({ browserWSEndpoint, ...rest })`. This is different from Playwright's two-argument pattern. The type should match Puppeteer's `ConnectOptions`. ### 3. Handle the `newPage()` Proxy wrapper for remote The existing `_launch()` wraps the browser in a `Proxy` that intercepts `newPage()` calls to support `useIncognitoPages` (lines 138-200). This proxy wrapper should also be applied to remote browsers so that incognito context creation works correctly. ### 4. Set `isRemote` on LaunchContext Same pattern as Task 2 — pass `isRemote: true` when `connectOverCDPOptions` is present. ## Key Design Decisions - **Flat options object:** Puppeteer's `connect()` API takes a single options object (not `endpointURL, options` like Playwright). The `connectOverCDPOptions` type matches this flat shape directly. - **`browser.close()` for cleanup:** Same as Playwright — remote browsers are closed via `browser.close()`, not `browser.disconnect()`. - **`newPage()` proxy still needed:** The Proxy wrapper that intercepts `newPage()` to create incognito contexts must still wrap remote browsers. 
## Key Files | File | Change | |------|--------| | `packages/browser-pool/src/puppeteer/puppeteer-plugin.ts` | Constructor stores options, `_launch()` branches for remote | ## Acceptance Criteria - [x] `PuppeteerPlugin` accepts `connectOverCDPOptions` in constructor and calls `puppeteer.connect()` with the options object - [x] The `newPage()` Proxy wrapper is applied to remote browsers (for incognito support) - [x] `launchContext.isRemote` is `true` when connect options are present - [x] Remote branch skips user data directory setup, headless handling, and other local-only logic - [x] TypeScript compiles with no errors Co-Authored-By: Claude Opus 4.6 --- .../src/puppeteer/puppeteer-plugin.ts | 118 ++++++++++-------- 1 file changed, 68 insertions(+), 50 deletions(-) diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index b1143d7efaa7..a3ea69461662 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -4,7 +4,11 @@ import type { Dictionary } from '@crawlee/types'; import type Puppeteer from 'puppeteer'; import type * as PuppeteerTypes from 'puppeteer'; -import { BrowserPlugin, type BrowserPluginOptions } from '../abstract-classes/browser-plugin.js'; +import { + BrowserPlugin, + type BrowserPluginOptions, + type CreateLaunchContextOptions, +} from '../abstract-classes/browser-plugin.js'; import { anonymizeProxySugar } from '../anonymize-proxy.js'; import type { LaunchContext } from '../launch-context.js'; import { noop } from '../utils.js'; @@ -37,6 +41,20 @@ export class PuppeteerPlugin extends BrowserPlugin< this.connectOverCDPOptions = connectOverCDPOptions; } + override createLaunchContext( + options: CreateLaunchContextOptions< + typeof Puppeteer, + PuppeteerTypes.LaunchOptions, + PuppeteerTypes.Browser, + PuppeteerNewPageOptions + > = {}, + ): LaunchContext { + return super.createLaunchContext({ + 
...options, + isRemote: options.isRemote ?? !!this.connectOverCDPOptions, + }); + } + protected async _launch( launchContext: LaunchContext< typeof Puppeteer, @@ -56,67 +74,67 @@ export class PuppeteerPlugin extends BrowserPlugin< // ignore } - const { - launchOptions, - userDataDir, - useIncognitoPages, - experimentalContainers, - proxyUrl, - ignoreProxyCertificate, - } = launchContext; - - if (experimentalContainers) { - throw new Error('Experimental containers are only available with Playwright'); - } - - launchOptions!.userDataDir = launchOptions!.userDataDir ?? userDataDir; - - if (launchOptions!.headless === false) { - if (Array.isArray(launchOptions!.args)) { - launchOptions!.args.push('--disable-site-isolation-trials'); - } else { - launchOptions!.args = ['--disable-site-isolation-trials']; - } - } - - if (launchOptions!.headless === true && oldPuppeteerVersion) { - launchOptions!.headless = 'new' as any; - } + const { useIncognitoPages, proxyUrl, ignoreProxyCertificate } = launchContext; let browser: PuppeteerTypes.Browser; - { - const [anonymizedProxyUrl, close] = await anonymizeProxySugar(proxyUrl, undefined, undefined, { - ignoreProxyCertificate: launchContext.ignoreProxyCertificate, - }); + if (this.connectOverCDPOptions) { + // Remote CDP connection — skip local launch/proxy/headless logic + browser = await this.library.connect(this.connectOverCDPOptions); + } else { + const { launchOptions, userDataDir, experimentalContainers } = launchContext; + + if (experimentalContainers) { + throw new Error('Experimental containers are only available with Playwright'); + } - if (proxyUrl) { - const proxyArg = `${PROXY_SERVER_ARG}${anonymizedProxyUrl ?? proxyUrl}`; + launchOptions!.userDataDir = launchOptions!.userDataDir ?? 
userDataDir; + if (launchOptions!.headless === false) { if (Array.isArray(launchOptions!.args)) { - launchOptions!.args.push(proxyArg); + launchOptions!.args.push('--disable-site-isolation-trials'); } else { - launchOptions!.args = [proxyArg]; + launchOptions!.args = ['--disable-site-isolation-trials']; } } - try { - browser = await this.library.launch(launchOptions); + if (launchOptions!.headless === true && oldPuppeteerVersion) { + launchOptions!.headless = 'new' as any; + } - if (anonymizedProxyUrl) { - browser.on('disconnected', async () => { - await close(); - }); + { + const [anonymizedProxyUrl, close] = await anonymizeProxySugar(proxyUrl, undefined, undefined, { + ignoreProxyCertificate: launchContext.ignoreProxyCertificate, + }); + + if (proxyUrl) { + const proxyArg = `${PROXY_SERVER_ARG}${anonymizedProxyUrl ?? proxyUrl}`; + + if (Array.isArray(launchOptions!.args)) { + launchOptions!.args.push(proxyArg); + } else { + launchOptions!.args = [proxyArg]; + } + } + + try { + browser = await this.library.launch(launchOptions); + + if (anonymizedProxyUrl) { + browser.on('disconnected', async () => { + await close(); + }); + } + } catch (error: any) { + await close(); + + this._throwAugmentedLaunchError( + error, + launchContext.launchOptions?.executablePath, + '`apify/actor-node-puppeteer-chrome`', + "Try installing a browser, if it's missing, by running `npx @puppeteer/browsers install chromium --path [path]` and pointing `executablePath` to the downloaded executable (https://pptr.dev/browsers-api)", + ); } - } catch (error: any) { - await close(); - - this._throwAugmentedLaunchError( - error, - launchContext.launchOptions?.executablePath, - '`apify/actor-node-puppeteer-chrome`', - "Try installing a browser, if it's missing, by running `npx @puppeteer/browsers install chromium --path [path]` and pointing `executablePath` to the downloaded executable (https://pptr.dev/browsers-api)", - ); } } From 29e7aa407aeec7fb7605dd7ddb5faa04a5c99e38 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 10:42:10 +0100 Subject: [PATCH 04/14] feat: skip proxy/webdriver hiding for remote browsers, add remote connection logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Goal Make `BrowserPlugin.launch()` skip proxy injection and webdriver hiding when `launchContext.isRemote` is `true`, since these operations modify `launchOptions` which are not used for remote connections. ## Dependencies - Task 1 (`isRemote` flag on LaunchContext) ## Scope ### 1. Skip `_addProxyToLaunchOptions()` for remote **File:** `packages/browser-pool/src/abstract-classes/browser-plugin.ts` In the `launch()` method, the call to `_addProxyToLaunchOptions()` is now gated on `!isRemote`: ```typescript if (launchContext.proxyUrl && !launchContext.isRemote) { await this._addProxyToLaunchOptions(launchContext); } ``` ### 2. Skip `_mergeArgsToHideWebdriver()` for remote ```typescript if (!launchContext.isRemote && this._isChromiumBasedBrowser(launchContext)) { this._mergeArgsToHideWebdriver(launchContext); } ``` ### 3. No changes to `_addProxyToLaunchOptions()` or `_mergeArgsToHideWebdriver()` themselves The methods remain unchanged — the skip logic lives in the calling `launch()` method. 
## Key Design Decisions - **Skip at call site, not in the methods** - **`proxyUrl` + remote triggers a warning:** Handled in Task 6 (Warnings) - **Fingerprinting hooks are unchanged** ## Additional - Fixed `isRemote` not being passed through base class `createLaunchContext()` - Added info-level logs for remote connections in base class and both plugins Co-Authored-By: Claude Opus 4.6 --- .../src/abstract-classes/browser-plugin.ts | 10 ++++++++-- .../browser-pool/src/playwright/playwright-plugin.ts | 2 ++ .../browser-pool/src/puppeteer/puppeteer-plugin.ts | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/packages/browser-pool/src/abstract-classes/browser-plugin.ts b/packages/browser-pool/src/abstract-classes/browser-plugin.ts index bc75ff2fde41..aede527bb847 100644 --- a/packages/browser-pool/src/abstract-classes/browser-plugin.ts +++ b/packages/browser-pool/src/abstract-classes/browser-plugin.ts @@ -158,6 +158,7 @@ export abstract class BrowserPlugin< browserPerProxy = this.browserPerProxy, ignoreProxyCertificate = this.ignoreProxyCertificate, proxyTier, + isRemote, } = options; return new LaunchContext({ @@ -170,6 +171,7 @@ export abstract class BrowserPlugin< browserPerProxy, ignoreProxyCertificate, proxyTier, + isRemote, }); } @@ -197,11 +199,11 @@ export abstract class BrowserPlugin< const { proxyUrl, launchOptions } = launchContext; - if (proxyUrl) { + if (proxyUrl && !launchContext.isRemote) { await this._addProxyToLaunchOptions(launchContext); } - if (this._isChromiumBasedBrowser(launchContext)) { + if (!launchContext.isRemote && this._isChromiumBasedBrowser(launchContext)) { // This will set the args for chromium based browsers to hide the webdriver. 
(launchOptions as Dictionary).args = this._mergeArgsToHideWebdriver(launchOptions!.args); // When User-Agent is not set, and we're using Chromium in headless mode, @@ -213,6 +215,10 @@ export abstract class BrowserPlugin< } } + if (launchContext.isRemote) { + this.log.info('Connecting to remote browser (skipping local proxy and webdriver stealth configuration).'); + } + return this._launch(launchContext); } diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index c99f1680b2c2..a4d5e41c5170 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -65,11 +65,13 @@ export class PlaywrightPlugin extends BrowserPlugin< protected async _launch(launchContext: LaunchContext): Promise { // Remote CDP connection — skip all local launch/proxy logic if (this.connectOverCDPOptions) { + this.log.info('Connecting to remote browser via connectOverCDP.'); return this.library.connectOverCDP(this.connectOverCDPOptions); } // Remote Playwright WebSocket connection — skip all local launch/proxy logic if (this.connectOptions) { + this.log.info('Connecting to remote browser via connect (Playwright WebSocket).'); return this.library.connect(this.connectOptions); } diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index a3ea69461662..5cc898bc307b 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -80,6 +80,7 @@ export class PuppeteerPlugin extends BrowserPlugin< if (this.connectOverCDPOptions) { // Remote CDP connection — skip local launch/proxy/headless logic + this.log.info('Connecting to remote browser via connect (CDP).'); browser = await this.library.connect(this.connectOverCDPOptions); } else { const { launchOptions, userDataDir, experimentalContainers } = 
launchContext; From ed86761bec75426c806c759414f71d11db4d86c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 10:49:44 +0100 Subject: [PATCH 05/14] fix: require endpoint in connect options, use non-deprecated Playwright overloads Playwright: change PlaywrightConnectOverCDPOptions and PlaywrightConnectOptions from type aliases (all-optional fields) to interfaces with required `wsEndpoint`. Use the non-deprecated two-argument overloads in _launch(). Puppeteer: add runtime guard that throws if neither `browserWSEndpoint` nor `browserURL` is provided in connectOverCDPOptions. Co-Authored-By: Claude Opus 4.6 --- .../src/playwright/playwright-plugin.ts | 22 +++++++++++++------ .../src/puppeteer/puppeteer-plugin.ts | 3 +++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index a4d5e41c5170..199e468cea8c 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -1,6 +1,6 @@ import fs from 'node:fs'; -import type { Browser as PlaywrightBrowser, BrowserType } from 'playwright'; +import type { Browser as PlaywrightBrowser, BrowserType, ConnectOverCDPOptions, ConnectOptions } from 'playwright'; import { BrowserPlugin, @@ -17,15 +17,21 @@ import { PlaywrightController } from './playwright-controller.js'; /** * Options for connecting to a remote browser via CDP. - * Mirrors `browserType.connectOverCDP(options)`. + * Mirrors `browserType.connectOverCDP(endpointURL, options?)`. */ -export type PlaywrightConnectOverCDPOptions = Parameters[0]; +export interface PlaywrightConnectOverCDPOptions extends ConnectOverCDPOptions { + /** The CDP endpoint URL to connect to (required). */ + wsEndpoint: string; +} /** * Options for connecting to a remote browser via WebSocket. - * Mirrors `browserType.connect(options)`. 
+ * Mirrors `browserType.connect(wsEndpoint, options?)`. */ -export type PlaywrightConnectOptions = Parameters[0]; +export interface PlaywrightConnectOptions extends ConnectOptions { + /** The WebSocket endpoint URL to connect to (required). */ + wsEndpoint: string; +} export interface PlaywrightPluginOptions extends BrowserPluginOptions[0]> { connectOptions?: PlaywrightConnectOptions; @@ -66,13 +72,15 @@ export class PlaywrightPlugin extends BrowserPlugin< // Remote CDP connection — skip all local launch/proxy logic if (this.connectOverCDPOptions) { this.log.info('Connecting to remote browser via connectOverCDP.'); - return this.library.connectOverCDP(this.connectOverCDPOptions); + const { wsEndpoint, ...options } = this.connectOverCDPOptions; + return this.library.connectOverCDP(wsEndpoint, options); } // Remote Playwright WebSocket connection — skip all local launch/proxy logic if (this.connectOptions) { this.log.info('Connecting to remote browser via connect (Playwright WebSocket).'); - return this.library.connect(this.connectOptions); + const { wsEndpoint, ...options } = this.connectOptions; + return this.library.connect(wsEndpoint, options); } const { launchOptions, useIncognitoPages, userDataDir, proxyUrl } = launchContext; diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index 5cc898bc307b..b3fd07f224e6 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -80,6 +80,9 @@ export class PuppeteerPlugin extends BrowserPlugin< if (this.connectOverCDPOptions) { // Remote CDP connection — skip local launch/proxy/headless logic + if (!this.connectOverCDPOptions.browserWSEndpoint && !this.connectOverCDPOptions.browserURL) { + throw new Error("connectOverCDPOptions must include either 'browserWSEndpoint' or 'browserURL'."); + } this.log.info('Connecting to remote browser via connect (CDP).'); browser = await 
this.library.connect(this.connectOverCDPOptions); } else { From bd19911cef73b00090932fcb70919a95d99e4d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 10:55:00 +0100 Subject: [PATCH 06/14] feat: default `useIncognitoPages` to `true` for remote browser connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Task 5: `useIncognitoPages` Defaults to `true` for Remote ## Goal When remote connection options are present and `useIncognitoPages` was not explicitly set by the user, default it to `true` and log an info message. If the user explicitly sets `false`, log a warning. ## Dependencies - Task 2 (PlaywrightPlugin stores connect options) - Task 3 (PuppeteerPlugin stores connect options) ## Scope ### 1. Preserve `undefined` vs `false` in the plugin subclasses The base `BrowserPlugin` constructor collapses an unset `useIncognitoPages` to `false` and is left unchanged. Each subclass instead checks `options.useIncognitoPages` directly (where `undefined` is still distinguishable from `false`) and overrides the value after `super()`. ### 2. Override default in PlaywrightPlugin constructor After the `super()` call, if connect options are present: - `undefined` → set to `true`, info log - `false` → warning log - `true` → no extra log ### 3. Override default in PuppeteerPlugin constructor Same logic, checking `connectOverCDPOptions`. ## Key Design Decisions - **Info vs warning:** Defaulting to `true` is an info message (expected behavior). Explicit `false` is a warning (user should understand implications). - **`useIncognitoPages: false` + `connect()` is not special-cased:** The warning covers this case — no additional error or fallback. - **Uses existing `this.log`:** All logging uses the inherited `BrowserPlugin.log` logger. 
## Acceptance Criteria - [x] When `connectOptions` or `connectOverCDPOptions` is set and `useIncognitoPages` is not provided → defaults to `true`, info message logged - [x] When `connectOptions` or `connectOverCDPOptions` is set and `useIncognitoPages: false` → stays `false`, warning logged - [x] When `connectOptions` or `connectOverCDPOptions` is set and `useIncognitoPages: true` → stays `true`, no extra log - [x] When no connect options are set → existing behavior unchanged - [x] Plugin subclass constructors preserve the `undefined` vs `false` distinction by reading `options.useIncognitoPages` directly (the base constructor still collapses `undefined` to `false`) Co-Authored-By: Claude Opus 4.6 --- .../browser-pool/src/playwright/playwright-plugin.ts | 12 ++++++++++++ .../browser-pool/src/puppeteer/puppeteer-plugin.ts | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index 199e468cea8c..c745e7784537 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -59,6 +59,18 @@ export class PlaywrightPlugin extends BrowserPlugin< super(library, baseOptions); this.connectOptions = connectOptions; this.connectOverCDPOptions = connectOverCDPOptions; + + if (this.connectOptions || this.connectOverCDPOptions) { + if (options.useIncognitoPages === undefined) { + this.useIncognitoPages = true; + this.log.info('Remote browser detected — defaulting useIncognitoPages to true for session isolation.'); + } else if (options.useIncognitoPages === false) { + this.log.warning( + 'useIncognitoPages is set to false with a remote browser connection. 
' + + 'Pages will share cookies and storage on the remote browser instance.', + ); + } + } } override createLaunchContext(options: CreateLaunchContextOptions = {}): LaunchContext { diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index b3fd07f224e6..0e93a93e9f83 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -39,6 +39,18 @@ export class PuppeteerPlugin extends BrowserPlugin< const { connectOverCDPOptions, ...baseOptions } = options; super(library, baseOptions); this.connectOverCDPOptions = connectOverCDPOptions; + + if (this.connectOverCDPOptions) { + if (options.useIncognitoPages === undefined) { + this.useIncognitoPages = true; + this.log.info('Remote browser detected — defaulting useIncognitoPages to true for session isolation.'); + } else if (options.useIncognitoPages === false) { + this.log.warning( + 'useIncognitoPages is set to false with a remote browser connection. 
' + + 'Pages will share cookies and storage on the remote browser instance.', + ); + } + } } override createLaunchContext( From 373da361280860e7e151cd5e7e43efb38dbc57aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 11:06:35 +0100 Subject: [PATCH 07/14] fix: improve remote connection error handling and endpoint validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename PlaywrightConnectOverCDPOptions.wsEndpoint → endpointURL to match Playwright's own terminology and avoid field conflict with inherited ConnectOverCDPOptions.endpointURL - Wrap connectOverCDP() and connect() failures with BrowserLaunchError including sanitized endpoint URL (credentials stripped) and actionable guidance - Move endpoint validation to constructors (fail fast) — Playwright validates endpointURL and wsEndpoint are non-empty, Puppeteer validates browserWSEndpoint || browserURL - Add _sanitizeEndpointForLog() to both plugins to strip credentials from URLs before including them in error messages Co-Authored-By: Claude Opus 4.6 --- .../src/playwright/playwright-plugin.ts | 50 ++++++++++++++++--- .../src/puppeteer/puppeteer-plugin.ts | 34 +++++++++++-- 2 files changed, 74 insertions(+), 10 deletions(-) diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index c745e7784537..cabcc7352fd7 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -3,6 +3,7 @@ import fs from 'node:fs'; import type { Browser as PlaywrightBrowser, BrowserType, ConnectOverCDPOptions, ConnectOptions } from 'playwright'; import { + BrowserLaunchError, BrowserPlugin, type BrowserPluginOptions, type CreateLaunchContextOptions, @@ -20,8 +21,8 @@ import { PlaywrightController } from './playwright-controller.js'; * Mirrors `browserType.connectOverCDP(endpointURL, options?)`. 
*/ export interface PlaywrightConnectOverCDPOptions extends ConnectOverCDPOptions { - /** The CDP endpoint URL to connect to (required). */ - wsEndpoint: string; + /** The CDP endpoint URL to connect to (required). Overrides the deprecated optional `endpointURL` from Playwright. */ + endpointURL: string; } /** @@ -56,6 +57,14 @@ export class PlaywrightPlugin extends BrowserPlugin< throw new Error("Cannot set both 'connectOptions' and 'connectOverCDPOptions' — pick one protocol."); } + if (connectOverCDPOptions && !connectOverCDPOptions.endpointURL) { + throw new Error("'connectOverCDPOptions.endpointURL' must be a non-empty string."); + } + + if (connectOptions && !connectOptions.wsEndpoint) { + throw new Error("'connectOptions.wsEndpoint' must be a non-empty string."); + } + super(library, baseOptions); this.connectOptions = connectOptions; this.connectOverCDPOptions = connectOverCDPOptions; @@ -80,19 +89,48 @@ export class PlaywrightPlugin extends BrowserPlugin< }); } + private _sanitizeEndpointForLog(endpoint: string): string { + try { + const url = new URL(endpoint); + if (url.username || url.password) { + url.username = '***'; + url.password = '***'; + } + return url.toString(); + } catch { + return ''; + } + } + protected async _launch(launchContext: LaunchContext): Promise { // Remote CDP connection — skip all local launch/proxy logic if (this.connectOverCDPOptions) { + const { endpointURL, ...options } = this.connectOverCDPOptions; this.log.info('Connecting to remote browser via connectOverCDP.'); - const { wsEndpoint, ...options } = this.connectOverCDPOptions; - return this.library.connectOverCDP(wsEndpoint, options); + try { + return await this.library.connectOverCDP(endpointURL, options); + } catch (cause) { + throw new BrowserLaunchError( + `Failed to connect to remote browser via CDP at "${this._sanitizeEndpointForLog(endpointURL)}". 
` + + 'Check that the endpoint is reachable and the browser is accepting CDP connections.\n\u200b', + { cause }, + ); + } } // Remote Playwright WebSocket connection — skip all local launch/proxy logic if (this.connectOptions) { - this.log.info('Connecting to remote browser via connect (Playwright WebSocket).'); const { wsEndpoint, ...options } = this.connectOptions; - return this.library.connect(wsEndpoint, options); + this.log.info('Connecting to remote browser via connect (Playwright WebSocket).'); + try { + return await this.library.connect(wsEndpoint, options); + } catch (cause) { + throw new BrowserLaunchError( + `Failed to connect to remote browser via WebSocket at "${this._sanitizeEndpointForLog(wsEndpoint)}". ` + + 'Check that the endpoint is reachable and the Playwright server is running.\n\u200b', + { cause }, + ); + } } const { launchOptions, useIncognitoPages, userDataDir, proxyUrl } = launchContext; diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index 0e93a93e9f83..b0cf8a1aa03d 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -5,6 +5,7 @@ import type Puppeteer from 'puppeteer'; import type * as PuppeteerTypes from 'puppeteer'; import { + BrowserLaunchError, BrowserPlugin, type BrowserPluginOptions, type CreateLaunchContextOptions, @@ -37,6 +38,11 @@ export class PuppeteerPlugin extends BrowserPlugin< constructor(library: typeof Puppeteer, options: PuppeteerPluginOptions = {}) { const { connectOverCDPOptions, ...baseOptions } = options; + + if (connectOverCDPOptions && !connectOverCDPOptions.browserWSEndpoint && !connectOverCDPOptions.browserURL) { + throw new Error("connectOverCDPOptions must include either 'browserWSEndpoint' or 'browserURL'."); + } + super(library, baseOptions); this.connectOverCDPOptions = connectOverCDPOptions; @@ -67,6 +73,19 @@ export class PuppeteerPlugin extends 
BrowserPlugin< }); } + private _sanitizeEndpointForLog(endpoint: string): string { + try { + const url = new URL(endpoint); + if (url.username || url.password) { + url.username = '***'; + url.password = '***'; + } + return url.toString(); + } catch { + return ''; + } + } + protected async _launch( launchContext: LaunchContext< typeof Puppeteer, @@ -92,11 +111,18 @@ export class PuppeteerPlugin extends BrowserPlugin< if (this.connectOverCDPOptions) { // Remote CDP connection — skip local launch/proxy/headless logic - if (!this.connectOverCDPOptions.browserWSEndpoint && !this.connectOverCDPOptions.browserURL) { - throw new Error("connectOverCDPOptions must include either 'browserWSEndpoint' or 'browserURL'."); - } + const endpoint = this.connectOverCDPOptions.browserWSEndpoint || this.connectOverCDPOptions.browserURL!; this.log.info('Connecting to remote browser via connect (CDP).'); - browser = await this.library.connect(this.connectOverCDPOptions); + try { + browser = await this.library.connect(this.connectOverCDPOptions); + } catch (cause) { + const safeEndpoint = this._sanitizeEndpointForLog(endpoint); + throw new BrowserLaunchError( + `Failed to connect to remote browser via CDP at "${safeEndpoint}". ` + + 'Check that the endpoint is reachable and the browser is accepting CDP connections.\n\u200b', + { cause }, + ); + } } else { const { launchOptions, userDataDir, experimentalContainers } = launchContext; From 76b7d20d8cba6941ab77ba1d86d2dbe2d9486662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 11:14:14 +0100 Subject: [PATCH 08/14] fix: prevent resource leaks in PuppeteerPlugin remote browser connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Close BrowserContext on page close when useIncognitoPages is true. Previously contexts were only cleaned up when an anonymized proxy was active, causing context accumulation on remote browsers without proxy. 
- Clean up targetcreated listener on remote browser disconnect via browser.once('disconnected') handler to prevent listener leaks. - Guard anonymizeProxySugar call with proxyUrl check — skip the async call entirely when no proxy is configured (common for remote browsers). - Conditionally omit proxyServer from context options when no proxy is set, instead of passing { proxyServer: undefined }. Co-Authored-By: Claude Opus 4.6 --- .../src/puppeteer/puppeteer-plugin.ts | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index b0cf8a1aa03d..6d117f707021 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -180,7 +180,7 @@ export class PuppeteerPlugin extends BrowserPlugin< } } - browser.on('targetcreated', async (target: PuppeteerTypes.Target) => { + const targetCreatedHandler = async (target: PuppeteerTypes.Target) => { try { const page = await target.page(); @@ -193,7 +193,16 @@ export class PuppeteerPlugin extends BrowserPlugin< } catch (error: any) { this.log.exception(error, 'Failed to retrieve page from target.'); } - }); + }; + + browser.on('targetcreated', targetCreatedHandler); + + // Clean up the listener when a remote browser disconnects to prevent leaks + if (this.connectOverCDPOptions) { + browser.once('disconnected', () => { + browser.off('targetcreated', targetCreatedHandler); + }); + } const boundMethods = ( [ @@ -220,25 +229,25 @@ export class PuppeteerPlugin extends BrowserPlugin< let page: PuppeteerTypes.Page; if (useIncognitoPages) { - const [anonymizedProxyUrl, close] = await anonymizeProxySugar( - proxyUrl, - undefined, - undefined, - { ignoreProxyCertificate }, - ); + const [anonymizedProxyUrl, close] = proxyUrl + ? 
await anonymizeProxySugar(proxyUrl, undefined, undefined, { ignoreProxyCertificate }) + : ([undefined, noop] as const); try { - const context = (await (browser as any)[method]({ - proxyServer: anonymizedProxyUrl ?? proxyUrl, - })) as PuppeteerTypes.BrowserContext; + const proxyServer = anonymizedProxyUrl ?? proxyUrl; + const contextOptions = proxyServer ? { proxyServer } : {}; + const context = (await (browser as any)[method]( + contextOptions, + )) as PuppeteerTypes.BrowserContext; page = await context.newPage(...args); - if (anonymizedProxyUrl) { - page.on('close', async () => { + page.once('close', async () => { + if (anonymizedProxyUrl) { await close(); - }); - } + } + await context.close().catch(noop); + }); } catch (error) { await close(); From 01ada420f41180703c52be3875b7bb1687198500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 11:25:05 +0100 Subject: [PATCH 09/14] chore: add clarifying comments for remote launch path in base class Co-Authored-By: Claude Opus 4.6 --- packages/browser-pool/src/abstract-classes/browser-plugin.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/browser-pool/src/abstract-classes/browser-plugin.ts b/packages/browser-pool/src/abstract-classes/browser-plugin.ts index aede527bb847..3db226002b72 100644 --- a/packages/browser-pool/src/abstract-classes/browser-plugin.ts +++ b/packages/browser-pool/src/abstract-classes/browser-plugin.ts @@ -195,10 +195,12 @@ export abstract class BrowserPlugin< NewPageResult > = this.createLaunchContext(), ): Promise { + // launchOptions is only used by the local launch path below — remote connections ignore it. launchContext.launchOptions ??= {} as LibraryOptions; const { proxyUrl, launchOptions } = launchContext; + // TODO(Task 6): warn when proxyUrl is set on a remote connection — proxy is silently ignored. 
if (proxyUrl && !launchContext.isRemote) { await this._addProxyToLaunchOptions(launchContext); } From f7dc7c6301973444e13a9a6daaf53f54fa021747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 11:55:01 +0100 Subject: [PATCH 10/14] fix: clarify useIncognitoPages pattern and improve warning for WebSocket connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add comments in both plugin constructors explaining why options.useIncognitoPages is checked instead of this.useIncognitoPages (super() collapses undefined to false, losing the "not set" signal). - Strengthen warning for Playwright connectOptions (WebSocket) + useIncognitoPages: false — connect() returns a browser with no default context, which is more severe than just sharing cookies. Co-Authored-By: Claude Opus 4.6 --- .../browser-pool/src/playwright/playwright-plugin.ts | 12 ++++++++---- .../browser-pool/src/puppeteer/puppeteer-plugin.ts | 2 ++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index cabcc7352fd7..d23e20f7a120 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -69,15 +69,19 @@ export class PlaywrightPlugin extends BrowserPlugin< this.connectOptions = connectOptions; this.connectOverCDPOptions = connectOverCDPOptions; + // We check options.useIncognitoPages (not this.useIncognitoPages) because super() collapses undefined to false. + // This preserves the distinction between "not set" (undefined → default to true) and "explicitly false". 
if (this.connectOptions || this.connectOverCDPOptions) { if (options.useIncognitoPages === undefined) { this.useIncognitoPages = true; this.log.info('Remote browser detected — defaulting useIncognitoPages to true for session isolation.'); } else if (options.useIncognitoPages === false) { - this.log.warning( - 'useIncognitoPages is set to false with a remote browser connection. ' + - 'Pages will share cookies and storage on the remote browser instance.', - ); + const message = this.connectOptions + ? 'useIncognitoPages is set to false with a remote WebSocket connection. ' + + 'This may cause errors because browserType.connect() returns a browser with no default context.' + : 'useIncognitoPages is set to false with a remote browser connection. ' + + 'Pages will share cookies and storage on the remote browser instance.'; + this.log.warning(message); } } } diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index 6d117f707021..33b2105e2cf1 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -46,6 +46,8 @@ export class PuppeteerPlugin extends BrowserPlugin< super(library, baseOptions); this.connectOverCDPOptions = connectOverCDPOptions; + // We check options.useIncognitoPages (not this.useIncognitoPages) because super() collapses undefined to false. + // This preserves the distinction between "not set" (undefined → default to true) and "explicitly false". 
if (this.connectOverCDPOptions) { if (options.useIncognitoPages === undefined) { this.useIncognitoPages = true; From 0e3218b6676b8586b744740722026bfba2818f78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 12:59:23 +0100 Subject: [PATCH 11/14] feat: add warnings for ignored options on remote browser connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove spurious launchOptions warning that always fired due to framework-injected defaults, and share log instances in launchers. PRD Task 6: Warnings for Ignored & Conflicting Options - proxyUrl + remote → warning in base BrowserPlugin.launch() - useChrome + remote → warning in launcher constructors - executablePath + remote → warning in launcher constructors - useIncognitoPages: false + remote → handled by Task 5 Co-Authored-By: Claude Opus 4.6 --- .../src/abstract-classes/browser-plugin.ts | 8 ++++++- .../src/internals/playwright-launcher.ts | 21 ++++++++++++++++++ .../src/internals/puppeteer-launcher.ts | 22 +++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/packages/browser-pool/src/abstract-classes/browser-plugin.ts b/packages/browser-pool/src/abstract-classes/browser-plugin.ts index 3db226002b72..b1889f010669 100644 --- a/packages/browser-pool/src/abstract-classes/browser-plugin.ts +++ b/packages/browser-pool/src/abstract-classes/browser-plugin.ts @@ -200,7 +200,13 @@ export abstract class BrowserPlugin< const { proxyUrl, launchOptions } = launchContext; - // TODO(Task 6): warn when proxyUrl is set on a remote connection — proxy is silently ignored. + if (proxyUrl && launchContext.isRemote) { + this.log.warning( + 'proxyUrl is set but will be ignored for remote browser connections. 
' + + 'Configure proxy settings on the remote browser service instead.', + ); + } + if (proxyUrl && !launchContext.isRemote) { await this._addProxyToLaunchOptions(launchContext); } diff --git a/packages/playwright-crawler/src/internals/playwright-launcher.ts b/packages/playwright-crawler/src/internals/playwright-launcher.ts index 5c93468d63f3..e3d2e7f44c8c 100644 --- a/packages/playwright-crawler/src/internals/playwright-launcher.ts +++ b/packages/playwright-crawler/src/internals/playwright-launcher.ts @@ -2,6 +2,7 @@ import type { BrowserLaunchContext } from '@crawlee/browser'; import { BrowserLauncher, Configuration } from '@crawlee/browser'; import { PlaywrightPlugin } from '@crawlee/browser-pool'; import type { PlaywrightConnectOptions, PlaywrightConnectOverCDPOptions } from '@crawlee/browser-pool'; +import { serviceLocator } from '@crawlee/core'; import ow from 'ow'; import type { Browser, BrowserType, LaunchOptions } from 'playwright'; @@ -129,6 +130,26 @@ export class PlaywrightLauncher extends BrowserLauncher { ); this.Plugin = PlaywrightPlugin; + + const connectOptionsPresent = !!(launchContext.connectOptions || launchContext.connectOverCDPOptions); + + if (connectOptionsPresent && (launchContext.useChrome || launchContext.launchOptions?.executablePath)) { + const log = serviceLocator.getLogger().child({ prefix: 'PlaywrightLauncher' }); + + if (launchContext.useChrome) { + log.warning( + 'useChrome is set but will be ignored for remote browser connections. ' + + 'The remote service controls which browser binary is used.', + ); + } + + if (launchContext.launchOptions?.executablePath) { + log.warning( + 'executablePath is set but will be ignored for remote browser connections. 
' + + 'The remote service controls which browser binary is used.', + ); + } + } } } diff --git a/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts b/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts index 5e8333083e32..4113ea0d90bc 100644 --- a/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts +++ b/packages/puppeteer-crawler/src/internals/puppeteer-launcher.ts @@ -2,6 +2,7 @@ import type { BrowserLaunchContext } from '@crawlee/browser'; import { BrowserLauncher, Configuration } from '@crawlee/browser'; import { PuppeteerPlugin } from '@crawlee/browser-pool'; import type { PuppeteerConnectOverCDPOptions } from '@crawlee/browser-pool'; +import { serviceLocator } from '@crawlee/core'; import ow from 'ow'; import type { Browser } from 'puppeteer'; @@ -108,6 +109,27 @@ export class PuppeteerLauncher extends BrowserLauncher ); this.Plugin = PuppeteerPlugin; + + if ( + launchContext.connectOverCDPOptions && + (launchContext.useChrome || (launchContext.launchOptions as Record)?.executablePath) + ) { + const log = serviceLocator.getLogger().child({ prefix: 'PuppeteerLauncher' }); + + if (launchContext.useChrome) { + log.warning( + 'useChrome is set but will be ignored for remote browser connections. ' + + 'The remote service controls which browser binary is used.', + ); + } + + if ((launchContext.launchOptions as Record)?.executablePath) { + log.warning( + 'executablePath is set but will be ignored for remote browser connections. 
' + + 'The remote service controls which browser binary is used.', + ); + } + } } protected override _getDefaultHeadlessOption(): boolean { From a11370a495e34df673d380535a8c231e1eff7d23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 13:27:37 +0100 Subject: [PATCH 12/14] test: add unit tests for remote browser connections PRD Task 7: Unit Tests - Connection routing (Playwright CDP/WS/local, Puppeteer CDP/local) - Validation (mutual exclusion, missing endpoints) - isRemote correctness for all plugin variants - Proxy/webdriver skipping for remote, applied for local - useIncognitoPages defaults (true for remote, false for local) - Warnings (proxyUrl, useIncognitoPages: false, CDP vs WS variants) - 40 tests, all mocked (no real browser instances) Co-Authored-By: Claude Opus 4.6 --- .../browser-pool/test/remote-browser.test.ts | 624 ++++++++++++++++++ 1 file changed, 624 insertions(+) create mode 100644 packages/browser-pool/test/remote-browser.test.ts diff --git a/packages/browser-pool/test/remote-browser.test.ts b/packages/browser-pool/test/remote-browser.test.ts new file mode 100644 index 000000000000..a4e32212f9f2 --- /dev/null +++ b/packages/browser-pool/test/remote-browser.test.ts @@ -0,0 +1,624 @@ +import { vi } from 'vitest'; + +import { serviceLocator } from '@crawlee/core'; +import type { CrawleeLogger } from '@crawlee/core'; + +import { PlaywrightPlugin } from '../src/playwright/playwright-plugin.js'; +import { PuppeteerPlugin } from '../src/puppeteer/puppeteer-plugin.js'; + +// --------------------------------------------------------------------------- +// Shared mock helpers +// --------------------------------------------------------------------------- + +function createMockBrowser() { + return { + newPage: vi.fn().mockResolvedValue({ close: vi.fn(), url: vi.fn(() => 'about:blank') }), + close: vi.fn().mockResolvedValue(undefined), + contexts: vi.fn(() => []), + on: vi.fn(), + off: vi.fn(), + once: vi.fn(), + version: 
vi.fn(() => '120.0.0'), + pages: vi.fn(() => []), + process: vi.fn(() => null), + userAgent: vi.fn().mockResolvedValue('mock-ua'), + createBrowserContext: vi.fn(), + createIncognitoBrowserContext: vi.fn(), + }; +} + +function createMockPlaywrightLibrary(browser = createMockBrowser()) { + const mockContext = { + ...browser, + once: vi.fn(), + on: vi.fn(), + }; + return { + launch: vi.fn().mockResolvedValue(browser), + connect: vi.fn().mockResolvedValue(browser), + connectOverCDP: vi.fn().mockResolvedValue(browser), + name: vi.fn(() => 'chromium'), + launchPersistentContext: vi.fn().mockResolvedValue(mockContext), + }; +} + +function createMockPuppeteerLibrary(browser = createMockBrowser()) { + return { + launch: vi.fn().mockResolvedValue(browser), + connect: vi.fn().mockResolvedValue(browser), + product: 'chrome', + }; +} + +function createMockLogger(): CrawleeLogger & { warning: ReturnType; info: ReturnType } { + const mockLogger: any = { + getOptions: vi.fn(() => ({})), + setOptions: vi.fn(), + child: vi.fn(() => mockLogger), + error: vi.fn(), + exception: vi.fn(), + softFail: vi.fn(), + warning: vi.fn(), + warningOnce: vi.fn(), + info: vi.fn(), + debug: vi.fn(), + perf: vi.fn(), + deprecated: vi.fn(), + log: vi.fn(), + setLevel: vi.fn(), + getLevel: vi.fn(), + }; + return mockLogger; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('Remote browser — PlaywrightPlugin', () => { + let mockLogger: ReturnType; + + beforeEach(() => { + mockLogger = createMockLogger(); + serviceLocator.setLogger(mockLogger); + }); + + // --- Connection routing --------------------------------------------------- + + describe('connection routing', () => { + test('connectOverCDPOptions → calls connectOverCDP, not launch', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + 
connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.connectOverCDP).toHaveBeenCalledTimes(1); + expect(lib.connectOverCDP).toHaveBeenCalledWith('http://remote:9222', {}); + expect(lib.launch).not.toHaveBeenCalled(); + expect(lib.connect).not.toHaveBeenCalled(); + }); + + test('connectOptions → calls connect, not launch', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOptions: { wsEndpoint: 'ws://remote:3000' }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.connect).toHaveBeenCalledTimes(1); + expect(lib.connect).toHaveBeenCalledWith('ws://remote:3000', {}); + expect(lib.launch).not.toHaveBeenCalled(); + expect(lib.connectOverCDP).not.toHaveBeenCalled(); + }); + + test('no connect options → calls launch', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.launch).toHaveBeenCalledTimes(1); + expect(lib.connect).not.toHaveBeenCalled(); + expect(lib.connectOverCDP).not.toHaveBeenCalled(); + }); + + test('passes extra options through to connectOverCDP', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { + endpointURL: 'http://remote:9222', + timeout: 5000, + headers: { 'x-token': 'abc' }, + }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.connectOverCDP).toHaveBeenCalledWith('http://remote:9222', { + timeout: 5000, + headers: { 'x-token': 'abc' }, + }); + }); + + test('passes extra options through to connect', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOptions: { + wsEndpoint: 
'ws://remote:3000', + timeout: 3000, + headers: { Authorization: 'Bearer xyz' }, + }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.connect).toHaveBeenCalledWith('ws://remote:3000', { + timeout: 3000, + headers: { Authorization: 'Bearer xyz' }, + }); + }); + }); + + // --- Validation ----------------------------------------------------------- + + describe('validation', () => { + test('throws when both connectOptions and connectOverCDPOptions are set', () => { + const lib = createMockPlaywrightLibrary(); + + expect( + () => + new PlaywrightPlugin(lib as any, { + connectOptions: { wsEndpoint: 'ws://remote:3000' }, + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + }), + ).toThrow("Cannot set both 'connectOptions' and 'connectOverCDPOptions'"); + }); + + test('throws when connectOverCDPOptions has no endpointURL', () => { + const lib = createMockPlaywrightLibrary(); + + expect( + () => + new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: '' }, + }), + ).toThrow("'connectOverCDPOptions.endpointURL' must be a non-empty string"); + }); + + test('throws when connectOptions has no wsEndpoint', () => { + const lib = createMockPlaywrightLibrary(); + + expect( + () => + new PlaywrightPlugin(lib as any, { + connectOptions: { wsEndpoint: '' }, + }), + ).toThrow("'connectOptions.wsEndpoint' must be a non-empty string"); + }); + }); + + // --- isRemote correctness ------------------------------------------------- + + describe('isRemote', () => { + test('true when connectOverCDPOptions is present', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + }); + + const ctx = plugin.createLaunchContext(); + expect(ctx.isRemote).toBe(true); + }); + + test('true when connectOptions is present', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new 
PlaywrightPlugin(lib as any, { + connectOptions: { wsEndpoint: 'ws://remote:3000' }, + }); + + const ctx = plugin.createLaunchContext(); + expect(ctx.isRemote).toBe(true); + }); + + test('false when no connect options', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any); + + const ctx = plugin.createLaunchContext(); + expect(ctx.isRemote).toBe(false); + }); + }); + + // --- Proxy/webdriver skipping --------------------------------------------- + + describe('proxy/webdriver skipping for remote', () => { + test('proxy is not applied for remote connections', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + proxyUrl: 'http://user:pass@proxy:8080', + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + // The browser was connected via CDP, not launched — proxy is not set on launchOptions + expect(lib.connectOverCDP).toHaveBeenCalledTimes(1); + expect(lib.launch).not.toHaveBeenCalled(); + }); + + test('webdriver hiding args are not added for remote connections', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + launchOptions: { args: ['--custom-flag'] }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + // The original args should be untouched — no webdriver stealth flag injected + expect(ctx.launchOptions?.args).toEqual(['--custom-flag']); + expect(ctx.launchOptions?.args).not.toContain('--disable-blink-features=AutomationControlled'); + }); + + test('webdriver hiding args ARE added for local chromium connections', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + launchOptions: { args: ['--custom-flag'] }, + }); + + const ctx = 
plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(ctx.launchOptions?.args).toContain('--disable-blink-features=AutomationControlled'); + expect(ctx.launchOptions?.args).toContain('--custom-flag'); + }); + + test('proxy is applied for local connections', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + proxyUrl: 'http://user:pass@proxy:8080', + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.launch).toHaveBeenCalledTimes(1); + // Launch options should have proxy configured + const launchOpts = lib.launch.mock.calls[0][0]; + expect(launchOpts.proxy).toBeDefined(); + expect(launchOpts.proxy.server).toBeDefined(); + }); + }); + + // --- useIncognitoPages default -------------------------------------------- + + describe('useIncognitoPages default', () => { + test('defaults to true for remote (connectOverCDP)', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + }); + + expect(plugin.useIncognitoPages).toBe(true); + }); + + test('defaults to true for remote (connect)', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOptions: { wsEndpoint: 'ws://remote:3000' }, + }); + + expect(plugin.useIncognitoPages).toBe(true); + }); + + test('explicit false preserved for remote', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + useIncognitoPages: false, + }); + + expect(plugin.useIncognitoPages).toBe(false); + }); + + test('explicit true preserved for remote', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + useIncognitoPages: true, + 
}); + + expect(plugin.useIncognitoPages).toBe(true); + }); + + test('default false for local', () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any); + + expect(plugin.useIncognitoPages).toBe(false); + }); + }); + + // --- Warnings ------------------------------------------------------------- + + describe('warnings', () => { + test('proxyUrl + remote → warning logged', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + proxyUrl: 'http://user:pass@proxy:8080', + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(mockLogger.warning).toHaveBeenCalledWith( + expect.stringContaining('proxyUrl is set but will be ignored'), + ); + }); + + test('useIncognitoPages: false + remote CDP → warning about shared state', () => { + const lib = createMockPlaywrightLibrary(); + new PlaywrightPlugin(lib as any, { + connectOverCDPOptions: { endpointURL: 'http://remote:9222' }, + useIncognitoPages: false, + }); + + expect(mockLogger.warning).toHaveBeenCalledWith( + expect.stringContaining('Pages will share cookies and storage'), + ); + }); + + test('useIncognitoPages: false + remote WebSocket → warning about no default context', () => { + const lib = createMockPlaywrightLibrary(); + new PlaywrightPlugin(lib as any, { + connectOptions: { wsEndpoint: 'ws://remote:3000' }, + useIncognitoPages: false, + }); + + expect(mockLogger.warning).toHaveBeenCalledWith( + expect.stringContaining('browserType.connect() returns a browser with no default context'), + ); + }); + + test('no warnings for local browser usage', async () => { + const lib = createMockPlaywrightLibrary(); + const plugin = new PlaywrightPlugin(lib as any); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(mockLogger.warning).not.toHaveBeenCalled(); + }); + }); +}); + +describe('Remote 
browser — PuppeteerPlugin', () => { + let mockLogger: ReturnType; + + beforeEach(() => { + mockLogger = createMockLogger(); + serviceLocator.setLogger(mockLogger); + }); + + // --- Connection routing --------------------------------------------------- + + describe('connection routing', () => { + test('connectOverCDPOptions → calls connect, not launch', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.connect).toHaveBeenCalledTimes(1); + expect(lib.connect).toHaveBeenCalledWith({ browserWSEndpoint: 'ws://remote:9222' }); + expect(lib.launch).not.toHaveBeenCalled(); + }); + + test('no connect options → calls launch', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.launch).toHaveBeenCalledTimes(1); + expect(lib.connect).not.toHaveBeenCalled(); + }); + + test('passes all connect options through to connect', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { + browserWSEndpoint: 'ws://remote:9222', + defaultViewport: { width: 800, height: 600 }, + }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.connect).toHaveBeenCalledWith({ + browserWSEndpoint: 'ws://remote:9222', + defaultViewport: { width: 800, height: 600 }, + }); + }); + }); + + // --- Validation ----------------------------------------------------------- + + describe('validation', () => { + test('throws when connectOverCDPOptions has no browserWSEndpoint or browserURL', () => { + const lib = createMockPuppeteerLibrary(); + + expect( + () => + new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: {} as any, + 
}), + ).toThrow("connectOverCDPOptions must include either 'browserWSEndpoint' or 'browserURL'"); + }); + }); + + // --- isRemote correctness ------------------------------------------------- + + describe('isRemote', () => { + test('true when connectOverCDPOptions is present', () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + }); + + const ctx = plugin.createLaunchContext(); + expect(ctx.isRemote).toBe(true); + }); + + test('false when no connect options', () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any); + + const ctx = plugin.createLaunchContext(); + expect(ctx.isRemote).toBe(false); + }); + }); + + // --- Proxy/webdriver skipping --------------------------------------------- + + describe('proxy/webdriver skipping for remote', () => { + test('proxy is not applied for remote connections', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + proxyUrl: 'http://user:pass@proxy:8080', + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(lib.connect).toHaveBeenCalledTimes(1); + expect(lib.launch).not.toHaveBeenCalled(); + }); + + test('webdriver hiding args are not added for remote connections', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + launchOptions: { args: ['--custom-flag'] }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + // The original args should be untouched — no webdriver stealth flag injected + expect(ctx.launchOptions?.args).toEqual(['--custom-flag']); + expect(ctx.launchOptions?.args).not.toContain('--disable-blink-features=AutomationControlled'); + }); 
+ + test('webdriver hiding args ARE added for local connections', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + launchOptions: { args: ['--custom-flag'] }, + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(ctx.launchOptions?.args).toContain('--disable-blink-features=AutomationControlled'); + expect(ctx.launchOptions?.args).toContain('--custom-flag'); + }); + }); + + // --- useIncognitoPages default -------------------------------------------- + + describe('useIncognitoPages default', () => { + test('defaults to true for remote', () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + }); + + expect(plugin.useIncognitoPages).toBe(true); + }); + + test('explicit false preserved for remote', () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + useIncognitoPages: false, + }); + + expect(plugin.useIncognitoPages).toBe(false); + }); + + test('explicit true preserved for remote', () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + useIncognitoPages: true, + }); + + expect(plugin.useIncognitoPages).toBe(true); + }); + + test('default false for local', () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any); + + expect(plugin.useIncognitoPages).toBe(false); + }); + }); + + // --- Warnings ------------------------------------------------------------- + + describe('warnings', () => { + test('proxyUrl + remote → warning logged', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { 
browserWSEndpoint: 'ws://remote:9222' }, + proxyUrl: 'http://user:pass@proxy:8080', + }); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(mockLogger.warning).toHaveBeenCalledWith( + expect.stringContaining('proxyUrl is set but will be ignored'), + ); + }); + + test('useIncognitoPages: false + remote → warning logged', () => { + const lib = createMockPuppeteerLibrary(); + new PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + useIncognitoPages: false, + }); + + expect(mockLogger.warning).toHaveBeenCalledWith( + expect.stringContaining('useIncognitoPages is set to false'), + ); + }); + + test('no warnings for local browser usage', async () => { + const lib = createMockPuppeteerLibrary(); + const plugin = new PuppeteerPlugin(lib as any); + + const ctx = plugin.createLaunchContext(); + await plugin.launch(ctx); + + expect(mockLogger.warning).not.toHaveBeenCalled(); + }); + }); +}); From 2740bf4cc288460e27684ad807be4b44f4f02829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 19 Mar 2026 14:08:02 +0100 Subject: [PATCH 13/14] fix: prevent proxy URL from leaking into remote Puppeteer browser contexts When useIncognitoPages is true (default for remote) and proxyUrl is set, the newPage handler was passing proxyServer to createBrowserContext even for remote connections. For credentialed proxies this also spun up a localhost tunnel unreachable by the remote browser. 
Co-Authored-By: Claude Opus 4.6 --- .../src/puppeteer/puppeteer-plugin.ts | 10 ++-- .../browser-pool/test/remote-browser.test.ts | 47 +++++++++++++++++-- 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index 33b2105e2cf1..f325f89cc234 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -231,12 +231,16 @@ export class PuppeteerPlugin extends BrowserPlugin< let page: PuppeteerTypes.Page; if (useIncognitoPages) { - const [anonymizedProxyUrl, close] = proxyUrl - ? await anonymizeProxySugar(proxyUrl, undefined, undefined, { ignoreProxyCertificate }) + // Skip proxy setup for remote connections — proxy is managed by the remote service. + const effectiveProxyUrl = this.connectOverCDPOptions ? undefined : proxyUrl; + const [anonymizedProxyUrl, close] = effectiveProxyUrl + ? await anonymizeProxySugar(effectiveProxyUrl, undefined, undefined, { + ignoreProxyCertificate, + }) : ([undefined, noop] as const); try { - const proxyServer = anonymizedProxyUrl ?? proxyUrl; + const proxyServer = anonymizedProxyUrl ?? effectiveProxyUrl; const contextOptions = proxyServer ? 
{ proxyServer } : {}; const context = (await (browser as any)[method]( contextOptions, diff --git a/packages/browser-pool/test/remote-browser.test.ts b/packages/browser-pool/test/remote-browser.test.ts index a4e32212f9f2..c9ce63ba8d2d 100644 --- a/packages/browser-pool/test/remote-browser.test.ts +++ b/packages/browser-pool/test/remote-browser.test.ts @@ -10,9 +10,30 @@ import { PuppeteerPlugin } from '../src/puppeteer/puppeteer-plugin.js'; // Shared mock helpers // --------------------------------------------------------------------------- +function createMockPage() { + return { + close: vi.fn().mockResolvedValue(undefined), + url: vi.fn(() => 'about:blank'), + on: vi.fn(), + once: vi.fn(), + }; +} + +function createMockBrowserContext() { + const page = createMockPage(); + return { + newPage: vi.fn().mockResolvedValue(page), + close: vi.fn().mockResolvedValue(undefined), + on: vi.fn(), + once: vi.fn(), + _mockPage: page, + }; +} + function createMockBrowser() { + const mockContext = createMockBrowserContext(); return { - newPage: vi.fn().mockResolvedValue({ close: vi.fn(), url: vi.fn(() => 'about:blank') }), + newPage: vi.fn().mockResolvedValue(createMockPage()), close: vi.fn().mockResolvedValue(undefined), contexts: vi.fn(() => []), on: vi.fn(), @@ -22,8 +43,9 @@ function createMockBrowser() { pages: vi.fn(() => []), process: vi.fn(() => null), userAgent: vi.fn().mockResolvedValue('mock-ua'), - createBrowserContext: vi.fn(), - createIncognitoBrowserContext: vi.fn(), + createBrowserContext: vi.fn().mockResolvedValue(mockContext), + createIncognitoBrowserContext: vi.fn().mockResolvedValue(mockContext), + _mockContext: mockContext, }; } @@ -512,6 +534,25 @@ describe('Remote browser — PuppeteerPlugin', () => { expect(lib.launch).not.toHaveBeenCalled(); }); + test('proxy is not leaked into createBrowserContext for remote newPage', async () => { + const browser = createMockBrowser(); + const lib = createMockPuppeteerLibrary(browser); + const plugin = new 
PuppeteerPlugin(lib as any, { + connectOverCDPOptions: { browserWSEndpoint: 'ws://remote:9222' }, + proxyUrl: 'http://user:pass@proxy:8080', + }); + + const ctx = plugin.createLaunchContext(); + const wrappedBrowser = await plugin.launch(ctx); + + // Call newPage on the wrapped browser — useIncognitoPages defaults to true for remote + await (wrappedBrowser as any).newPage(); + + // createBrowserContext should be called with empty options (no proxyServer) + expect(browser.createBrowserContext).toHaveBeenCalledTimes(1); + expect(browser.createBrowserContext).toHaveBeenCalledWith({}); + }); + test('webdriver hiding args are not added for remote connections', async () => { const lib = createMockPuppeteerLibrary(); const plugin = new PuppeteerPlugin(lib as any, { From fb73726b9070fa18f6f858cdeb392a9e0c3460d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Tue, 31 Mar 2026 13:41:33 +0200 Subject: [PATCH 14/14] docs(examples): add remote browser integration examples Examples for Browserbase, Browserless, Rebrowser, and Steel using Playwright and Puppeteer. 
--- temp-examples/.env.example | 9 +++ temp-examples/.gitignore | 1 + .../examples/browserbase-playwright-ws.ts | 55 +++++++++++++++++++ .../examples/browserbase-playwright.ts | 53 ++++++++++++++++++ .../examples/browserbase-puppeteer.ts | 53 ++++++++++++++++++ .../examples/browserless-playwright-ws.ts | 29 ++++++++++ .../examples/browserless-playwright.ts | 29 ++++++++++ .../examples/browserless-puppeteer.ts | 29 ++++++++++ .../examples/rebrowser-playwright-ws.ts | 50 +++++++++++++++++ .../examples/rebrowser-playwright.ts | 31 +++++++++++ temp-examples/examples/rebrowser-puppeteer.ts | 31 +++++++++++ temp-examples/examples/steel-playwright-ws.ts | 51 +++++++++++++++++ temp-examples/examples/steel-playwright.ts | 26 +++++++++ temp-examples/examples/steel-puppeteer.ts | 26 +++++++++ temp-examples/package.json | 38 +++++++++++++ temp-examples/readme.md | 12 ++++ temp-examples/tsconfig.json | 9 +++ 17 files changed, 532 insertions(+) create mode 100644 temp-examples/.env.example create mode 100644 temp-examples/.gitignore create mode 100644 temp-examples/examples/browserbase-playwright-ws.ts create mode 100644 temp-examples/examples/browserbase-playwright.ts create mode 100644 temp-examples/examples/browserbase-puppeteer.ts create mode 100644 temp-examples/examples/browserless-playwright-ws.ts create mode 100644 temp-examples/examples/browserless-playwright.ts create mode 100644 temp-examples/examples/browserless-puppeteer.ts create mode 100644 temp-examples/examples/rebrowser-playwright-ws.ts create mode 100644 temp-examples/examples/rebrowser-playwright.ts create mode 100644 temp-examples/examples/rebrowser-puppeteer.ts create mode 100644 temp-examples/examples/steel-playwright-ws.ts create mode 100644 temp-examples/examples/steel-playwright.ts create mode 100644 temp-examples/examples/steel-puppeteer.ts create mode 100644 temp-examples/package.json create mode 100644 temp-examples/readme.md create mode 100644 temp-examples/tsconfig.json diff --git 
a/temp-examples/.env.example b/temp-examples/.env.example new file mode 100644 index 000000000000..500f5da5f2ce --- /dev/null +++ b/temp-examples/.env.example @@ -0,0 +1,9 @@ +BROWSERBASE_API_KEY= +BROWSERBASE_PROJECT_ID= +# +BROWSERLESS_TOKEN= +# +REBROWSER_API_KEY= +REBROWSER_PROFILE_ID= +# +STEEL_API_KEY= diff --git a/temp-examples/.gitignore b/temp-examples/.gitignore new file mode 100644 index 000000000000..4c49bd78f1d0 --- /dev/null +++ b/temp-examples/.gitignore @@ -0,0 +1 @@ +.env diff --git a/temp-examples/examples/browserbase-playwright-ws.ts b/temp-examples/examples/browserbase-playwright-ws.ts new file mode 100644 index 000000000000..656d2cf3a0f0 --- /dev/null +++ b/temp-examples/examples/browserbase-playwright-ws.ts @@ -0,0 +1,55 @@ +import 'dotenv/config'; + +import { PlaywrightCrawler } from 'crawlee'; + +// Browserbase requires two env variables: +// - BROWSERBASE_API_KEY: Your API key for authentication +// - BROWSERBASE_PROJECT_ID: The project to create sessions in +const apiKey = process.env.BROWSERBASE_API_KEY; +const projectId = process.env.BROWSERBASE_PROJECT_ID; + +if (!apiKey) { + throw new Error('BROWSERBASE_API_KEY env variable is required'); +} + +if (!projectId) { + throw new Error('BROWSERBASE_PROJECT_ID env variable is required'); +} + +// Step 1: Create a Browserbase session via REST API. +// This returns a session ID that we use to construct the WebSocket URL. +// You have 5 minutes to connect before the session terminates. 
+const response = await fetch('https://api.browserbase.com/v1/sessions', { + method: 'POST', + headers: { + 'x-bb-api-key': apiKey, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ projectId }), +}); + +if (!response.ok) { + throw new Error(`Failed to create Browserbase session: ${response.status} ${response.statusText}`); +} + +const session = await response.json(); +console.log(`Created Browserbase session: ${session.id}`); + +// Step 2: Connect to the session using Playwright's WebSocket connection. +// The WS URL is constructed from the API key and session ID. +const wsUrl = `wss://connect.browserbase.com?apiKey=${apiKey}&sessionId=${session.id}`; + +const crawler = new PlaywrightCrawler({ + launchContext: { + connectOptions: { + wsEndpoint: wsUrl, + }, + }, + async requestHandler({ page, request }) { + const title = await page.title(); + console.log(`[${request.loadedUrl}] ${title}`); + }, + maxRequestsPerCrawl: 1, +}); + +await crawler.run(['https://example.com']); diff --git a/temp-examples/examples/browserbase-playwright.ts b/temp-examples/examples/browserbase-playwright.ts new file mode 100644 index 000000000000..78ce8ca5569e --- /dev/null +++ b/temp-examples/examples/browserbase-playwright.ts @@ -0,0 +1,53 @@ +import 'dotenv/config'; + +import { PlaywrightCrawler } from 'crawlee'; + +// Browserbase requires two env variables: +// - BROWSERBASE_API_KEY: Your API key for authentication +// - BROWSERBASE_PROJECT_ID: The project to create sessions in +const apiKey = process.env.BROWSERBASE_API_KEY; +const projectId = process.env.BROWSERBASE_PROJECT_ID; + +if (!apiKey) { + throw new Error('BROWSERBASE_API_KEY env variable is required'); +} + +if (!projectId) { + throw new Error('BROWSERBASE_PROJECT_ID env variable is required'); +} + +// Step 1: Create a Browserbase session via REST API. +// This returns a connectUrl that we can use with Playwright's connectOverCDP. +// You have 5 minutes to connect before the session terminates. 
+const response = await fetch('https://api.browserbase.com/v1/sessions', { + method: 'POST', + headers: { + 'x-bb-api-key': apiKey, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ projectId }), +}); + +if (!response.ok) { + throw new Error(`Failed to create Browserbase session: ${response.status} ${response.statusText}`); +} + +const session = await response.json(); +console.log(`Created Browserbase session: ${session.id}`); + +// Step 2: Connect to the session using Playwright's CDP connection. +// The connectUrl from the session response is used as the CDP endpoint. +const crawler = new PlaywrightCrawler({ + launchContext: { + connectOverCDPOptions: { + endpointURL: session.connectUrl, + }, + }, + async requestHandler({ page, request }) { + const title = await page.title(); + console.log(`[${request.loadedUrl}] ${title}`); + }, + maxRequestsPerCrawl: 1, +}); + +await crawler.run(['https://example.com']); diff --git a/temp-examples/examples/browserbase-puppeteer.ts b/temp-examples/examples/browserbase-puppeteer.ts new file mode 100644 index 000000000000..f6dcce121965 --- /dev/null +++ b/temp-examples/examples/browserbase-puppeteer.ts @@ -0,0 +1,53 @@ +import 'dotenv/config'; + +import { PuppeteerCrawler } from 'crawlee'; + +// Browserbase requires two env variables: +// - BROWSERBASE_API_KEY: Your API key for authentication +// - BROWSERBASE_PROJECT_ID: The project to create sessions in +const apiKey = process.env.BROWSERBASE_API_KEY; +const projectId = process.env.BROWSERBASE_PROJECT_ID; + +if (!apiKey) { + throw new Error('BROWSERBASE_API_KEY env variable is required'); +} + +if (!projectId) { + throw new Error('BROWSERBASE_PROJECT_ID env variable is required'); +} + +// Step 1: Create a Browserbase session via REST API. +// This returns a connectUrl that we can use with Puppeteer's CDP connection. +// You have 5 minutes to connect before the session terminates. 
+const response = await fetch('https://api.browserbase.com/v1/sessions', { + method: 'POST', + headers: { + 'x-bb-api-key': apiKey, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ projectId }), +}); + +if (!response.ok) { + throw new Error(`Failed to create Browserbase session: ${response.status} ${response.statusText}`); +} + +const session = await response.json(); +console.log(`Created Browserbase session: ${session.id}`); + +// Step 2: Connect to the session using Puppeteer's CDP connection. +// The connectUrl from the session response is used as the browserWSEndpoint. +const crawler = new PuppeteerCrawler({ + launchContext: { + connectOverCDPOptions: { + browserWSEndpoint: session.connectUrl, + }, + }, + async requestHandler({ page, request }) { + const title = await page.title(); + console.log(`[${request.loadedUrl}] ${title}`); + }, + maxRequestsPerCrawl: 1, +}); + +await crawler.run(['https://example.com']); diff --git a/temp-examples/examples/browserless-playwright-ws.ts b/temp-examples/examples/browserless-playwright-ws.ts new file mode 100644 index 000000000000..ec659b59f025 --- /dev/null +++ b/temp-examples/examples/browserless-playwright-ws.ts @@ -0,0 +1,29 @@ +import 'dotenv/config'; + +import { PlaywrightCrawler } from 'crawlee'; + +const token = process.env.BROWSERLESS_TOKEN; + +if (!token) { + throw new Error('BROWSERLESS_TOKEN env variable is required'); +} + +const crawler = new PlaywrightCrawler({ + launchContext: { + connectOptions: { + wsEndpoint: `wss://production-sfo.browserless.io/chromium/playwright?token=${token}`, + }, + }, + async requestHandler({ page, request, enqueueLinks }) { + const title = await page.title(); + console.log(`[${request.loadedUrl}] ${title}`); + + await enqueueLinks({ + globs: ['https://www.crawlee.dev/**'], + limit: 5, + }); + }, + maxRequestsPerCrawl: 10, +}); + +await crawler.run(['https://www.crawlee.dev']); diff --git a/temp-examples/examples/browserless-playwright.ts 
b/temp-examples/examples/browserless-playwright.ts new file mode 100644 index 000000000000..ca7712c62ed0 --- /dev/null +++ b/temp-examples/examples/browserless-playwright.ts @@ -0,0 +1,29 @@ +import 'dotenv/config'; + +import { PlaywrightCrawler } from 'crawlee'; + +const token = process.env.BROWSERLESS_TOKEN; + +if (!token) { + throw new Error('BROWSERLESS_TOKEN env variable is required'); +} + +const crawler = new PlaywrightCrawler({ + launchContext: { + connectOverCDPOptions: { + endpointURL: `wss://production-sfo.browserless.io?token=${token}`, + }, + }, + async requestHandler({ page, request, enqueueLinks }) { + const title = await page.title(); + console.log(`[${request.loadedUrl}] ${title}`); + + await enqueueLinks({ + globs: ['https://www.crawlee.dev/**'], + limit: 5, + }); + }, + maxRequestsPerCrawl: 10, +}); + +await crawler.run(['https://www.crawlee.dev']); diff --git a/temp-examples/examples/browserless-puppeteer.ts b/temp-examples/examples/browserless-puppeteer.ts new file mode 100644 index 000000000000..c47fbe214420 --- /dev/null +++ b/temp-examples/examples/browserless-puppeteer.ts @@ -0,0 +1,29 @@ +import 'dotenv/config'; + +import { PuppeteerCrawler } from 'crawlee'; + +const token = process.env.BROWSERLESS_TOKEN; + +if (!token) { + throw new Error('BROWSERLESS_TOKEN env variable is required'); +} + +const crawler = new PuppeteerCrawler({ + launchContext: { + connectOverCDPOptions: { + browserWSEndpoint: `wss://production-sfo.browserless.io?token=${token}`, + }, + }, + async requestHandler({ page, request, enqueueLinks }) { + const title = await page.title(); + console.log(`[${request.loadedUrl}] ${title}`); + + await enqueueLinks({ + globs: ['https://www.crawlee.dev/**'], + limit: 5, + }); + }, + maxRequestsPerCrawl: 10, +}); + +await crawler.run(['https://www.crawlee.dev']); diff --git a/temp-examples/examples/rebrowser-playwright-ws.ts b/temp-examples/examples/rebrowser-playwright-ws.ts new file mode 100644 index 000000000000..31587ceca6bb --- 
/dev/null +++ b/temp-examples/examples/rebrowser-playwright-ws.ts @@ -0,0 +1,50 @@ +import 'dotenv/config'; + +import { PlaywrightCrawler } from 'crawlee'; + +const apiKey = process.env.REBROWSER_API_KEY; +const profileId = process.env.REBROWSER_PROFILE_ID; + +if (!apiKey) { + throw new Error('REBROWSER_API_KEY env variable is required'); +} + +// Step 1: Start a Rebrowser run via REST API. +// This gives you a dedicated WebSocket endpoint for the session. +// You can optionally specify a profileId and proxyUrl for advanced control. +const startRunUrl = new URL(`https://rebrowser.net/api/startRun?apikey=${apiKey}`); + +if (profileId) { + startRunUrl.searchParams.set('profileId', profileId); + console.log(`Using Rebrowser profile: ${profileId}`); +} + +const response = await fetch(startRunUrl.toString()); + +if (!response.ok) { + throw new Error(`Failed to start Rebrowser run: ${response.status} ${response.statusText}`); +} + +const run = await response.json(); +console.log(`Started Rebrowser run with wsEndpoint: ${run.wsEndpoint}`); + +// Step 2: Connect to the run using Playwright's WebSocket connection. +const crawler = new PlaywrightCrawler({ + launchContext: { + connectOptions: { + wsEndpoint: run.wsEndpoint, + }, + }, + async requestHandler({ page, request }) { + const title = await page.title(); + console.log(`[${request.loadedUrl}] ${title}`); + }, + maxRequestsPerCrawl: 1, +}); + +await crawler.run(['https://example.com']); + +// Note: this example does not call finishRun after the crawl completes. +// Rebrowser recommends an explicit finishRun to avoid idle billing. +// The browser disconnects automatically after the crawl, but calling the REST API +// finishRun endpoint ensures the run is cleanly terminated on Rebrowser's side.
diff --git a/temp-examples/examples/rebrowser-playwright.ts b/temp-examples/examples/rebrowser-playwright.ts
new file mode 100644
index 000000000000..f88783238192
--- /dev/null
+++ b/temp-examples/examples/rebrowser-playwright.ts
@@ -0,0 +1,32 @@
+import 'dotenv/config';
+
+import { PlaywrightCrawler } from 'crawlee';
+
+const apiKey = process.env.REBROWSER_API_KEY;
+
+if (!apiKey) {
+    throw new Error('REBROWSER_API_KEY env variable is required');
+}
+
+// Rebrowser simple connection: no profile or run creation needed.
+// A random profile is auto-selected when you connect with just an API key.
+// Proxies are managed via the Rebrowser dashboard or WS URL params.
+// The API key is percent-encoded so reserved characters cannot corrupt the query string.
+const crawler = new PlaywrightCrawler({
+    launchContext: {
+        connectOverCDPOptions: {
+            endpointURL: `wss://api.rebrowser.net?apikey=${encodeURIComponent(apiKey)}`,
+        },
+    },
+    async requestHandler({ page, request }) {
+        const title = await page.title();
+        console.log(`[${request.loadedUrl}] ${title}`);
+    },
+    maxRequestsPerCrawl: 1,
+});
+
+await crawler.run(['https://example.com']);
+
+// Note: Rebrowser recommends calling finishRun after you're done to avoid idle billing.
+// With Crawlee, the browser disconnects automatically after the crawl finishes,
+// which should end the run. For explicit control, use the REST API finishRun endpoint.
diff --git a/temp-examples/examples/rebrowser-puppeteer.ts b/temp-examples/examples/rebrowser-puppeteer.ts
new file mode 100644
index 000000000000..54d49065c712
--- /dev/null
+++ b/temp-examples/examples/rebrowser-puppeteer.ts
@@ -0,0 +1,31 @@
+import 'dotenv/config';
+
+import { PuppeteerCrawler } from 'crawlee';
+
+const apiKey = process.env.REBROWSER_API_KEY;
+
+if (!apiKey) {
+    throw new Error('REBROWSER_API_KEY env variable is required');
+}
+
+// Rebrowser simple connection: no profile or run creation needed.
+// A random profile is auto-selected when you connect with just an API key.
+// Proxies are managed via the Rebrowser dashboard or WS URL params.
+const crawler = new PuppeteerCrawler({
+    launchContext: {
+        connectOverCDPOptions: {
+            browserWSEndpoint: `wss://api.rebrowser.net?apikey=${encodeURIComponent(apiKey)}`,
+        },
+    },
+    async requestHandler({ page, request }) {
+        const title = await page.title();
+        console.log(`[${request.loadedUrl}] ${title}`);
+    },
+    maxRequestsPerCrawl: 1,
+});
+
+await crawler.run(['https://example.com']);
+
+// Note: Rebrowser recommends calling finishRun after you're done to avoid idle billing.
+// With Crawlee, the browser disconnects automatically after the crawl finishes,
+// which should end the run. For explicit control, use the REST API finishRun endpoint.
diff --git a/temp-examples/examples/steel-playwright-ws.ts b/temp-examples/examples/steel-playwright-ws.ts
new file mode 100644
index 000000000000..55f4712a5315
--- /dev/null
+++ b/temp-examples/examples/steel-playwright-ws.ts
@@ -0,0 +1,75 @@
+import 'dotenv/config';
+
+import { PlaywrightCrawler } from 'crawlee';
+
+const apiKey = process.env.STEEL_API_KEY;
+
+if (!apiKey) {
+    throw new Error('STEEL_API_KEY env variable is required');
+}
+
+// Step 1: Create a Steel session via REST API.
+// Explicit session creation enables advanced features like proxy and CAPTCHA solving.
+const response = await fetch('https://api.steel.dev/v1/sessions', {
+    method: 'POST',
+    headers: {
+        'Steel-Api-Key': apiKey,
+        'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({ useProxy: true, solveCaptcha: true }),
+});
+
+if (!response.ok) {
+    throw new Error(`Failed to create Steel session: ${response.status} ${response.statusText}`);
+}
+
+// The response body is untyped JSON, so check the one field this script relies on.
+const session = await response.json();
+
+if (!session?.id) {
+    throw new Error('Steel session response did not contain an id');
+}
+
+console.log(`Created Steel session: ${session.id}`);
+
+// Step 2: Connect to the session using Playwright's WebSocket connection.
+// The session ID is passed as a query parameter to the Steel WebSocket endpoint.
+// Both values are percent-encoded so reserved characters cannot corrupt the URL.
+// NOTE(review): `connectOptions` mirrors `browserType.connect()`; confirm this endpoint is not CDP-only.
+const sessionId = encodeURIComponent(String(session.id));
+const wsEndpoint = `wss://connect.steel.dev?apiKey=${encodeURIComponent(apiKey)}&sessionId=${sessionId}`;
+
+const crawler = new PlaywrightCrawler({
+    launchContext: {
+        connectOptions: {
+            wsEndpoint,
+        },
+    },
+    async requestHandler({ page, request }) {
+        const title = await page.title();
+        console.log(`[${request.loadedUrl}] ${title}`);
+    },
+    maxRequestsPerCrawl: 1,
+});
+
+try {
+    await crawler.run(['https://example.com']);
+} finally {
+    // Step 3: Release the session (optional — Steel auto-releases on disconnect).
+    // Done in `finally` so a failed crawl does not skip the release.
+    try {
+        const releaseResponse = await fetch(`https://api.steel.dev/v1/sessions/${sessionId}/release`, {
+            method: 'POST',
+            headers: { 'Steel-Api-Key': apiKey },
+        });
+
+        if (!releaseResponse.ok) {
+            throw new Error(`${releaseResponse.status} ${releaseResponse.statusText}`);
+        }
+
+        console.log(`Released Steel session: ${session.id}`);
+    } catch (error: unknown) {
+        // Best-effort cleanup: report the problem without masking the crawl's own outcome.
+        console.warn(`Failed to release Steel session ${session.id}:`, error);
+    }
+}
diff --git a/temp-examples/examples/steel-playwright.ts b/temp-examples/examples/steel-playwright.ts
new file mode 100644
index 000000000000..7bf2913054e9
--- /dev/null
+++ b/temp-examples/examples/steel-playwright.ts
@@ -0,0 +1,26 @@
+import 'dotenv/config';
+
+import { PlaywrightCrawler } from 'crawlee';
+
+const apiKey = process.env.STEEL_API_KEY;
+
+if (!apiKey) {
+    throw new Error('STEEL_API_KEY env variable is required');
+}
+
+// Steel direct connection: no session creation needed.
+// A session is auto-created when you connect and auto-released on disconnect.
+const crawler = new PlaywrightCrawler({
+    launchContext: {
+        connectOverCDPOptions: {
+            endpointURL: `wss://connect.steel.dev?apiKey=${encodeURIComponent(apiKey)}`,
+        },
+    },
+    async requestHandler({ page, request }) {
+        const title = await page.title();
+        console.log(`[${request.loadedUrl}] ${title}`);
+    },
+    maxRequestsPerCrawl: 1,
+});
+
+await crawler.run(['https://example.com']);
diff --git a/temp-examples/examples/steel-puppeteer.ts b/temp-examples/examples/steel-puppeteer.ts
new file mode 100644
index 000000000000..68dc3cdb59a9
--- /dev/null
+++ b/temp-examples/examples/steel-puppeteer.ts
@@ -0,0 +1,27 @@
+import 'dotenv/config';
+
+import { PuppeteerCrawler } from 'crawlee';
+
+const apiKey = process.env.STEEL_API_KEY;
+
+if (!apiKey) {
+    throw new Error('STEEL_API_KEY env variable is required');
+}
+
+// Steel direct connection: no session creation needed.
+// A session is auto-created when you connect and auto-released on disconnect.
+// The API key is percent-encoded so reserved characters cannot corrupt the query string.
+const crawler = new PuppeteerCrawler({
+    launchContext: {
+        connectOverCDPOptions: {
+            browserWSEndpoint: `wss://connect.steel.dev?apiKey=${encodeURIComponent(apiKey)}`,
+        },
+    },
+    async requestHandler({ page, request }) {
+        const title = await page.title();
+        console.log(`[${request.loadedUrl}] ${title}`);
+    },
+    maxRequestsPerCrawl: 1,
+});
+
+await crawler.run(['https://example.com']);
diff --git a/temp-examples/package.json b/temp-examples/package.json
new file mode 100644
index 000000000000..cbcb71ed6c1b
--- /dev/null
+++ b/temp-examples/package.json
@@ -0,0 +1,38 @@
+{
+    "name": "temp-examples",
+    "version": "1.0.0",
+    "private": true,
+    "type": "module",
+    "scripts": {
+        "example:browserless-puppeteer": "node --experimental-strip-types examples/browserless-puppeteer.ts",
+        "example:browserless-playwright": "node --experimental-strip-types examples/browserless-playwright.ts",
+        "example:browserless-playwright-ws": "node --experimental-strip-types examples/browserless-playwright-ws.ts",
+        "example:browserbase-puppeteer": "node --experimental-strip-types examples/browserbase-puppeteer.ts",
+        "example:browserbase-playwright": "node --experimental-strip-types examples/browserbase-playwright.ts",
+        "example:browserbase-playwright-ws": "node --experimental-strip-types examples/browserbase-playwright-ws.ts",
+        "example:steel-puppeteer": "node --experimental-strip-types examples/steel-puppeteer.ts",
+        "example:steel-playwright": "node --experimental-strip-types examples/steel-playwright.ts",
+        "example:steel-playwright-ws": "node --experimental-strip-types examples/steel-playwright-ws.ts",
+        "example:rebrowser-puppeteer": "node --experimental-strip-types examples/rebrowser-puppeteer.ts",
+        "example:rebrowser-playwright": "node --experimental-strip-types examples/rebrowser-playwright.ts",
+        "example:rebrowser-playwright-ws": "node --experimental-strip-types examples/rebrowser-playwright-ws.ts"
+    },
+    "dependencies": {
+        "@crawlee/basic": "file:../packages/basic-crawler/dist",
+        "@crawlee/browser": "file:../packages/browser-crawler/dist",
+        "@crawlee/browser-pool": "file:../packages/browser-pool/dist",
+        "@crawlee/cheerio": "file:../packages/cheerio-crawler/dist",
+        "@crawlee/cli": "file:../packages/cli/dist",
+        "@crawlee/core": "file:../packages/core/dist",
+        "@crawlee/http": "file:../packages/http-crawler/dist",
+        "@crawlee/jsdom": "file:../packages/jsdom-crawler/dist",
+        "@crawlee/linkedom": "file:../packages/linkedom-crawler/dist",
+        "@crawlee/playwright": "file:../packages/playwright-crawler/dist",
+        "@crawlee/puppeteer": "file:../packages/puppeteer-crawler/dist",
+        "@crawlee/types": "file:../packages/types/dist",
+        "@crawlee/utils": "file:../packages/utils/dist",
+        "@types/node": "^25.2.0",
+        "crawlee": "file:../packages/crawlee/dist",
+        "dotenv": "^17.3.1"
+    }
+}
diff --git a/temp-examples/readme.md b/temp-examples/readme.md
new file mode 100644
index 000000000000..a570750b6774
--- /dev/null
+++ b/temp-examples/readme.md
@@ -0,0 +1,12 @@
+# How to start
+
+```sh
+# from the repository root
+nr clean
+nr build
+
+cd temp-examples
+npm install
+npm run example:browserless-puppeteer
+# ...or any other `example:*` script from package.json
+```
diff --git a/temp-examples/tsconfig.json b/temp-examples/tsconfig.json
new file mode 100644
index 000000000000..5fcc4b7bad3a
--- /dev/null
+++ b/temp-examples/tsconfig.json
@@ -0,0 +1,9 @@
+{
+    "compilerOptions": {
+        "target": "ES2022",
+        "module": "Node16",
+        "moduleResolution": "Node16",
+        "esModuleInterop": true,
+        "sourceMap": false
+    }
+}