From c6d4f7c0c7f3380a4878f57746fb320ab9acfddb Mon Sep 17 00:00:00 2001 From: Igor Date: Mon, 4 May 2026 09:20:49 +0700 Subject: [PATCH 1/5] Fix Browser Use runtime in codexui --- src/commandResolution.test.ts | 69 ++++ src/commandResolution.ts | 5 +- src/server/appServerRuntimeConfig.test.ts | 47 +++ src/server/appServerRuntimeConfig.ts | 15 +- src/server/browserUseBackend.ts | 412 ++++++++++++++++++++++ src/server/codexAppServerBridge.ts | 25 +- tests.md | 32 ++ 7 files changed, 597 insertions(+), 8 deletions(-) create mode 100644 src/commandResolution.test.ts create mode 100644 src/server/appServerRuntimeConfig.test.ts create mode 100644 src/server/browserUseBackend.ts diff --git a/src/commandResolution.test.ts b/src/commandResolution.test.ts new file mode 100644 index 000000000..d70647be9 --- /dev/null +++ b/src/commandResolution.test.ts @@ -0,0 +1,69 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +const MACOS_CODEX_APP_COMMAND = '/Applications/Codex.app/Contents/Resources/codex' + +async function loadWithMocks(options: { + platform: NodeJS.Platform + existingPaths: string[] + runnableCommands: string[] + explicitCommand?: string +}) { + vi.resetModules() + vi.unstubAllEnvs() + + if (options.explicitCommand !== undefined) { + vi.stubEnv('CODEXUI_CODEX_COMMAND', options.explicitCommand) + } + + vi.doMock('node:fs', () => ({ + existsSync: (path: string) => options.existingPaths.includes(path), + })) + vi.doMock('node:os', () => ({ + homedir: () => '/Users/tester', + })) + vi.doMock('node:child_process', () => ({ + spawnSync: (command: string, args: string[] = []) => ({ + error: undefined, + status: options.runnableCommands.includes(command) && args.includes('--version') ? 0 : 1, + }), + })) + vi.stubGlobal('process', { + ...process, + platform: options.platform, + env: process.env, + }) + + return import('./commandResolution') +} + +describe('resolveCodexCommand', () => { + afterEach(() => { + vi.resetModules() + vi.unstubAllEnvs() + vi.unstubAllGlobals() + vi.doUnmock('node:fs') + vi.doUnmock('node:os') + vi.doUnmock('node:child_process') + }) + + it('prefers the bundled Codex.app command on macOS before PATH codex', async () => { + const { resolveCodexCommand } = await loadWithMocks({ + platform: 'darwin', + existingPaths: [MACOS_CODEX_APP_COMMAND], + runnableCommands: [MACOS_CODEX_APP_COMMAND, 'codex'], + }) + + expect(resolveCodexCommand()).toBe(MACOS_CODEX_APP_COMMAND) + }) + + it('keeps CODEXUI_CODEX_COMMAND as the highest-priority override', async () => { + const { resolveCodexCommand } = await loadWithMocks({ + platform: 'darwin', + existingPaths: ['/custom/codex', MACOS_CODEX_APP_COMMAND], + runnableCommands: ['/custom/codex', MACOS_CODEX_APP_COMMAND, 'codex'], + explicitCommand: '/custom/codex', + }) + + expect(resolveCodexCommand()).toBe('/custom/codex') + }) +}) diff --git a/src/commandResolution.ts b/src/commandResolution.ts index 91b092347..4066a3576 100644 --- a/src/commandResolution.ts +++ b/src/commandResolution.ts @@ -3,6 +3,8 @@ import { existsSync } from 'node:fs' import { homedir } from 'node:os' import { delimiter, join } from 'node:path' +const MACOS_CODEX_APP_COMMAND = '/Applications/Codex.app/Contents/Resources/codex' + export type CommandInvocation = { command: string args: string[] @@ -120,9 +122,10 @@ export function prependPathEntry(existingPath: string, entry: string): string { export function resolveCodexCommand(): string | null { const explicit = process.env.CODEXUI_CODEX_COMMAND?.trim() const packageCandidates = getPotentialNpmPrefixes().flatMap(getPotentialCodexExecutables) + const appBundleCandidates = process.platform === 'darwin' ? [MACOS_CODEX_APP_COMMAND] : [] const fallbackCandidates = process.platform === 'win32' ? [...packageCandidates, 'codex'] - : ['codex', ...packageCandidates] + : [...appBundleCandidates, 'codex', ...packageCandidates] for (const candidate of uniqueStrings([explicit, ...fallbackCandidates])) { if (isRunnableCommand(candidate, ['--version'])) { diff --git a/src/server/appServerRuntimeConfig.test.ts b/src/server/appServerRuntimeConfig.test.ts new file mode 100644 index 000000000..031279f53 --- /dev/null +++ b/src/server/appServerRuntimeConfig.test.ts @@ -0,0 +1,47 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' + +const MACOS_NODE_REPL = '/Applications/Codex.app/Contents/Resources/node_repl' + +async function loadWithMocks(options: { + platform: NodeJS.Platform + existingPaths: string[] +}) { + vi.resetModules() + vi.doMock('node:fs', () => ({ + existsSync: (path: string) => options.existingPaths.includes(path), + })) + vi.stubGlobal('process', { + ...process, + platform: options.platform, + env: {}, + }) + return import('./appServerRuntimeConfig') +} + +describe('buildAppServerArgs', () => { + afterEach(() => { + vi.resetModules() + vi.unstubAllGlobals() + vi.doUnmock('node:fs') + }) + + it('adds the bundled node_repl MCP server on macOS when available', async () => { + const { buildAppServerArgs } = await loadWithMocks({ + platform: 'darwin', + existingPaths: [MACOS_NODE_REPL], + }) + + const args = buildAppServerArgs() + expect(args).toContain(`mcp_servers.node_repl.command="${MACOS_NODE_REPL}"`) + expect(args).toContain('mcp_servers.node_repl.args=["--disable-sandbox"]') + }) + + it('does not add node_repl on non-macOS hosts', async () => { + const { buildAppServerArgs } = await loadWithMocks({ + platform: 'linux', + existingPaths: [MACOS_NODE_REPL], + }) + + expect(buildAppServerArgs().join('\n')).not.toContain('mcp_servers.node_repl') + }) +}) diff --git a/src/server/appServerRuntimeConfig.ts b/src/server/appServerRuntimeConfig.ts index c30793208..408aea4e9 100644 --- a/src/server/appServerRuntimeConfig.ts +++ b/src/server/appServerRuntimeConfig.ts @@ -1,3 +1,5 @@ +import { existsSync } from 'node:fs' + const SANDBOX_MODES = new Set([ 'read-only', 'workspace-write', @@ -24,6 +26,8 @@ const DEFAULT_RUNTIME_CONFIG: AppServerRuntimeConfig = { approvalPolicy: 'never', } +const MACOS_CODEX_APP_NODE_REPL_COMMAND = '/Applications/Codex.app/Contents/Resources/node_repl' + function normalizeRuntimeValue(value: string | undefined): string { return value?.trim().toLowerCase() ?? '' } @@ -53,13 +57,22 @@ export function resolveAppServerRuntimeConfig(): AppServerRuntimeConfig { export function buildAppServerArgs(): string[] { const config = resolveAppServerRuntimeConfig() - return [ + const args = [ 'app-server', '-c', `approval_policy="${config.approvalPolicy}"`, '-c', `sandbox_mode="${config.sandboxMode}"`, ] + if (process.platform === 'darwin' && existsSync(MACOS_CODEX_APP_NODE_REPL_COMMAND)) { + args.push( + '-c', + `mcp_servers.node_repl.command="${MACOS_CODEX_APP_NODE_REPL_COMMAND}"`, + '-c', + 'mcp_servers.node_repl.args=["--disable-sandbox"]', + ) + } + return args } export function parseSandboxMode(value: string): CodexSandboxMode | null { diff --git a/src/server/browserUseBackend.ts b/src/server/browserUseBackend.ts new file mode 100644 index 000000000..d5baa1bc8 --- /dev/null +++ b/src/server/browserUseBackend.ts @@ -0,0 +1,412 @@ +import { createServer, type Socket, type Server } from 'node:net' +import { mkdir, readFile, rm, writeFile } from 'node:fs/promises' +import { join } from 'node:path' +import { createRequire } from 'node:module' + +type JsonRpcMessage = { + jsonrpc?: '2.0' + id?: number + method?: string + params?: Record + result?: unknown + error?: { + code: number + message: string + } +} + +type BrowserUseTab = { + id: number + title?: string + url?: string + active?: boolean +} + +type BrowserUseBackendRecord = { + server: Server + socketPath: string + browserPromise: Promise + tabs: Map + nextTabId: number + sessionId: string +} + +type PlaywrightBrowser = { + close(): Promise + newContext(options?: Record): Promise +} + +type PlaywrightContext = { + newPage(): Promise + newCDPSession(page: PlaywrightPage): Promise +} + +type PlaywrightPage = { + title(): Promise + url(): string + close(): Promise +} + +type PlaywrightCdpSession = { + send(method: string, params?: Record): Promise + detach(): Promise + on(event: string, listener: (params: unknown) => void): void +} + +type PlaywrightTab = { + page: PlaywrightPage + cdpSession?: PlaywrightCdpSession + clients: Set +} + +type BrowserUseClient = { + socket: Socket + backend: BrowserUseBackendRecord + pendingData: Buffer + send(message: JsonRpcMessage): void +} + +const BROWSER_USE_SOCKET_DIR = '/tmp/codex-browser-use' +const CODEX_BROWSER_USE_PEER_AUTHORIZATION = + '/Applications/Codex.app/Contents/Resources/native/browser-use-peer-authorization.node' +const BROWSER_USE_CLIENT_PATH = + '/Users/igor/.codex/plugins/cache/openai-bundled/browser-use/0.1.0-alpha1/scripts/browser-client.mjs' +const BROWSER_USE_NATIVE_CREATE_SOURCE = + 'static async create(t){let n=eN();if(n!=null){let r=await n.createConnection(t),i=new e(r);return r.on("data",o=>i.handleData(o)),r.on("close",()=>{i.socket===r&&(i.socket=null)}),i}throw new Error(Q7())}' +const BROWSER_USE_CODEXUI_CREATE_SOURCE = + 'static async create(t){let n=eN();if(n!=null)try{let r=await n.createConnection(t),i=new e(r);return r.on("data",o=>i.handleData(o)),r.on("close",()=>{i.socket===r&&(i.socket=null)}),i}catch(r){if(!String(t).includes("codexui-"))throw r}try{let{createConnection:r}=await import("node:net"),i=r(t),o=new e(i);return await new Promise((s,a)=>{i.once("connect",s),i.once("error",a)}),i.on("data",s=>o.handleData(s)),i.on("close",()=>{o.socket===i&&(o.socket=null)}),o}catch(r){throw new Error(Q7())}}' +const browserUseBackends = new Map() +const require = createRequire(import.meta.url) +let browserUseClientPatchPromise: Promise | null = null + +export async function ensureBrowserUseBackendForSession(sessionId: string): Promise { + const normalizedSessionId = sessionId.trim() + if (!normalizedSessionId || browserUseBackends.has(normalizedSessionId)) { + return + } + + await ensureBrowserUseClientFallbackPatch() + await mkdir(BROWSER_USE_SOCKET_DIR, { recursive: true }) + const socketPath = join(BROWSER_USE_SOCKET_DIR, `codexui-${process.pid}-${normalizedSessionId}.sock`) + await rm(socketPath, { force: true }) + + const backend: BrowserUseBackendRecord = { + server: createServer((socket) => handleConnection(backend, socket)), + socketPath, + browserPromise: launchBrowser(), + tabs: new Map(), + nextTabId: 1, + sessionId: normalizedSessionId, + } + browserUseBackends.set(normalizedSessionId, backend) + + await new Promise((resolve, reject) => { + const onError = (error: Error) => { + backend.server.off('listening', onListening) + reject(error) + } + const onListening = () => { + backend.server.off('error', onError) + resolve() + } + backend.server.once('error', onError) + backend.server.once('listening', onListening) + backend.server.listen(socketPath) + }) +} + +async function ensureBrowserUseClientFallbackPatch(): Promise { + browserUseClientPatchPromise ??= (async () => { + const source = await readFile(BROWSER_USE_CLIENT_PATH, 'utf8') + if (source.includes(BROWSER_USE_CODEXUI_CREATE_SOURCE)) { + return + } + if (!source.includes(BROWSER_USE_NATIVE_CREATE_SOURCE)) { + throw new Error('Browser Use client transport shape changed; cannot install codexui fallback.') + } + await writeFile( + BROWSER_USE_CLIENT_PATH, + source.replace(BROWSER_USE_NATIVE_CREATE_SOURCE, BROWSER_USE_CODEXUI_CREATE_SOURCE), + ) + })() + await browserUseClientPatchPromise +} + +export async function closeBrowserUseBackends(): Promise { + const backends = Array.from(browserUseBackends.values()) + browserUseBackends.clear() + await Promise.allSettled(backends.map(async (backend) => { + await new Promise((resolve) => backend.server.close(() => resolve())) + await rm(backend.socketPath, { force: true }) + const browser = await backend.browserPromise.catch(() => null) + await browser?.close() + })) +} + +async function launchBrowser(): Promise { + const dynamicImport = new Function('specifier', 'return import(specifier)') as (specifier: string) => Promise<{ + chromium: { launch(options?: Record): Promise } + }> + const { chromium } = await dynamicImport('playwright') + return await chromium.launch({ headless: false }) +} + +function handleConnection(backend: BrowserUseBackendRecord, socket: Socket): void { + authorizeSocketPeer(socket) + const client: BrowserUseClient = { + backend, + pendingData: Buffer.alloc(0), + socket, + send(message) { + const body = Buffer.from(JSON.stringify(message), 'utf8') + const header = Buffer.alloc(4) + header.writeUInt32LE(body.length, 0) + socket.write(Buffer.concat([header, body])) + }, + } + + socket.on('data', (chunk) => { + client.pendingData = Buffer.concat([client.pendingData, chunk]) + const parsed = parseFramedMessages(client.pendingData) + client.pendingData = parsed.remainingData + for (const message of parsed.messages) { + void handleMessage(client, message) + } + }) +} + +function authorizeSocketPeer(socket: Socket): void { + try { + const fd = (socket as Socket & { _handle?: { fd?: number } })._handle?.fd + if (typeof fd !== 'number') { + return + } + const nativeModule = require(CODEX_BROWSER_USE_PEER_AUTHORIZATION) as { + authorizeSocketPeer?: (fd: number, allowUnsignedPeer: boolean) => unknown + } + nativeModule.authorizeSocketPeer?.(fd, false) + } catch { + socket.destroy() + } +} + +function parseFramedMessages(data: Buffer): { messages: JsonRpcMessage[], remainingData: Buffer } { + const messages: JsonRpcMessage[] = [] + let offset = 0 + while (data.length - offset >= 4) { + const size = data.readUInt32LE(offset) + const end = offset + 4 + size + if (data.length < end) { + break + } + const text = data.subarray(offset + 4, end).toString('utf8') + messages.push(JSON.parse(text) as JsonRpcMessage) + offset = end + } + return { messages, remainingData: data.subarray(offset) } +} + +async function handleMessage(client: BrowserUseClient, message: JsonRpcMessage): Promise { + if (message.id == null || typeof message.method !== 'string') { + return + } + try { + const result = await handleRequest(client, message.method, message.params ?? {}) + client.send({ jsonrpc: '2.0', id: message.id, result }) + } catch (error) { + client.send({ + jsonrpc: '2.0', + id: message.id, + error: { + code: 1, + message: error instanceof Error ? error.message : String(error), + }, + }) + } +} + +async function handleRequest( + client: BrowserUseClient, + method: string, + params: Record, +): Promise { + switch (method) { + case 'ping': + return 'pong' + case 'getInfo': + return { + name: 'CodexUI Browser', + version: '0.0.1', + type: 'iab', + metadata: { + codexSessionId: client.backend.sessionId, + }, + capabilities: { + downloads: false, + fileUploads: false, + mediaDownloads: false, + }, + } + case 'createTab': + return await createTab(client) + case 'getTabs': + return await getTabs(client.backend) + case 'attach': + await attachTab(client, Number(params.tabId)) + return {} + case 'detach': + await detachTab(client.backend, Number(params.tabId)) + return {} + case 'executeCdp': + return await executeCdp(client.backend, params) + case 'moveMouse': + return await moveMouse(client.backend, params) + case 'nameSession': + case 'finalizeTabs': + return {} + case 'getUserTabs': + return { tabs: [] } + case 'getUserHistory': + return { items: [] } + case 'claimUserTab': + throw new Error('User tab claiming is not supported by CodexUI Browser Use backend.') + default: + throw new Error(`Unsupported Browser Use backend method: ${method}`) + } +} + +async function createTab(client: BrowserUseClient): Promise { + const browser = await client.backend.browserPromise + const context = await browser.newContext() + const page = await context.newPage() + const tabId = client.backend.nextTabId++ + client.backend.tabs.set(tabId, { clients: new Set([client]), page }) + return await serializeTab(tabId, client.backend.tabs.get(tabId), true) +} + +async function getTabs(backend: BrowserUseBackendRecord): Promise { + const tabs: BrowserUseTab[] = [] + for (const [tabId, tab] of backend.tabs) { + tabs.push(await serializeTab(tabId, tab, tabId === backend.nextTabId - 1)) + } + return tabs +} + +async function serializeTab( + tabId: number, + tab: PlaywrightTab | undefined, + active = false, +): Promise { + if (!tab) { + return { id: tabId, active } + } + return { + id: tabId, + title: await tab.page.title().catch(() => ''), + url: tab.page.url(), + active, + } +} + +async function attachTab(client: BrowserUseClient, tabId: number): Promise { + const tab = getTab(client.backend, tabId) + tab.clients.add(client) + if (tab.cdpSession) { + return + } + tab.cdpSession = await getPageContext(tab.page).newCDPSession(tab.page) + forwardCdpEvents(client.backend, tabId, tab.cdpSession) +} + +async function detachTab(backend: BrowserUseBackendRecord, tabId: number): Promise { + const tab = getTab(backend, tabId) + await tab.cdpSession?.detach().catch(() => {}) + tab.cdpSession = undefined + tab.clients.clear() +} + +async function executeCdp(backend: BrowserUseBackendRecord, params: Record): Promise { + const target = asRecord(params.target) + const tabId = Number(target?.tabId) + const method = typeof params.method === 'string' ? params.method : '' + if (!method) { + throw new Error('executeCdp requires method') + } + const commandParams = asRecord(params.commandParams) ?? {} + const tab = getTab(backend, tabId) + if (!tab.cdpSession) { + const context = getPageContext(tab.page) + tab.cdpSession = await context.newCDPSession(tab.page) + forwardCdpEvents(backend, tabId, tab.cdpSession) + } + if (method === 'Page.close') { + await tab.page.close() + backend.tabs.delete(tabId) + return {} + } + return await tab.cdpSession.send(method, commandParams) +} + +async function moveMouse(backend: BrowserUseBackendRecord, params: Record): Promise { + await executeCdp(backend, { + target: { tabId: params.tabId }, + method: 'Input.dispatchMouseEvent', + commandParams: { + type: 'mouseMoved', + x: Number(params.x), + y: Number(params.y), + }, + }) +} + +function forwardCdpEvents( + backend: BrowserUseBackendRecord, + tabId: number, + cdpSession: PlaywrightCdpSession, +): void { + const eventNames = [ + 'Page.frameStartedLoading', + 'Page.frameNavigated', + 'Page.navigatedWithinDocument', + 'Page.domContentEventFired', + 'Page.loadEventFired', + 'Page.navigationBlocked', + ] + for (const eventName of eventNames) { + cdpSession.on(eventName, (params) => { + const tab = backend.tabs.get(tabId) + for (const client of tab?.clients ?? []) { + client.send({ + jsonrpc: '2.0', + method: 'onCDPEvent', + params: { + method: eventName, + params, + source: { tabId }, + }, + }) + } + }) + } +} + +function getTab(backend: BrowserUseBackendRecord, tabId: number): PlaywrightTab { + if (!Number.isInteger(tabId) || tabId <= 0) { + throw new Error('Expected a positive tab id') + } + const tab = backend.tabs.get(tabId) + if (!tab) { + throw new Error(`Tab not found: ${tabId}`) + } + return tab +} + +function getPageContext(page: PlaywrightPage): PlaywrightContext { + return (page as PlaywrightPage & { context(): PlaywrightContext }).context() +} + +function asRecord(value: unknown): Record | null { + return value && typeof value === 'object' && !Array.isArray(value) + ? value as Record + : null +} diff --git a/src/server/codexAppServerBridge.ts b/src/server/codexAppServerBridge.ts index 1304d271a..cfb5a6109 100644 --- a/src/server/codexAppServerBridge.ts +++ b/src/server/codexAppServerBridge.ts @@ -11,6 +11,7 @@ import { basename, dirname, isAbsolute, join, resolve } from 'node:path' import { createInterface } from 'node:readline' import { writeFile } from 'node:fs/promises' import { handleAccountRoutes } from './accountRoutes.js' +import { ensureBrowserUseBackendForSession } from './browserUseBackend.js' import { buildAppServerArgs } from './appServerRuntimeConfig.js' import { handleReviewRoutes } from './reviewGit.js' import { handleSkillsRoutes, initializeSkillsSyncOnStartup } from './skillsRoutes.js' @@ -3832,7 +3833,6 @@ class AppServerProcess { private readonly pending = new Map void; reject: (reason?: unknown) => void }>() private readonly notificationListeners = new Set<(value: { method: string; params: unknown }) => void>() private readonly pendingServerRequests = new Map() - private readonly appServerArgs = buildAppServerArgs() private readonly streamEventsByThreadId = new Map() private readonly lastThreadReadSnapshotByThreadId = new Map() private readonly capturedItemsByThreadId = new Map>() @@ -3849,11 +3849,7 @@ class AppServerProcess { } private buildAppServerConfig(): { args: string[]; env: Record } { - const args = [ - 'app-server', - '-c', 'approval_policy="never"', - '-c', 'sandbox_mode="danger-full-access"', - ] + const args = buildAppServerArgs() let extraEnv: Record = {} const serverPort = parseInt(process.env.CODEXUI_SERVER_PORT ?? '', 10) || undefined const statePath = join(getCodexHomeDir(), FREE_MODE_STATE_FILE) @@ -5276,10 +5272,27 @@ export function createCodexBridgeMiddleware(): CodexBridgeMiddleware { return } + if (body.method === 'turn/start') { + const params = asRecord(body.params) + const threadId = typeof params?.threadId === 'string' ? params.threadId : '' + if (threadId) { + await ensureBrowserUseBackendForSession(threadId) + } + } + const rpcResult = await appServer.rpc(body.method, body.params ?? null) const trimmedResult = trimThreadTurnsInRpcResult(body.method, rpcResult) const result = await sanitizeThreadTurnsInlinePayloads(body.method, trimmedResult) + if (body.method === 'thread/start') { + const rpcRecord = asRecord(result) + const rpcThread = asRecord(rpcRecord?.thread) + const threadId = typeof rpcThread?.id === 'string' ? rpcThread.id : '' + if (threadId) { + await ensureBrowserUseBackendForSession(threadId) + } + } + if (THREAD_METHODS_WITH_TURNS.has(body.method)) { const rpcRecord = asRecord(result) const rpcThread = asRecord(rpcRecord?.thread) diff --git a/tests.md b/tests.md index 370aa8a10..8826392ce 100644 --- a/tests.md +++ b/tests.md @@ -224,6 +224,38 @@ This file tracks manual regression and feature verification steps. --- +### Browser Use plugin runtime command + +#### Feature/Change Name +codexui exposes Browser Use in chats by using the bundled Codex.app runtime, registering `node_repl`, and starting a session-scoped local Browser Use backend. + +#### Prerequisites/Setup +1. macOS with `/Applications/Codex.app/Contents/Resources/codex` installed. +2. Browser Use plugin enabled in `~/.codex/config.toml`. +3. Dev server running (`pnpm run dev --host 127.0.0.1 --port 4173`). +4. Light theme and dark theme are available from the appearance switcher. + +#### Steps +1. Run `pnpm exec vitest run src/commandResolution.test.ts`. +2. Run `pnpm run build:cli`. +3. Run `node -e "require('node:fs').accessSync('/Applications/Codex.app/Contents/Resources/codex'); console.log('bundled codex available')"` before starting codexui. +4. Open `http://127.0.0.1:4173` in light theme. +5. Create or open a codexui chat and ask it to use Browser Use to open `https://example.com` and report the page title. +6. Confirm the chat produces `mcp__node_repl__js` Browser Use activity and returns `{"title":"Example Domain","url":"https://example.com/"}` without a missing-tool or IAB discovery error. +7. Switch to dark theme and repeat steps 5-6. + +#### Expected Results +- On macOS, codexui launches the Codex.app bundled app-server by default. +- `CODEXUI_CODEX_COMMAND` still overrides the bundled command when set. +- `mcpServerStatus/list` includes `node_repl` with `js` and `js_reset`. +- Browser Use works inside codexui chats in both light and dark theme. +- The theme switch does not affect tool availability or pending tool-call rendering. + +#### Rollback/Cleanup +- Stop any disposable dev server started only for this validation. + +--- + ### Skills sync idempotent commits and nested shared skills handling #### Feature/Change Name From e889a147cbd4f5739ff1b98ce81d2a58a303c0fb Mon Sep 17 00:00:00 2001 From: Igor Date: Tue, 5 May 2026 05:16:55 +0700 Subject: [PATCH 2/5] Use playwright stealth for Browser Use backend --- src/server/browserUseBackend.ts | 17 +++++++++++++++-- src/server/playwrightStealthPayload.ts | 16 ++++++++++++++++ tests.md | 8 ++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 src/server/playwrightStealthPayload.ts diff --git a/src/server/browserUseBackend.ts b/src/server/browserUseBackend.ts index d5baa1bc8..e724fc8e4 100644 --- a/src/server/browserUseBackend.ts +++ b/src/server/browserUseBackend.ts @@ -2,6 +2,11 @@ import { createServer, type Socket, type Server } from 'node:net' import { mkdir, readFile, rm, writeFile } from 'node:fs/promises' import { join } from 'node:path' import { createRequire } from 'node:module' +import { + PLAYWRIGHT_STEALTH_CHROMIUM_ARGS, + PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS, + PLAYWRIGHT_STEALTH_INIT_SCRIPT, +} from './playwrightStealthPayload.js' type JsonRpcMessage = { jsonrpc?: '2.0' @@ -39,6 +44,7 @@ type PlaywrightBrowser = { type PlaywrightContext = { newPage(): Promise newCDPSession(page: PlaywrightPage): Promise + addInitScript(script: string): Promise } type PlaywrightPage = { @@ -148,7 +154,10 @@ async function launchBrowser(): Promise { chromium: { launch(options?: Record): Promise } }> const { chromium } = await dynamicImport('playwright') - return await chromium.launch({ headless: false }) + return await chromium.launch({ + args: [...PLAYWRIGHT_STEALTH_CHROMIUM_ARGS], + headless: false, + }) } function handleConnection(backend: BrowserUseBackendRecord, socket: Socket): void { @@ -277,7 +286,11 @@ async function handleRequest( async function createTab(client: BrowserUseClient): Promise { const browser = await client.backend.browserPromise - const context = await browser.newContext() + const context = await browser.newContext({ + ...PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS, + extraHTTPHeaders: { ...PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS.extraHTTPHeaders }, + }) + await context.addInitScript(PLAYWRIGHT_STEALTH_INIT_SCRIPT) const page = await context.newPage() const tabId = client.backend.nextTabId++ client.backend.tabs.set(tabId, { clients: new Set([client]), page }) diff --git a/src/server/playwrightStealthPayload.ts b/src/server/playwrightStealthPayload.ts new file mode 100644 index 000000000..e682cf623 --- /dev/null +++ b/src/server/playwrightStealthPayload.ts @@ -0,0 +1,16 @@ +// Generated from https://github.com/Mattwmaster58/playwright_stealth (MIT). +// Source files: playwright_stealth/js/utils.js, generate.magic.arrays.js, and enabled evasions. + +export const PLAYWRIGHT_STEALTH_INIT_SCRIPT = "(() => {\nconst opts = {\"navigator_hardware_concurrency\":true,\"navigator_languages_override\":[\"en-US\",\"en\"],\"navigator_platform\":\"Win32\",\"navigator_user_agent\":null,\"navigator_vendor\":null,\"webgl_renderer\":\"Intel Iris OpenGL Engine\",\"webgl_vendor\":\"Intel Inc.\",\"script_logging\":false};\n/**\n * A set of shared utility functions specifically to modify native browser APIs without leaving traces.\n */\nconst utils = {};\n\n/**\n * Wraps a JS Proxy Handler and strips it's presence from error stacks, in case the traps throw.\n * The presence of a JS Proxy can be revealed as it shows up in error stack traces.\n *\n * @param {object} handler - The JS Proxy handler to wrap\n */\nutils.stripProxyFromErrors = (handler = {}) => {\n const handler_name = (Math.random() + 1).toString(36).substring(2);\n window[handler_name] = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler);\n traps.forEach((trap) => {\n window[handler_name][trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || []);\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err;\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at window.. [as ${trap}] `, // caused by this very wrapper :-)\n ];\n return (\n err.stack\n .split(\"\\n\")\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter((line) => !blacklist.some((bl) => line.trim().startsWith(bl)))\n .join(\"\\n\")\n );\n };\n\n const stripWithAnchor = (stack) => {\n const stackArr = stack.split(\"\\n\");\n const anchor = `at window.. [as ${trap}] `; // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex((line) => line.trim().startsWith(anchor));\n if (anchorIndex === -1) {\n return false; // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex);\n return stackArr.join(\"\\n\");\n };\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack);\n\n throw err; // Re-throw our now sanitized error\n }\n };\n });\n return window[handler_name];\n};\n\n/**\n * Strip error lines from stack traces until (and including) a known line the stack.\n *\n * @param {object} err - The error to sanitize\n * @param {string} anchor - The string the anchor line starts with\n */\nutils.stripErrorWithAnchor = (err, anchor) => {\n const stackArr = err.stack.split(\"\\n\");\n const anchorIndex = stackArr.findIndex((line) => line.trim().startsWith(anchor));\n if (anchorIndex === -1) {\n return err; // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex);\n err.stack = stackArr.join(\"\\n\");\n return err;\n};\n\n/**\n * Replace the property of an object in a stealthy way.\n *\n * Note: You also want to work on the prototype of an object most often,\n * as you'd otherwise leave traces (e.g. showing up in Object.getOwnPropertyNames(obj)).\n *\n * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty\n *\n * @example\n * replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: \"alice\" })\n * // or\n * replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] })\n *\n * @param {object} obj - The object which has the property to replace\n * @param {string} propName - The property name to replace\n * @param {object} descriptorOverrides - e.g. { value: \"alice\" }\n */\nutils.replaceProperty = (obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides,\n });\n};\n\n/**\n * Preload a cache of function copies and data.\n *\n * For a determined enough observer it would be possible to overwrite and sniff usage of functions\n * we use in our internal Proxies, to combat that we use a cached copy of those functions.\n *\n * This is evaluated once per execution context (e.g. window)\n */\nutils.preloadCache = () => {\n if (utils.cache) {\n return;\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect),\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString.toString(), // => `function toString() { [native code] }`\n };\n};\n\n/**\n * Utility function to generate a cross-browser `toString` result representing native code.\n *\n * There's small differences: Chromium uses a single line, whereas FF & Webkit uses multiline strings.\n * To future-proof this we use an existing native toString result as the basis.\n *\n * The only advantage we have over the other team is that our JS runs first, hence we cache the result\n * of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it.\n *\n * Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache before,\n * by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups).\n *\n * @example\n * makeNativeString('foobar') // => `function foobar() { [native code] }`\n *\n * @param {string} [name] - Optional function name\n */\nutils.makeNativeString = (name = \"\") => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache();\n return utils.cache.nativeToStringStr.replace(\"toString\", name || \"\");\n};\n\n/**\n * Helper function to modify the `toString()` result of the provided object.\n *\n * Note: Use `utils.redirectToString` instead when possible.\n *\n * There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object.\n * If no string is provided we will generate a `[native code]` thing based on the name of the property object.\n *\n * @example\n * patchToString(WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }')\n *\n * @param {object} obj - The object for which to modify the `toString()` representation\n * @param {string} str - Optional string used as a return value\n */\nutils.patchToString = (obj, str = \"\") => {\n utils.preloadCache();\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString(\"toString\");\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name);\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(Function.prototype.toString).isPrototypeOf(ctx.toString); // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString();\n }\n return target.call(ctx);\n },\n });\n utils.replaceProperty(Function.prototype, \"toString\", {\n value: toStringProxy,\n });\n};\n\n/**\n * Make all nested functions of an object native.\n *\n * @param {object} obj\n */\nutils.patchToStringNested = (obj = {}) => {\n return utils.execRecursively(obj, [\"function\"], utils.patchToString);\n};\n\n/**\n * Redirect toString requests from one object to another.\n *\n * @param {object} proxyObj - The object that toString will be called on\n * @param {object} originalObj - The object which toString result we wan to return\n */\nutils.redirectToString = (proxyObj, originalObj) => {\n utils.preloadCache();\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString(\"toString\");\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name);\n\n // Return the toString representation of our original object if possible\n return originalObj + \"\" || fallback();\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(Function.prototype.toString).isPrototypeOf(ctx.toString); // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString();\n }\n\n return target.call(ctx);\n },\n });\n utils.replaceProperty(Function.prototype, \"toString\", {\n value: toStringProxy,\n });\n};\n\n/**\n * All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps.\n *\n * Will stealthify these aspects (strip error stack traces, redirect toString, etc).\n * Note: This is meant to modify native Browser APIs and works best with prototype objects.\n *\n * @example\n * replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler)\n *\n * @param {object} obj - The object which has the property to replace\n * @param {string} propName - The name of the property to replace\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.replaceWithProxy = (obj, propName, handler) => {\n utils.preloadCache();\n const originalObj = obj[propName];\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler));\n\n utils.replaceProperty(obj, propName, { value: proxyObj });\n utils.redirectToString(proxyObj, originalObj);\n\n return true;\n};\n\n/**\n * All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps.\n *\n * Will stealthify these aspects (strip error stack traces, redirect toString, etc).\n *\n * @example\n * mockWithProxy(chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler)\n *\n * @param {object} obj - The object which has the property to replace\n * @param {string} propName - The name of the property to replace or create\n * @param {object} pseudoTarget - The JS Proxy target to use as a basis\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.mockWithProxy = (obj, propName, pseudoTarget, handler) => {\n utils.preloadCache();\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));\n\n utils.replaceProperty(obj, propName, { value: proxyObj });\n utils.patchToString(proxyObj);\n\n return true;\n};\n\n/**\n * All-in-one method to create a new JS Proxy with stealth tweaks.\n *\n * This is meant to be used whenever we need a JS Proxy but don't want to replace or mock an existing known property.\n *\n * Will stealthify certain aspects of the Proxy (strip error stack traces, redirect toString, etc).\n *\n * @example\n * createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy\n *\n * @param {object} pseudoTarget - The JS Proxy target to use as a basis\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.createProxy = (pseudoTarget, handler) => {\n utils.preloadCache();\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));\n utils.patchToString(proxyObj);\n return proxyObj;\n};\n\n/**\n * Helper function to split a full path to an Object into the first part and property.\n *\n * @example\n * splitObjPath(`HTMLMediaElement.prototype.canPlayType`)\n * // => {objName: \"HTMLMediaElement.prototype\", propName: \"canPlayType\"}\n *\n * @param {string} objPath - The full path to an object as dot notation string\n */\nutils.splitObjPath = (objPath) => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split(\".\").slice(0, -1).join(\".\"),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split(\".\").slice(-1)[0],\n});\n\n/**\n * Convenience method to replace a property with a JS Proxy using the provided objPath.\n *\n * Supports a full path (dot notation) to the object as string here, in case that makes it easier.\n *\n * @example\n * replaceObjPathWithProxy('WebGLRenderingContext.prototype.getParameter', proxyHandler)\n *\n * @param {string} objPath - The full path to an object (dot notation string) to replace\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.replaceObjPathWithProxy = (objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath);\n const obj = eval(objName); // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler);\n};\n\n/**\n * Traverse nested properties of an object recursively and apply the given function on a whitelist of value types.\n *\n * @param {object} obj\n * @param {array} typeFilter - e.g. `['function']`\n * @param {Function} fn - e.g. `utils.patchToString`\n */\nutils.execRecursively = (obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue;\n }\n if (obj[key] && typeof obj[key] === \"object\") {\n recurse(obj[key]);\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key]);\n }\n }\n }\n }\n\n recurse(obj);\n return obj;\n};\n\n/**\n * Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one.\n * That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter.\n *\n * Unfortunately, the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process.\n * This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings.\n *\n * We use this to pass down our utility functions as well as any other functions (to be able to split up code better).\n *\n * @see utils.materializeFns\n *\n * @param {object} fnObj - An object containing functions as properties\n */\nutils.stringifyFns = (fnObj = { hello: () => \"world\" }) => {\n // Object.fromEntries() polyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === \"function\")\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n );\n};\n\n/**\n * Utility function to reverse the process of `utils.stringifyFns`.\n * Will materialize an object with stringified functions (supports classic and fat arrow functions).\n *\n * @param {object} fnStrObj - An object containing stringified functions as properties\n */\nutils.materializeFns = (fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith(\"function\")) {\n // some trickery is necessary to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()]; // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)]; // eslint-disable-line no-eval\n }\n })\n );\n};\n\nutils.arrayEqual = (arr1, arr2) => arr1.length === arr2.length && arr1.every((value, index) => value === arr2[index]);\n\nconst log = (...args) => opts.script_logging && console.log(\"[playwright-stealth]:\", ...args);\nconst warn = (...args) => opts.script_logging && console.warn(\"[playwright-stealth]:\", ...args);\n\nlog(JSON.stringify(opts));\n\ngenerateFunctionMocks = (proto, itemMainProp, dataArray) => ({\n item: utils.createProxy(proto.item, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'item' on '${proto[Symbol.toStringTag]}': 1 argument required, but only 0 present.`\n );\n }\n // Special behavior alert:\n // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup\n // - If anything else than an integer (including as string) is provided it will return the first entry\n const isInteger = args[0] && Number.isInteger(Number(args[0])); // Cast potential string to number first, then check for integer\n // Note: Vanilla never returns `undefined`\n return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null;\n }\n }),\n /** Returns the MimeType object with the specified name. */\n namedItem: utils.createProxy(proto.namedItem, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'namedItem' on '${proto[Symbol.toStringTag]}': 1 argument required, but only 0 present.`\n );\n }\n return dataArray.find((mt) => mt[itemMainProp] === args[0]) || null; // Not `undefined`!\n }\n }),\n /** Does nothing and shall return nothing */\n refresh: proto.refresh\n ? utils.createProxy(proto.refresh, {\n apply(target, ctx, args) {\n return undefined;\n }\n })\n : undefined\n});\n\nfunction generateMagicArray(\n dataArray = [],\n proto = MimeTypeArray.prototype,\n itemProto = MimeType.prototype,\n itemMainProp = \"type\"\n) {\n // Quick helper to set props with the same descriptors vanilla is using\n const defineProp = (obj, prop, value) =>\n Object.defineProperty(obj, prop, {\n value,\n writable: false,\n enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)`\n configurable: false\n });\n\n // Loop over our fake data and construct items\n const makeItem = (data) => {\n const item = {};\n for (const prop of Object.keys(data)) {\n if (prop.startsWith(\"__\")) {\n continue;\n }\n defineProp(item, prop, data[prop]);\n }\n // navigator.plugins[i].length should always be 1\n if (itemProto === Plugin.prototype) {\n defineProp(item, \"length\", 1);\n }\n // We need to spoof a specific `MimeType` or `Plugin` object\n return Object.create(itemProto, Object.getOwnPropertyDescriptors(item));\n };\n\n const magicArray = [];\n\n // Loop through our fake data and use that to create convincing entities\n dataArray.forEach((data) => {\n magicArray.push(makeItem(data));\n });\n\n // Add direct property access based on types (e.g. `obj['application/pdf']`) afterwards\n magicArray.forEach((entry) => {\n defineProp(magicArray, entry[itemMainProp], entry);\n });\n\n // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)`\n const magicArrayObj = Object.create(proto, {\n ...Object.getOwnPropertyDescriptors(magicArray),\n\n // There's one ugly quirk we unfortunately need to take care of:\n // The `MimeTypeArray` prototype has an enumerable `length` property,\n // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`.\n // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap.\n length: {\n value: magicArray.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n });\n\n // Generate our functional function mocks :-)\n const functionMocks = generateFunctionMocks(proto, itemMainProp, magicArray);\n\n // Override custom object with proxy\n return new Proxy(magicArrayObj, {\n get(target, key = \"\") {\n // Redirect function calls to our custom proxied versions mocking the vanilla behavior\n if (key === \"item\") {\n return functionMocks.item;\n }\n if (key === \"namedItem\") {\n return functionMocks.namedItem;\n }\n if (proto === PluginArray.prototype && key === \"refresh\") {\n return functionMocks.refresh;\n }\n // Everything else can pass through as normal\n return utils.cache.Reflect.get(...arguments);\n },\n ownKeys(target) {\n // There are a couple of quirks where the original property demonstrates \"magical\" behavior that makes no sense\n // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absense of `length`\n // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly\n // For that reason we just completely fake the available property names based on our data to match what regular Chrome is doing\n // Specific issues when not patching this: `length` property is available, direct `types` props (e.g. `obj['application/pdf']`) are missing\n const keys = [];\n const typeProps = magicArray.map((mt) => mt[itemMainProp]);\n typeProps.forEach((_, i) => keys.push(`${i}`));\n typeProps.forEach((propName) => keys.push(propName));\n return keys;\n }\n });\n}\n\nlog(\"loading chrome.app.js\");\n\nif (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, \"chrome\", {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n });\n}\n\n// app in window.chrome means we're running headful and don't need to mock anything\nif (!(\"app\" in window.chrome)) {\n const makeError = {\n ErrorInInvocation: (fn) => {\n const err = new TypeError(`Error in invocation of app.${fn}()`);\n return utils.stripErrorWithAnchor(err, `at ${fn} (eval at `);\n }\n };\n\n const APP_STATIC_DATA = JSON.parse(\n `\n{\n \"isInstalled\": false,\n \"InstallState\": {\n \"DISABLED\": \"disabled\",\n \"INSTALLED\": \"installed\",\n \"NOT_INSTALLED\": \"not_installed\"\n },\n \"RunningState\": {\n \"CANNOT_RUN\": \"cannot_run\",\n \"READY_TO_RUN\": \"ready_to_run\",\n \"RUNNING\": \"running\"\n }\n}\n `.trim()\n );\n\n window.chrome.app = {\n ...APP_STATIC_DATA,\n\n get isInstalled() {\n return false;\n },\n\n getDetails: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getDetails`);\n }\n return null;\n },\n getIsInstalled: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getIsInstalled`);\n }\n return false;\n },\n runningState: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`runningState`);\n }\n return \"cannot_run\";\n }\n };\n utils.patchToStringNested(window.chrome.app);\n}\n\nlog(\"loading chrome.csi.js\");\n\nif (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, \"chrome\", {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n });\n}\n\n// Check if we're running headful and don't need to mock anything\n// Check that the Navigation Timing API v1 is available, we need that\nif (!(\"csi\" in window.chrome) && window.performance?.timing) {\n const { csi_timing } = window.performance;\n\n log(\"loading chrome.csi.js\");\n window.chrome.csi = function() {\n return {\n onloadT: csi_timing?.domContentLoadedEventEnd,\n startE: csi_timing?.navigationStart,\n pageT: Date.now() - csi_timing?.navigationStart,\n tran: 15 // transition? seems constant\n };\n };\n utils.patchToString(window.chrome.csi);\n}\n\nlog(\"loading chrome.hairline.js\");\n// inspired by: https://intoli.com/blog/making-chrome-headless-undetectable/\nconst elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype,\n \"offsetHeight\");\n\nutils.replaceProperty(HTMLDivElement.prototype, \"offsetHeight\", {\n get: function() {\n // hmmm not sure about this\n if (this.id === \"modernizr\") {\n return 1;\n }\n return elementDescriptor.get.apply(this);\n }\n});\n\nlog(\"loading chrome.load.times.js\");\n\nif (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, \"chrome\", {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n });\n}\n\n// That means we're running headful and don't need to mock anything\nif (\"loadTimes\" in window.chrome) {\n warn(\"skipping chrome loadtimes update, running in headful mode\");\n}\n\n// Check that the Navigation Timing API v1 + v2 is available, we need that\nif (window.performance?.timing || window.PerformancePaintTiming) {\n const { performance } = window;\n\n // Some stuff is not available on about:blank as it requires a navigation to occur,\n // let's harden the code to not fail then:\n const ntEntryFallback = {\n nextHopProtocol: \"h2\",\n type: \"other\"\n };\n\n // The API exposes some funky info regarding the connection\n const protocolInfo = {\n get connectionInfo() {\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return ntEntry.nextHopProtocol;\n },\n get npnNegotiatedProtocol() {\n // NPN is deprecated in favor of ALPN, but this implementation returns the\n // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return [\"h2\", \"hq\"].includes(ntEntry.nextHopProtocol) ?\n ntEntry.nextHopProtocol :\n \"unknown\";\n },\n get navigationType() {\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return ntEntry.type;\n },\n get wasAlternateProtocolAvailable() {\n // The Alternate-Protocol header is deprecated in favor of Alt-Svc\n // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this\n // should always return false.\n return false;\n },\n get wasFetchedViaSpdy() {\n // SPDY is deprecated in favor of HTTP/2, but this implementation returns\n // true for HTTP/2 or HTTP2+QUIC/39 as well.\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return [\"h2\", \"hq\"].includes(ntEntry.nextHopProtocol);\n },\n get wasNpnNegotiated() {\n // NPN is deprecated in favor of ALPN, but this implementation returns true\n // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return [\"h2\", \"hq\"].includes(ntEntry.nextHopProtocol);\n }\n };\n\n const { timing } = window.performance;\n\n // Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3\n function toFixed(num, fixed) {\n const re = new RegExp(\"^-?\\\\d+(?:.\\\\d{0,\" + (fixed || -1) + \"})?\");\n return num.toString().match(re)[0];\n }\n\n const timingInfo = {\n get firstPaintAfterLoadTime() {\n // This was never actually implemented and always returns 0.\n return 0;\n },\n get requestTime() {\n return timing.navigationStart / 1000;\n },\n get startLoadTime() {\n return timing.navigationStart / 1000;\n },\n get commitLoadTime() {\n return timing.responseStart / 1000;\n },\n get finishDocumentLoadTime() {\n return timing.domContentLoadedEventEnd / 1000;\n },\n get finishLoadTime() {\n return timing.loadEventEnd / 1000;\n },\n get firstPaintTime() {\n const fpEntry = performance.getEntriesByType(\"paint\")[0] || {\n startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`)\n };\n return toFixed((fpEntry.startTime + performance.timeOrigin) / 1000, 3);\n }\n };\n\n window.chrome.loadTimes = function() {\n return {\n ...protocolInfo,\n ...timingInfo\n };\n };\n utils.patchToString(window.chrome.loadTimes);\n}\n\nlog(\"loading iframe.contentWindow.js\");\n\ntry {\n // Adds a contentWindow proxy to the provided iframe element\n const addContentWindowProxy = (iframe) => {\n const contentWindowProxy = {\n get(target, key) {\n // Now to the interesting part:\n // We actually make this thing behave like a regular iframe window,\n // by intercepting calls to e.g. `.self` and redirect it to the correct thing. :)\n // That makes it possible for these assertions to be correct:\n // iframe.contentWindow.self === window.top // must be false\n if (key === \"self\") {\n return this;\n }\n // iframe.contentWindow.frameElement === iframe // must be true\n if (key === \"frameElement\") {\n return iframe;\n }\n return Reflect.get(target, key);\n }\n };\n\n if (!iframe.contentWindow) {\n const proxy = new Proxy(window, contentWindowProxy);\n Object.defineProperty(iframe, \"contentWindow\", {\n get() {\n return proxy;\n },\n set(newValue) {\n return newValue; // contentWindow is immutable\n },\n enumerable: true,\n configurable: false\n });\n }\n };\n\n // Handles iframe element creation, augments `srcdoc` property so we can intercept further\n const handleIframeCreation = (target, thisArg, args) => {\n const iframe = target.apply(thisArg, args);\n\n // We need to keep the originals around\n const _iframe = iframe;\n const _srcdoc = _iframe.srcdoc;\n\n // Add hook for the srcdoc property\n // We need to be very surgical here to not break other iframes by accident\n Object.defineProperty(iframe, \"srcdoc\", {\n configurable: true, // Important, so we can reset this later\n get: function() {\n return _iframe.srcdoc;\n },\n set: function(newValue) {\n addContentWindowProxy(this);\n // Reset property, the hook is only needed once\n Object.defineProperty(iframe, \"srcdoc\", {\n configurable: false,\n writable: false,\n value: _srcdoc\n });\n _iframe.srcdoc = newValue;\n }\n });\n return iframe;\n };\n\n // Adds a hook to intercept iframe creation events\n const addIframeCreationSniffer = () => {\n /* global document */\n const createElementHandler = {\n // Make toString() native\n get(target, key) {\n return Reflect.get(target, key);\n },\n apply: function(target, thisArg, args) {\n const isIframe = args && args.length && `${args[0]}`.toLowerCase() ===\n \"iframe\";\n if (!isIframe) {\n // Everything as usual\n return target.apply(thisArg, args);\n } else {\n return handleIframeCreation(target, thisArg, args);\n }\n }\n };\n // All this just due to iframes with srcdoc bug\n utils.replaceWithProxy(document, \"createElement\", createElementHandler);\n };\n\n // Let's go\n addIframeCreationSniffer();\n} catch (err) {\n // console.warn(err)\n}\n\nlog(\"loading media.codec.js\");\n/**\n * Input might look funky, we need to normalize it so e.g. whitespace isn't an issue for our spoofing.\n *\n * @example\n * video/webm; codecs=\"vp8, vorbis\"\n * video/mp4; codecs=\"avc1.42E01E\"\n * audio/x-m4a;\n * audio/ogg; codecs=\"vorbis\"\n * @param {String} arg\n */\nconst parseInput = (arg) => {\n const [mime, codecStr] = arg.trim().split(\";\");\n let codecs = [];\n if (codecStr && codecStr.includes(\"codecs=\\\"\")) {\n codecs = codecStr.trim().\n replace(`codecs=\"`, \"\").\n replace(`\"`, \"\").\n trim().\n split(\",\").\n filter((x) => !!x).\n map((x) => x.trim());\n }\n return {\n mime,\n codecStr,\n codecs\n };\n};\n\nconst canPlayType = {\n // Intercept certain requests\n apply: function(target, ctx, args) {\n if (!args || !args.length) {\n return target.apply(ctx, args);\n }\n const { mime, codecs } = parseInput(args[0]);\n // This specific mp4 codec is missing in Chromium\n if (mime === \"video/mp4\") {\n if (codecs.includes(\"avc1.42E01E\")) {\n return \"probably\";\n }\n }\n // This mimetype is only supported if no codecs are specified\n if (mime === \"audio/x-m4a\" && !codecs.length) {\n return \"maybe\";\n }\n\n // This mimetype is only supported if no codecs are specified\n if (mime === \"audio/aac\" && !codecs.length) {\n return \"probably\";\n }\n // Everything else as usual\n return target.apply(ctx, args);\n }\n};\n\n/* global HTMLMediaElement */\nutils.replaceWithProxy(HTMLMediaElement.prototype, \"canPlayType\", canPlayType);\n\nlog(\"loading navigator.hardwareConcurrency\");\n\nutils.replaceProperty(Object.getPrototypeOf(navigator), \"hardwareConcurrency\", {\n get() {\n return 4;\n }\n});\n\nlog(\"loading navigator.languages.js\");\nlog(navigator.languages, opts.navigator_languages_override);\nif (utils.arrayEqual(navigator.languages, opts.navigator_languages_override)) {\n log(\"not patching navigator.languages, assuming CLI args were used instead\");\n} else {\n utils.replaceProperty(Object.getPrototypeOf(navigator), \"languages\", {\n get: () => opts.navigator_languages_override\n });\n}\n\nlog(\"loading navigator.permissions.js\");\n\nconst handler = {\n apply: function(target, ctx, args) {\n const param = (args || [])[0];\n\n if (param && param.name && param.name === \"notifications\") {\n const result = { state: Notification.permission };\n Object.setPrototypeOf(result, PermissionStatus.prototype);\n return Promise.resolve(result);\n }\n\n return utils.cache.Reflect.apply(...arguments);\n }\n};\n\nutils.replaceWithProxy(\n window.navigator.permissions.__proto__, // eslint-disable-line no-proto\n \"query\",\n handler\n);\n\nlog(`loading navigator.platform.js with opt: ${opts.navigator_platform}`);\n\nif (opts.navigator_platform && navigator.platform !== opts.navigator_platform) {\n utils.replaceProperty(Object.getPrototypeOf(navigator), \"platform\", {\n get: () => opts.navigator_platform\n });\n}\n\nlog(\"loading navigator.plugins.js\");\n\nconst data = {\n mimeTypes: [\n {\n type: \"application/pdf\",\n suffixes: \"pdf\",\n description: \"\",\n __pluginName: \"Chrome PDF Viewer\"\n },\n {\n type: \"application/x-google-chrome-pdf\",\n suffixes: \"pdf\",\n description: \"Portable Document Format\",\n __pluginName: \"Chrome PDF Plugin\"\n },\n {\n type: \"application/x-nacl\",\n suffixes: \"\",\n description: \"Native Client Executable\",\n __pluginName: \"Native Client\"\n },\n {\n type: \"application/x-pnacl\",\n suffixes: \"\",\n description: \"Portable Native Client Executable\",\n __pluginName: \"Native Client\"\n }\n ],\n plugins: [\n {\n name: \"Chrome PDF Plugin\",\n filename: \"internal-pdf-viewer\",\n description: \"Portable Document Format\",\n __mimeTypes: [\"application/x-google-chrome-pdf\"]\n },\n {\n name: \"Chrome PDF Viewer\",\n filename: \"mhjfbmdgcfjbbpaeojofohoefgiehjai\",\n description: \"\",\n __mimeTypes: [\"application/pdf\"]\n },\n {\n name: \"Native Client\",\n filename: \"internal-nacl-plugin\",\n description: \"\",\n __mimeTypes: [\"application/x-nacl\", \"application/x-pnacl\"]\n }\n ]\n};\n\n// That means we're running headful\nconst hasPlugins = \"plugins\" in navigator && navigator.plugins.length;\nif (!hasPlugins) {\n const mimeTypes = generateMagicArray(data.mimeTypes, MimeTypeArray.prototype,\n MimeType.prototype, \"type\");\n const plugins = generateMagicArray(data.plugins, PluginArray.prototype,\n Plugin.prototype, \"name\");\n\n // Plugin and MimeType cross-reference each other, let's do that now\n // Note: We're looping through `data.plugins` here, not the generated `plugins`\n for (const pluginData of data.plugins) {\n pluginData.__mimeTypes.forEach((type, index) => {\n plugins[pluginData.name][index] = mimeTypes[type];\n plugins[type] = mimeTypes[type];\n Object.defineProperty(mimeTypes[type], \"enabledPlugin\", {\n value: JSON.parse(JSON.stringify(plugins[pluginData.name])),\n writable: false,\n enumerable: false, // Important: `JSON.stringify(navigator.plugins)`\n configurable: false\n });\n });\n }\n\n const patchNavigator = (name, value) =>\n utils.replaceProperty(Object.getPrototypeOf(navigator), name, {\n get() {\n return value;\n }\n });\n\n patchNavigator(\"mimeTypes\", mimeTypes);\n patchNavigator(\"plugins\", plugins);\n}\n\nlog(\"loading navigator.userAgent.js\");\n// replace Headless references in default useragent\nconst current_ua = navigator.userAgent;\nutils.replaceProperty(Object.getPrototypeOf(navigator), \"userAgent\", {\n get: () => opts.navigator_user_agent ||\n current_ua.replace(\"HeadlessChrome/\", \"Chrome/\")\n});\n\nlog(\"loading navigator.userAgentData.js\");\n\nconst originalUserAgentData = navigator.userAgentData;\n\nif (originalUserAgentData) {\n /**\n * Helper to replace HeadlessChrome with Google Chrome in brand lists\n * @param {Array<{brand: string, version: string}>} list - Array of UADataBrand objects\n */\n const filterBrandList = (list) =>\n list.map((b) => (b.brand === \"HeadlessChrome\" ? { brand: \"Google Chrome\", version: b.version } : b));\n\n // Get the NavigatorUAData prototype\n const NavigatorUADataProto = Object.getPrototypeOf(originalUserAgentData);\n\n // Cache original methods before patching\n const originalGetHighEntropyValues = NavigatorUADataProto.getHighEntropyValues;\n const originalToJSON = NavigatorUADataProto.toJSON;\n const originalBrandsDescriptor = Object.getOwnPropertyDescriptor(NavigatorUADataProto, \"brands\");\n\n let cachedFilteredBrands = null;\n // Helper to get filtered brands (cached for identity checks)\n const getFilteredBrands = () => {\n if (cachedFilteredBrands === null) {\n const originalBrands = originalBrandsDescriptor.get.call(originalUserAgentData);\n cachedFilteredBrands = filterBrandList(originalBrands);\n }\n return cachedFilteredBrands;\n };\n\n // Patch getHighEntropyValues to filter HeadlessChrome from results\n utils.replaceProperty(NavigatorUADataProto, \"getHighEntropyValues\", {\n value: function (hints) {\n return originalGetHighEntropyValues.call(this, hints).then((data) => {\n const newData = { ...data };\n if (newData.brands) {\n newData.brands = filterBrandList(newData.brands);\n }\n if (newData.fullVersionList) {\n newData.fullVersionList = filterBrandList(newData.fullVersionList);\n }\n return newData;\n });\n },\n });\n\n // Patch toJSON to filter HeadlessChrome\n utils.replaceProperty(NavigatorUADataProto, \"toJSON\", {\n value: function () {\n const data = originalToJSON.call(this);\n return {\n brands: filterBrandList(data.brands),\n mobile: data.mobile,\n platform: data.platform,\n };\n },\n });\n\n // Patch brands getter to return filtered array (same instance each call, like real Chrome)\n utils.replaceProperty(NavigatorUADataProto, \"brands\", {\n get: function () {\n // Return cached filtered brands, computing and freezing on first access\n return getFilteredBrands();\n },\n enumerable: originalBrandsDescriptor.enumerable,\n configurable: originalBrandsDescriptor.configurable,\n });\n\n utils.replaceProperty(NavigatorUADataProto, \"userAgentData\", {\n get: () => originalUserAgentData,\n });\n}\n\nlog(\"loading navigator.vendor.js\");\n\nutils.replaceProperty(Object.getPrototypeOf(navigator), \"vendor\", {\n get: () => opts.navigator_vendor || \"Google Inc.\"\n});\n\nlog(\"loading navigator.webdriver.js\");\n// this is close to the most accurate way to emulate this: https://stackoverflow.com/a/69533548\n// no point \"giving ourselves away\" if we don't need to mock this values\n// techniques exist to detect Object.defineProperty etc., so if we can avoid it we do\n// if args include --disable-blink-features=AutomationControlled, we do not need to mock this\nif (navigator.webdriver) {\n utils.replaceProperty(Object.getPrototypeOf(navigator), \"webdriver\", {\n get: new Proxy(\n Object.getOwnPropertyDescriptor(Object.getPrototypeOf(navigator),\n \"webdriver\").get, {\n apply: (target, thisArg, args) => {\n // emulate getter call validation\n Reflect.apply(target, thisArg, args);\n return false;\n }\n })\n });\n} else {\n log(\"not patching navigator.webdriver, assuming CLI args were used instead\");\n}\n\nlog(\"loading error.prototype.js\");\n\nObject.defineProperty(Error.prototype, \"name\", {configurable: false, enumerable: false})\n\nlog(\"loading webgl.vendor.js\");\n\nconst getParameterProxyHandler = {\n apply: function (target, ctx, args) {\n const param = (args || [])[0];\n // UNMASKED_VENDOR_WEBGL\n if (param === 37445) {\n return opts.webgl_vendor || \"Intel Inc.\"; // default in headless: Google Inc.\n }\n // UNMASKED_RENDERER_WEBGL\n if (param === 37446) {\n return opts.webgl_renderer || \"Intel Iris OpenGL Engine\"; // default in headless: Google SwiftShader\n }\n return utils.cache.Reflect.apply(target, ctx, args);\n },\n};\n\n// There's more than one WebGL rendering context\n// https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility\n// To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter)\nconst addProxy = (obj, propName) => {\n utils.replaceWithProxy(obj, propName, getParameterProxyHandler);\n};\n// For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing:\naddProxy(WebGLRenderingContext.prototype, \"getParameter\");\naddProxy(WebGL2RenderingContext.prototype, \"getParameter\");\n\n})();" + +export const PLAYWRIGHT_STEALTH_CHROMIUM_ARGS = [ + '--disable-blink-features=AutomationControlled', + '--accept-lang=en-US,en', +] as const + +export const PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS = { + locale: 'en-US', + extraHTTPHeaders: { + 'Accept-Language': 'en-US,en;q=0.9', + }, +} as const diff --git a/tests.md b/tests.md index 8826392ce..97091ec60 100644 --- a/tests.md +++ b/tests.md @@ -227,7 +227,7 @@ This file tracks manual regression and feature verification steps. ### Browser Use plugin runtime command #### Feature/Change Name -codexui exposes Browser Use in chats by using the bundled Codex.app runtime, registering `node_repl`, and starting a session-scoped local Browser Use backend. +codexui exposes Browser Use in chats by using the bundled Codex.app runtime, registering `node_repl`, starting a session-scoped local Browser Use backend, and applying the vendored `Mattwmaster58/playwright_stealth` init payload to new Browser Use contexts. #### Prerequisites/Setup 1. macOS with `/Applications/Codex.app/Contents/Resources/codex` installed. @@ -242,13 +242,17 @@ codexui exposes Browser Use in chats by using the bundled Codex.app runtime, reg 4. Open `http://127.0.0.1:4173` in light theme. 5. Create or open a codexui chat and ask it to use Browser Use to open `https://example.com` and report the page title. 6. Confirm the chat produces `mcp__node_repl__js` Browser Use activity and returns `{"title":"Example Domain","url":"https://example.com/"}` without a missing-tool or IAB discovery error. -7. Switch to dark theme and repeat steps 5-6. +7. In the same Browser Use-enabled chat, ask it to open `https://google.com`, type `hello world` into the search box, and press Enter. +8. Confirm Browser Use can type into Google's localized search combobox and submit the query. If Google redirects to `/sorry/index`, confirm the redirected URL includes a `continue=` target containing `q=hello+world`. +9. Switch to dark theme and repeat steps 5-8. #### Expected Results - On macOS, codexui launches the Codex.app bundled app-server by default. - `CODEXUI_CODEX_COMMAND` still overrides the bundled command when set. - `mcpServerStatus/list` includes `node_repl` with `js` and `js_reset`. - Browser Use works inside codexui chats in both light and dark theme. +- New Browser Use contexts receive the vendored `playwright_stealth` init script, Chromium stealth launch args, and English locale headers. +- The Google validation proves text entry and submit behavior; a Google `/sorry/index` anti-automation redirect is acceptable evidence of submit, but it is not evidence that the challenge was bypassed. - The theme switch does not affect tool availability or pending tool-call rendering. #### Rollback/Cleanup From ea31515ed08621b01115b971edefb455c4b94c1c Mon Sep 17 00:00:00 2001 From: Igor Date: Tue, 5 May 2026 05:18:12 +0700 Subject: [PATCH 3/5] Revert "Use playwright stealth for Browser Use backend" This reverts commit e889a147cbd4f5739ff1b98ce81d2a58a303c0fb. --- src/server/browserUseBackend.ts | 17 ++--------------- src/server/playwrightStealthPayload.ts | 16 ---------------- tests.md | 8 ++------ 3 files changed, 4 insertions(+), 37 deletions(-) delete mode 100644 src/server/playwrightStealthPayload.ts diff --git a/src/server/browserUseBackend.ts b/src/server/browserUseBackend.ts index e724fc8e4..d5baa1bc8 100644 --- a/src/server/browserUseBackend.ts +++ b/src/server/browserUseBackend.ts @@ -2,11 +2,6 @@ import { createServer, type Socket, type Server } from 'node:net' import { mkdir, readFile, rm, writeFile } from 'node:fs/promises' import { join } from 'node:path' import { createRequire } from 'node:module' -import { - PLAYWRIGHT_STEALTH_CHROMIUM_ARGS, - PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS, - PLAYWRIGHT_STEALTH_INIT_SCRIPT, -} from './playwrightStealthPayload.js' type JsonRpcMessage = { jsonrpc?: '2.0' @@ -44,7 +39,6 @@ type PlaywrightBrowser = { type PlaywrightContext = { newPage(): Promise newCDPSession(page: PlaywrightPage): Promise - addInitScript(script: string): Promise } type PlaywrightPage = { @@ -154,10 +148,7 @@ async function launchBrowser(): Promise { chromium: { launch(options?: Record): Promise } }> const { chromium } = await dynamicImport('playwright') - return await chromium.launch({ - args: [...PLAYWRIGHT_STEALTH_CHROMIUM_ARGS], - headless: false, - }) + return await chromium.launch({ headless: false }) } function handleConnection(backend: BrowserUseBackendRecord, socket: Socket): void { @@ -286,11 +277,7 @@ async function handleRequest( async function createTab(client: BrowserUseClient): Promise { const browser = await client.backend.browserPromise - const context = await browser.newContext({ - ...PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS, - extraHTTPHeaders: { ...PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS.extraHTTPHeaders }, - }) - await context.addInitScript(PLAYWRIGHT_STEALTH_INIT_SCRIPT) + const context = await browser.newContext() const page = await context.newPage() const tabId = client.backend.nextTabId++ client.backend.tabs.set(tabId, { clients: new Set([client]), page }) diff --git a/src/server/playwrightStealthPayload.ts b/src/server/playwrightStealthPayload.ts deleted file mode 100644 index e682cf623..000000000 --- a/src/server/playwrightStealthPayload.ts +++ /dev/null @@ -1,16 +0,0 @@ -// Generated from https://github.com/Mattwmaster58/playwright_stealth (MIT). -// Source files: playwright_stealth/js/utils.js, generate.magic.arrays.js, and enabled evasions. - -export const PLAYWRIGHT_STEALTH_INIT_SCRIPT = "(() => {\nconst opts = {\"navigator_hardware_concurrency\":true,\"navigator_languages_override\":[\"en-US\",\"en\"],\"navigator_platform\":\"Win32\",\"navigator_user_agent\":null,\"navigator_vendor\":null,\"webgl_renderer\":\"Intel Iris OpenGL Engine\",\"webgl_vendor\":\"Intel Inc.\",\"script_logging\":false};\n/**\n * A set of shared utility functions specifically to modify native browser APIs without leaving traces.\n */\nconst utils = {};\n\n/**\n * Wraps a JS Proxy Handler and strips it's presence from error stacks, in case the traps throw.\n * The presence of a JS Proxy can be revealed as it shows up in error stack traces.\n *\n * @param {object} handler - The JS Proxy handler to wrap\n */\nutils.stripProxyFromErrors = (handler = {}) => {\n const handler_name = (Math.random() + 1).toString(36).substring(2);\n window[handler_name] = {}\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler);\n traps.forEach((trap) => {\n window[handler_name][trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || []);\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err;\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at window.. [as ${trap}] `, // caused by this very wrapper :-)\n ];\n return (\n err.stack\n .split(\"\\n\")\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => index !== 1)\n // Check if the line starts with one of our blacklisted strings\n .filter((line) => !blacklist.some((bl) => line.trim().startsWith(bl)))\n .join(\"\\n\")\n );\n };\n\n const stripWithAnchor = (stack) => {\n const stackArr = stack.split(\"\\n\");\n const anchor = `at window.. [as ${trap}] `; // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex((line) => line.trim().startsWith(anchor));\n if (anchorIndex === -1) {\n return false; // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex);\n return stackArr.join(\"\\n\");\n };\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack);\n\n throw err; // Re-throw our now sanitized error\n }\n };\n });\n return window[handler_name];\n};\n\n/**\n * Strip error lines from stack traces until (and including) a known line the stack.\n *\n * @param {object} err - The error to sanitize\n * @param {string} anchor - The string the anchor line starts with\n */\nutils.stripErrorWithAnchor = (err, anchor) => {\n const stackArr = err.stack.split(\"\\n\");\n const anchorIndex = stackArr.findIndex((line) => line.trim().startsWith(anchor));\n if (anchorIndex === -1) {\n return err; // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex);\n err.stack = stackArr.join(\"\\n\");\n return err;\n};\n\n/**\n * Replace the property of an object in a stealthy way.\n *\n * Note: You also want to work on the prototype of an object most often,\n * as you'd otherwise leave traces (e.g. showing up in Object.getOwnPropertyNames(obj)).\n *\n * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty\n *\n * @example\n * replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: \"alice\" })\n * // or\n * replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] })\n *\n * @param {object} obj - The object which has the property to replace\n * @param {string} propName - The property name to replace\n * @param {object} descriptorOverrides - e.g. { value: \"alice\" }\n */\nutils.replaceProperty = (obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides,\n });\n};\n\n/**\n * Preload a cache of function copies and data.\n *\n * For a determined enough observer it would be possible to overwrite and sniff usage of functions\n * we use in our internal Proxies, to combat that we use a cached copy of those functions.\n *\n * This is evaluated once per execution context (e.g. window)\n */\nutils.preloadCache = () => {\n if (utils.cache) {\n return;\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect),\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString.toString(), // => `function toString() { [native code] }`\n };\n};\n\n/**\n * Utility function to generate a cross-browser `toString` result representing native code.\n *\n * There's small differences: Chromium uses a single line, whereas FF & Webkit uses multiline strings.\n * To future-proof this we use an existing native toString result as the basis.\n *\n * The only advantage we have over the other team is that our JS runs first, hence we cache the result\n * of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it.\n *\n * Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache before,\n * by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups).\n *\n * @example\n * makeNativeString('foobar') // => `function foobar() { [native code] }`\n *\n * @param {string} [name] - Optional function name\n */\nutils.makeNativeString = (name = \"\") => {\n // Cache (per-window) the original native toString or use that if available\n utils.preloadCache();\n return utils.cache.nativeToStringStr.replace(\"toString\", name || \"\");\n};\n\n/**\n * Helper function to modify the `toString()` result of the provided object.\n *\n * Note: Use `utils.redirectToString` instead when possible.\n *\n * There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object.\n * If no string is provided we will generate a `[native code]` thing based on the name of the property object.\n *\n * @example\n * patchToString(WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }')\n *\n * @param {object} obj - The object for which to modify the `toString()` representation\n * @param {string} str - Optional string used as a return value\n */\nutils.patchToString = (obj, str = \"\") => {\n utils.preloadCache();\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString(\"toString\");\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name);\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(Function.prototype.toString).isPrototypeOf(ctx.toString); // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString();\n }\n return target.call(ctx);\n },\n });\n utils.replaceProperty(Function.prototype, \"toString\", {\n value: toStringProxy,\n });\n};\n\n/**\n * Make all nested functions of an object native.\n *\n * @param {object} obj\n */\nutils.patchToStringNested = (obj = {}) => {\n return utils.execRecursively(obj, [\"function\"], utils.patchToString);\n};\n\n/**\n * Redirect toString requests from one object to another.\n *\n * @param {object} proxyObj - The object that toString will be called on\n * @param {object} originalObj - The object which toString result we wan to return\n */\nutils.redirectToString = (proxyObj, originalObj) => {\n utils.preloadCache();\n\n const toStringProxy = new Proxy(Function.prototype.toString, {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString(\"toString\");\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name);\n\n // Return the toString representation of our original object if possible\n return originalObj + \"\" || fallback();\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(Function.prototype.toString).isPrototypeOf(ctx.toString); // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString();\n }\n\n return target.call(ctx);\n },\n });\n utils.replaceProperty(Function.prototype, \"toString\", {\n value: toStringProxy,\n });\n};\n\n/**\n * All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps.\n *\n * Will stealthify these aspects (strip error stack traces, redirect toString, etc).\n * Note: This is meant to modify native Browser APIs and works best with prototype objects.\n *\n * @example\n * replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler)\n *\n * @param {object} obj - The object which has the property to replace\n * @param {string} propName - The name of the property to replace\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.replaceWithProxy = (obj, propName, handler) => {\n utils.preloadCache();\n const originalObj = obj[propName];\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler));\n\n utils.replaceProperty(obj, propName, { value: proxyObj });\n utils.redirectToString(proxyObj, originalObj);\n\n return true;\n};\n\n/**\n * All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps.\n *\n * Will stealthify these aspects (strip error stack traces, redirect toString, etc).\n *\n * @example\n * mockWithProxy(chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler)\n *\n * @param {object} obj - The object which has the property to replace\n * @param {string} propName - The name of the property to replace or create\n * @param {object} pseudoTarget - The JS Proxy target to use as a basis\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.mockWithProxy = (obj, propName, pseudoTarget, handler) => {\n utils.preloadCache();\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));\n\n utils.replaceProperty(obj, propName, { value: proxyObj });\n utils.patchToString(proxyObj);\n\n return true;\n};\n\n/**\n * All-in-one method to create a new JS Proxy with stealth tweaks.\n *\n * This is meant to be used whenever we need a JS Proxy but don't want to replace or mock an existing known property.\n *\n * Will stealthify certain aspects of the Proxy (strip error stack traces, redirect toString, etc).\n *\n * @example\n * createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy\n *\n * @param {object} pseudoTarget - The JS Proxy target to use as a basis\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.createProxy = (pseudoTarget, handler) => {\n utils.preloadCache();\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));\n utils.patchToString(proxyObj);\n return proxyObj;\n};\n\n/**\n * Helper function to split a full path to an Object into the first part and property.\n *\n * @example\n * splitObjPath(`HTMLMediaElement.prototype.canPlayType`)\n * // => {objName: \"HTMLMediaElement.prototype\", propName: \"canPlayType\"}\n *\n * @param {string} objPath - The full path to an object as dot notation string\n */\nutils.splitObjPath = (objPath) => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split(\".\").slice(0, -1).join(\".\"),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split(\".\").slice(-1)[0],\n});\n\n/**\n * Convenience method to replace a property with a JS Proxy using the provided objPath.\n *\n * Supports a full path (dot notation) to the object as string here, in case that makes it easier.\n *\n * @example\n * replaceObjPathWithProxy('WebGLRenderingContext.prototype.getParameter', proxyHandler)\n *\n * @param {string} objPath - The full path to an object (dot notation string) to replace\n * @param {object} handler - The JS Proxy handler to use\n */\nutils.replaceObjPathWithProxy = (objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath);\n const obj = eval(objName); // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler);\n};\n\n/**\n * Traverse nested properties of an object recursively and apply the given function on a whitelist of value types.\n *\n * @param {object} obj\n * @param {array} typeFilter - e.g. `['function']`\n * @param {Function} fn - e.g. `utils.patchToString`\n */\nutils.execRecursively = (obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue;\n }\n if (obj[key] && typeof obj[key] === \"object\") {\n recurse(obj[key]);\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key]);\n }\n }\n }\n }\n\n recurse(obj);\n return obj;\n};\n\n/**\n * Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one.\n * That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter.\n *\n * Unfortunately, the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process.\n * This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings.\n *\n * We use this to pass down our utility functions as well as any other functions (to be able to split up code better).\n *\n * @see utils.materializeFns\n *\n * @param {object} fnObj - An object containing functions as properties\n */\nutils.stringifyFns = (fnObj = { hello: () => \"world\" }) => {\n // Object.fromEntries() polyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === \"function\")\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n );\n};\n\n/**\n * Utility function to reverse the process of `utils.stringifyFns`.\n * Will materialize an object with stringified functions (supports classic and fat arrow functions).\n *\n * @param {object} fnStrObj - An object containing stringified functions as properties\n */\nutils.materializeFns = (fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith(\"function\")) {\n // some trickery is necessary to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()]; // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)]; // eslint-disable-line no-eval\n }\n })\n );\n};\n\nutils.arrayEqual = (arr1, arr2) => arr1.length === arr2.length && arr1.every((value, index) => value === arr2[index]);\n\nconst log = (...args) => opts.script_logging && console.log(\"[playwright-stealth]:\", ...args);\nconst warn = (...args) => opts.script_logging && console.warn(\"[playwright-stealth]:\", ...args);\n\nlog(JSON.stringify(opts));\n\ngenerateFunctionMocks = (proto, itemMainProp, dataArray) => ({\n item: utils.createProxy(proto.item, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'item' on '${proto[Symbol.toStringTag]}': 1 argument required, but only 0 present.`\n );\n }\n // Special behavior alert:\n // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup\n // - If anything else than an integer (including as string) is provided it will return the first entry\n const isInteger = args[0] && Number.isInteger(Number(args[0])); // Cast potential string to number first, then check for integer\n // Note: Vanilla never returns `undefined`\n return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null;\n }\n }),\n /** Returns the MimeType object with the specified name. */\n namedItem: utils.createProxy(proto.namedItem, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'namedItem' on '${proto[Symbol.toStringTag]}': 1 argument required, but only 0 present.`\n );\n }\n return dataArray.find((mt) => mt[itemMainProp] === args[0]) || null; // Not `undefined`!\n }\n }),\n /** Does nothing and shall return nothing */\n refresh: proto.refresh\n ? utils.createProxy(proto.refresh, {\n apply(target, ctx, args) {\n return undefined;\n }\n })\n : undefined\n});\n\nfunction generateMagicArray(\n dataArray = [],\n proto = MimeTypeArray.prototype,\n itemProto = MimeType.prototype,\n itemMainProp = \"type\"\n) {\n // Quick helper to set props with the same descriptors vanilla is using\n const defineProp = (obj, prop, value) =>\n Object.defineProperty(obj, prop, {\n value,\n writable: false,\n enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)`\n configurable: false\n });\n\n // Loop over our fake data and construct items\n const makeItem = (data) => {\n const item = {};\n for (const prop of Object.keys(data)) {\n if (prop.startsWith(\"__\")) {\n continue;\n }\n defineProp(item, prop, data[prop]);\n }\n // navigator.plugins[i].length should always be 1\n if (itemProto === Plugin.prototype) {\n defineProp(item, \"length\", 1);\n }\n // We need to spoof a specific `MimeType` or `Plugin` object\n return Object.create(itemProto, Object.getOwnPropertyDescriptors(item));\n };\n\n const magicArray = [];\n\n // Loop through our fake data and use that to create convincing entities\n dataArray.forEach((data) => {\n magicArray.push(makeItem(data));\n });\n\n // Add direct property access based on types (e.g. `obj['application/pdf']`) afterwards\n magicArray.forEach((entry) => {\n defineProp(magicArray, entry[itemMainProp], entry);\n });\n\n // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)`\n const magicArrayObj = Object.create(proto, {\n ...Object.getOwnPropertyDescriptors(magicArray),\n\n // There's one ugly quirk we unfortunately need to take care of:\n // The `MimeTypeArray` prototype has an enumerable `length` property,\n // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`.\n // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap.\n length: {\n value: magicArray.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n });\n\n // Generate our functional function mocks :-)\n const functionMocks = generateFunctionMocks(proto, itemMainProp, magicArray);\n\n // Override custom object with proxy\n return new Proxy(magicArrayObj, {\n get(target, key = \"\") {\n // Redirect function calls to our custom proxied versions mocking the vanilla behavior\n if (key === \"item\") {\n return functionMocks.item;\n }\n if (key === \"namedItem\") {\n return functionMocks.namedItem;\n }\n if (proto === PluginArray.prototype && key === \"refresh\") {\n return functionMocks.refresh;\n }\n // Everything else can pass through as normal\n return utils.cache.Reflect.get(...arguments);\n },\n ownKeys(target) {\n // There are a couple of quirks where the original property demonstrates \"magical\" behavior that makes no sense\n // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absense of `length`\n // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly\n // For that reason we just completely fake the available property names based on our data to match what regular Chrome is doing\n // Specific issues when not patching this: `length` property is available, direct `types` props (e.g. `obj['application/pdf']`) are missing\n const keys = [];\n const typeProps = magicArray.map((mt) => mt[itemMainProp]);\n typeProps.forEach((_, i) => keys.push(`${i}`));\n typeProps.forEach((propName) => keys.push(propName));\n return keys;\n }\n });\n}\n\nlog(\"loading chrome.app.js\");\n\nif (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, \"chrome\", {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n });\n}\n\n// app in window.chrome means we're running headful and don't need to mock anything\nif (!(\"app\" in window.chrome)) {\n const makeError = {\n ErrorInInvocation: (fn) => {\n const err = new TypeError(`Error in invocation of app.${fn}()`);\n return utils.stripErrorWithAnchor(err, `at ${fn} (eval at `);\n }\n };\n\n const APP_STATIC_DATA = JSON.parse(\n `\n{\n \"isInstalled\": false,\n \"InstallState\": {\n \"DISABLED\": \"disabled\",\n \"INSTALLED\": \"installed\",\n \"NOT_INSTALLED\": \"not_installed\"\n },\n \"RunningState\": {\n \"CANNOT_RUN\": \"cannot_run\",\n \"READY_TO_RUN\": \"ready_to_run\",\n \"RUNNING\": \"running\"\n }\n}\n `.trim()\n );\n\n window.chrome.app = {\n ...APP_STATIC_DATA,\n\n get isInstalled() {\n return false;\n },\n\n getDetails: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getDetails`);\n }\n return null;\n },\n getIsInstalled: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getIsInstalled`);\n }\n return false;\n },\n runningState: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`runningState`);\n }\n return \"cannot_run\";\n }\n };\n utils.patchToStringNested(window.chrome.app);\n}\n\nlog(\"loading chrome.csi.js\");\n\nif (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, \"chrome\", {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n });\n}\n\n// Check if we're running headful and don't need to mock anything\n// Check that the Navigation Timing API v1 is available, we need that\nif (!(\"csi\" in window.chrome) && window.performance?.timing) {\n const { csi_timing } = window.performance;\n\n log(\"loading chrome.csi.js\");\n window.chrome.csi = function() {\n return {\n onloadT: csi_timing?.domContentLoadedEventEnd,\n startE: csi_timing?.navigationStart,\n pageT: Date.now() - csi_timing?.navigationStart,\n tran: 15 // transition? seems constant\n };\n };\n utils.patchToString(window.chrome.csi);\n}\n\nlog(\"loading chrome.hairline.js\");\n// inspired by: https://intoli.com/blog/making-chrome-headless-undetectable/\nconst elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype,\n \"offsetHeight\");\n\nutils.replaceProperty(HTMLDivElement.prototype, \"offsetHeight\", {\n get: function() {\n // hmmm not sure about this\n if (this.id === \"modernizr\") {\n return 1;\n }\n return elementDescriptor.get.apply(this);\n }\n});\n\nlog(\"loading chrome.load.times.js\");\n\nif (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, \"chrome\", {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n });\n}\n\n// That means we're running headful and don't need to mock anything\nif (\"loadTimes\" in window.chrome) {\n warn(\"skipping chrome loadtimes update, running in headful mode\");\n}\n\n// Check that the Navigation Timing API v1 + v2 is available, we need that\nif (window.performance?.timing || window.PerformancePaintTiming) {\n const { performance } = window;\n\n // Some stuff is not available on about:blank as it requires a navigation to occur,\n // let's harden the code to not fail then:\n const ntEntryFallback = {\n nextHopProtocol: \"h2\",\n type: \"other\"\n };\n\n // The API exposes some funky info regarding the connection\n const protocolInfo = {\n get connectionInfo() {\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return ntEntry.nextHopProtocol;\n },\n get npnNegotiatedProtocol() {\n // NPN is deprecated in favor of ALPN, but this implementation returns the\n // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return [\"h2\", \"hq\"].includes(ntEntry.nextHopProtocol) ?\n ntEntry.nextHopProtocol :\n \"unknown\";\n },\n get navigationType() {\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return ntEntry.type;\n },\n get wasAlternateProtocolAvailable() {\n // The Alternate-Protocol header is deprecated in favor of Alt-Svc\n // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this\n // should always return false.\n return false;\n },\n get wasFetchedViaSpdy() {\n // SPDY is deprecated in favor of HTTP/2, but this implementation returns\n // true for HTTP/2 or HTTP2+QUIC/39 as well.\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return [\"h2\", \"hq\"].includes(ntEntry.nextHopProtocol);\n },\n get wasNpnNegotiated() {\n // NPN is deprecated in favor of ALPN, but this implementation returns true\n // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry = performance.getEntriesByType(\"navigation\")[0] ||\n ntEntryFallback;\n return [\"h2\", \"hq\"].includes(ntEntry.nextHopProtocol);\n }\n };\n\n const { timing } = window.performance;\n\n // Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3\n function toFixed(num, fixed) {\n const re = new RegExp(\"^-?\\\\d+(?:.\\\\d{0,\" + (fixed || -1) + \"})?\");\n return num.toString().match(re)[0];\n }\n\n const timingInfo = {\n get firstPaintAfterLoadTime() {\n // This was never actually implemented and always returns 0.\n return 0;\n },\n get requestTime() {\n return timing.navigationStart / 1000;\n },\n get startLoadTime() {\n return timing.navigationStart / 1000;\n },\n get commitLoadTime() {\n return timing.responseStart / 1000;\n },\n get finishDocumentLoadTime() {\n return timing.domContentLoadedEventEnd / 1000;\n },\n get finishLoadTime() {\n return timing.loadEventEnd / 1000;\n },\n get firstPaintTime() {\n const fpEntry = performance.getEntriesByType(\"paint\")[0] || {\n startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`)\n };\n return toFixed((fpEntry.startTime + performance.timeOrigin) / 1000, 3);\n }\n };\n\n window.chrome.loadTimes = function() {\n return {\n ...protocolInfo,\n ...timingInfo\n };\n };\n utils.patchToString(window.chrome.loadTimes);\n}\n\nlog(\"loading iframe.contentWindow.js\");\n\ntry {\n // Adds a contentWindow proxy to the provided iframe element\n const addContentWindowProxy = (iframe) => {\n const contentWindowProxy = {\n get(target, key) {\n // Now to the interesting part:\n // We actually make this thing behave like a regular iframe window,\n // by intercepting calls to e.g. `.self` and redirect it to the correct thing. :)\n // That makes it possible for these assertions to be correct:\n // iframe.contentWindow.self === window.top // must be false\n if (key === \"self\") {\n return this;\n }\n // iframe.contentWindow.frameElement === iframe // must be true\n if (key === \"frameElement\") {\n return iframe;\n }\n return Reflect.get(target, key);\n }\n };\n\n if (!iframe.contentWindow) {\n const proxy = new Proxy(window, contentWindowProxy);\n Object.defineProperty(iframe, \"contentWindow\", {\n get() {\n return proxy;\n },\n set(newValue) {\n return newValue; // contentWindow is immutable\n },\n enumerable: true,\n configurable: false\n });\n }\n };\n\n // Handles iframe element creation, augments `srcdoc` property so we can intercept further\n const handleIframeCreation = (target, thisArg, args) => {\n const iframe = target.apply(thisArg, args);\n\n // We need to keep the originals around\n const _iframe = iframe;\n const _srcdoc = _iframe.srcdoc;\n\n // Add hook for the srcdoc property\n // We need to be very surgical here to not break other iframes by accident\n Object.defineProperty(iframe, \"srcdoc\", {\n configurable: true, // Important, so we can reset this later\n get: function() {\n return _iframe.srcdoc;\n },\n set: function(newValue) {\n addContentWindowProxy(this);\n // Reset property, the hook is only needed once\n Object.defineProperty(iframe, \"srcdoc\", {\n configurable: false,\n writable: false,\n value: _srcdoc\n });\n _iframe.srcdoc = newValue;\n }\n });\n return iframe;\n };\n\n // Adds a hook to intercept iframe creation events\n const addIframeCreationSniffer = () => {\n /* global document */\n const createElementHandler = {\n // Make toString() native\n get(target, key) {\n return Reflect.get(target, key);\n },\n apply: function(target, thisArg, args) {\n const isIframe = args && args.length && `${args[0]}`.toLowerCase() ===\n \"iframe\";\n if (!isIframe) {\n // Everything as usual\n return target.apply(thisArg, args);\n } else {\n return handleIframeCreation(target, thisArg, args);\n }\n }\n };\n // All this just due to iframes with srcdoc bug\n utils.replaceWithProxy(document, \"createElement\", createElementHandler);\n };\n\n // Let's go\n addIframeCreationSniffer();\n} catch (err) {\n // console.warn(err)\n}\n\nlog(\"loading media.codec.js\");\n/**\n * Input might look funky, we need to normalize it so e.g. whitespace isn't an issue for our spoofing.\n *\n * @example\n * video/webm; codecs=\"vp8, vorbis\"\n * video/mp4; codecs=\"avc1.42E01E\"\n * audio/x-m4a;\n * audio/ogg; codecs=\"vorbis\"\n * @param {String} arg\n */\nconst parseInput = (arg) => {\n const [mime, codecStr] = arg.trim().split(\";\");\n let codecs = [];\n if (codecStr && codecStr.includes(\"codecs=\\\"\")) {\n codecs = codecStr.trim().\n replace(`codecs=\"`, \"\").\n replace(`\"`, \"\").\n trim().\n split(\",\").\n filter((x) => !!x).\n map((x) => x.trim());\n }\n return {\n mime,\n codecStr,\n codecs\n };\n};\n\nconst canPlayType = {\n // Intercept certain requests\n apply: function(target, ctx, args) {\n if (!args || !args.length) {\n return target.apply(ctx, args);\n }\n const { mime, codecs } = parseInput(args[0]);\n // This specific mp4 codec is missing in Chromium\n if (mime === \"video/mp4\") {\n if (codecs.includes(\"avc1.42E01E\")) {\n return \"probably\";\n }\n }\n // This mimetype is only supported if no codecs are specified\n if (mime === \"audio/x-m4a\" && !codecs.length) {\n return \"maybe\";\n }\n\n // This mimetype is only supported if no codecs are specified\n if (mime === \"audio/aac\" && !codecs.length) {\n return \"probably\";\n }\n // Everything else as usual\n return target.apply(ctx, args);\n }\n};\n\n/* global HTMLMediaElement */\nutils.replaceWithProxy(HTMLMediaElement.prototype, \"canPlayType\", canPlayType);\n\nlog(\"loading navigator.hardwareConcurrency\");\n\nutils.replaceProperty(Object.getPrototypeOf(navigator), \"hardwareConcurrency\", {\n get() {\n return 4;\n }\n});\n\nlog(\"loading navigator.languages.js\");\nlog(navigator.languages, opts.navigator_languages_override);\nif (utils.arrayEqual(navigator.languages, opts.navigator_languages_override)) {\n log(\"not patching navigator.languages, assuming CLI args were used instead\");\n} else {\n utils.replaceProperty(Object.getPrototypeOf(navigator), \"languages\", {\n get: () => opts.navigator_languages_override\n });\n}\n\nlog(\"loading navigator.permissions.js\");\n\nconst handler = {\n apply: function(target, ctx, args) {\n const param = (args || [])[0];\n\n if (param && param.name && param.name === \"notifications\") {\n const result = { state: Notification.permission };\n Object.setPrototypeOf(result, PermissionStatus.prototype);\n return Promise.resolve(result);\n }\n\n return utils.cache.Reflect.apply(...arguments);\n }\n};\n\nutils.replaceWithProxy(\n window.navigator.permissions.__proto__, // eslint-disable-line no-proto\n \"query\",\n handler\n);\n\nlog(`loading navigator.platform.js with opt: ${opts.navigator_platform}`);\n\nif (opts.navigator_platform && navigator.platform !== opts.navigator_platform) {\n utils.replaceProperty(Object.getPrototypeOf(navigator), \"platform\", {\n get: () => opts.navigator_platform\n });\n}\n\nlog(\"loading navigator.plugins.js\");\n\nconst data = {\n mimeTypes: [\n {\n type: \"application/pdf\",\n suffixes: \"pdf\",\n description: \"\",\n __pluginName: \"Chrome PDF Viewer\"\n },\n {\n type: \"application/x-google-chrome-pdf\",\n suffixes: \"pdf\",\n description: \"Portable Document Format\",\n __pluginName: \"Chrome PDF Plugin\"\n },\n {\n type: \"application/x-nacl\",\n suffixes: \"\",\n description: \"Native Client Executable\",\n __pluginName: \"Native Client\"\n },\n {\n type: \"application/x-pnacl\",\n suffixes: \"\",\n description: \"Portable Native Client Executable\",\n __pluginName: \"Native Client\"\n }\n ],\n plugins: [\n {\n name: \"Chrome PDF Plugin\",\n filename: \"internal-pdf-viewer\",\n description: \"Portable Document Format\",\n __mimeTypes: [\"application/x-google-chrome-pdf\"]\n },\n {\n name: \"Chrome PDF Viewer\",\n filename: \"mhjfbmdgcfjbbpaeojofohoefgiehjai\",\n description: \"\",\n __mimeTypes: [\"application/pdf\"]\n },\n {\n name: \"Native Client\",\n filename: \"internal-nacl-plugin\",\n description: \"\",\n __mimeTypes: [\"application/x-nacl\", \"application/x-pnacl\"]\n }\n ]\n};\n\n// That means we're running headful\nconst hasPlugins = \"plugins\" in navigator && navigator.plugins.length;\nif (!hasPlugins) {\n const mimeTypes = generateMagicArray(data.mimeTypes, MimeTypeArray.prototype,\n MimeType.prototype, \"type\");\n const plugins = generateMagicArray(data.plugins, PluginArray.prototype,\n Plugin.prototype, \"name\");\n\n // Plugin and MimeType cross-reference each other, let's do that now\n // Note: We're looping through `data.plugins` here, not the generated `plugins`\n for (const pluginData of data.plugins) {\n pluginData.__mimeTypes.forEach((type, index) => {\n plugins[pluginData.name][index] = mimeTypes[type];\n plugins[type] = mimeTypes[type];\n Object.defineProperty(mimeTypes[type], \"enabledPlugin\", {\n value: JSON.parse(JSON.stringify(plugins[pluginData.name])),\n writable: false,\n enumerable: false, // Important: `JSON.stringify(navigator.plugins)`\n configurable: false\n });\n });\n }\n\n const patchNavigator = (name, value) =>\n utils.replaceProperty(Object.getPrototypeOf(navigator), name, {\n get() {\n return value;\n }\n });\n\n patchNavigator(\"mimeTypes\", mimeTypes);\n patchNavigator(\"plugins\", plugins);\n}\n\nlog(\"loading navigator.userAgent.js\");\n// replace Headless references in default useragent\nconst current_ua = navigator.userAgent;\nutils.replaceProperty(Object.getPrototypeOf(navigator), \"userAgent\", {\n get: () => opts.navigator_user_agent ||\n current_ua.replace(\"HeadlessChrome/\", \"Chrome/\")\n});\n\nlog(\"loading navigator.userAgentData.js\");\n\nconst originalUserAgentData = navigator.userAgentData;\n\nif (originalUserAgentData) {\n /**\n * Helper to replace HeadlessChrome with Google Chrome in brand lists\n * @param {Array<{brand: string, version: string}>} list - Array of UADataBrand objects\n */\n const filterBrandList = (list) =>\n list.map((b) => (b.brand === \"HeadlessChrome\" ? { brand: \"Google Chrome\", version: b.version } : b));\n\n // Get the NavigatorUAData prototype\n const NavigatorUADataProto = Object.getPrototypeOf(originalUserAgentData);\n\n // Cache original methods before patching\n const originalGetHighEntropyValues = NavigatorUADataProto.getHighEntropyValues;\n const originalToJSON = NavigatorUADataProto.toJSON;\n const originalBrandsDescriptor = Object.getOwnPropertyDescriptor(NavigatorUADataProto, \"brands\");\n\n let cachedFilteredBrands = null;\n // Helper to get filtered brands (cached for identity checks)\n const getFilteredBrands = () => {\n if (cachedFilteredBrands === null) {\n const originalBrands = originalBrandsDescriptor.get.call(originalUserAgentData);\n cachedFilteredBrands = filterBrandList(originalBrands);\n }\n return cachedFilteredBrands;\n };\n\n // Patch getHighEntropyValues to filter HeadlessChrome from results\n utils.replaceProperty(NavigatorUADataProto, \"getHighEntropyValues\", {\n value: function (hints) {\n return originalGetHighEntropyValues.call(this, hints).then((data) => {\n const newData = { ...data };\n if (newData.brands) {\n newData.brands = filterBrandList(newData.brands);\n }\n if (newData.fullVersionList) {\n newData.fullVersionList = filterBrandList(newData.fullVersionList);\n }\n return newData;\n });\n },\n });\n\n // Patch toJSON to filter HeadlessChrome\n utils.replaceProperty(NavigatorUADataProto, \"toJSON\", {\n value: function () {\n const data = originalToJSON.call(this);\n return {\n brands: filterBrandList(data.brands),\n mobile: data.mobile,\n platform: data.platform,\n };\n },\n });\n\n // Patch brands getter to return filtered array (same instance each call, like real Chrome)\n utils.replaceProperty(NavigatorUADataProto, \"brands\", {\n get: function () {\n // Return cached filtered brands, computing and freezing on first access\n return getFilteredBrands();\n },\n enumerable: originalBrandsDescriptor.enumerable,\n configurable: originalBrandsDescriptor.configurable,\n });\n\n utils.replaceProperty(NavigatorUADataProto, \"userAgentData\", {\n get: () => originalUserAgentData,\n });\n}\n\nlog(\"loading navigator.vendor.js\");\n\nutils.replaceProperty(Object.getPrototypeOf(navigator), \"vendor\", {\n get: () => opts.navigator_vendor || \"Google Inc.\"\n});\n\nlog(\"loading navigator.webdriver.js\");\n// this is close to the most accurate way to emulate this: https://stackoverflow.com/a/69533548\n// no point \"giving ourselves away\" if we don't need to mock this values\n// techniques exist to detect Object.defineProperty etc., so if we can avoid it we do\n// if args include --disable-blink-features=AutomationControlled, we do not need to mock this\nif (navigator.webdriver) {\n utils.replaceProperty(Object.getPrototypeOf(navigator), \"webdriver\", {\n get: new Proxy(\n Object.getOwnPropertyDescriptor(Object.getPrototypeOf(navigator),\n \"webdriver\").get, {\n apply: (target, thisArg, args) => {\n // emulate getter call validation\n Reflect.apply(target, thisArg, args);\n return false;\n }\n })\n });\n} else {\n log(\"not patching navigator.webdriver, assuming CLI args were used instead\");\n}\n\nlog(\"loading error.prototype.js\");\n\nObject.defineProperty(Error.prototype, \"name\", {configurable: false, enumerable: false})\n\nlog(\"loading webgl.vendor.js\");\n\nconst getParameterProxyHandler = {\n apply: function (target, ctx, args) {\n const param = (args || [])[0];\n // UNMASKED_VENDOR_WEBGL\n if (param === 37445) {\n return opts.webgl_vendor || \"Intel Inc.\"; // default in headless: Google Inc.\n }\n // UNMASKED_RENDERER_WEBGL\n if (param === 37446) {\n return opts.webgl_renderer || \"Intel Iris OpenGL Engine\"; // default in headless: Google SwiftShader\n }\n return utils.cache.Reflect.apply(target, ctx, args);\n },\n};\n\n// There's more than one WebGL rendering context\n// https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility\n// To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter)\nconst addProxy = (obj, propName) => {\n utils.replaceWithProxy(obj, propName, getParameterProxyHandler);\n};\n// For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing:\naddProxy(WebGLRenderingContext.prototype, \"getParameter\");\naddProxy(WebGL2RenderingContext.prototype, \"getParameter\");\n\n})();" - -export const PLAYWRIGHT_STEALTH_CHROMIUM_ARGS = [ - '--disable-blink-features=AutomationControlled', - '--accept-lang=en-US,en', -] as const - -export const PLAYWRIGHT_STEALTH_CONTEXT_OPTIONS = { - locale: 'en-US', - extraHTTPHeaders: { - 'Accept-Language': 'en-US,en;q=0.9', - }, -} as const diff --git a/tests.md b/tests.md index 97091ec60..8826392ce 100644 --- a/tests.md +++ b/tests.md @@ -227,7 +227,7 @@ This file tracks manual regression and feature verification steps. ### Browser Use plugin runtime command #### Feature/Change Name -codexui exposes Browser Use in chats by using the bundled Codex.app runtime, registering `node_repl`, starting a session-scoped local Browser Use backend, and applying the vendored `Mattwmaster58/playwright_stealth` init payload to new Browser Use contexts. +codexui exposes Browser Use in chats by using the bundled Codex.app runtime, registering `node_repl`, and starting a session-scoped local Browser Use backend. #### Prerequisites/Setup 1. macOS with `/Applications/Codex.app/Contents/Resources/codex` installed. @@ -242,17 +242,13 @@ codexui exposes Browser Use in chats by using the bundled Codex.app runtime, reg 4. Open `http://127.0.0.1:4173` in light theme. 5. Create or open a codexui chat and ask it to use Browser Use to open `https://example.com` and report the page title. 6. Confirm the chat produces `mcp__node_repl__js` Browser Use activity and returns `{"title":"Example Domain","url":"https://example.com/"}` without a missing-tool or IAB discovery error. -7. In the same Browser Use-enabled chat, ask it to open `https://google.com`, type `hello world` into the search box, and press Enter. -8. Confirm Browser Use can type into Google's localized search combobox and submit the query. If Google redirects to `/sorry/index`, confirm the redirected URL includes a `continue=` target containing `q=hello+world`. -9. Switch to dark theme and repeat steps 5-8. +7. Switch to dark theme and repeat steps 5-6. #### Expected Results - On macOS, codexui launches the Codex.app bundled app-server by default. - `CODEXUI_CODEX_COMMAND` still overrides the bundled command when set. - `mcpServerStatus/list` includes `node_repl` with `js` and `js_reset`. - Browser Use works inside codexui chats in both light and dark theme. -- New Browser Use contexts receive the vendored `playwright_stealth` init script, Chromium stealth launch args, and English locale headers. -- The Google validation proves text entry and submit behavior; a Google `/sorry/index` anti-automation redirect is acceptable evidence of submit, but it is not evidence that the challenge was bypassed. - The theme switch does not affect tool availability or pending tool-call rendering. #### Rollback/Cleanup From 96a67049362c6121f3688080797bc01104d06d19 Mon Sep 17 00:00:00 2001 From: Igor Date: Tue, 5 May 2026 05:27:23 +0700 Subject: [PATCH 4/5] Harden Browser Use backend startup --- src/server/browserUseBackend.ts | 131 ++++++++++++++++++++++------- src/server/codexAppServerBridge.ts | 7 +- tests.md | 5 +- 3 files changed, 107 insertions(+), 36 deletions(-) diff --git a/src/server/browserUseBackend.ts b/src/server/browserUseBackend.ts index d5baa1bc8..765d61dd2 100644 --- a/src/server/browserUseBackend.ts +++ b/src/server/browserUseBackend.ts @@ -1,5 +1,7 @@ import { createServer, type Socket, type Server } from 'node:net' -import { mkdir, readFile, rm, writeFile } from 'node:fs/promises' +import { createHash } from 'node:crypto' +import { access, mkdir, readFile, rm, writeFile } from 'node:fs/promises' +import { homedir, platform } from 'node:os' import { join } from 'node:path' import { createRequire } from 'node:module' @@ -67,10 +69,9 @@ type BrowserUseClient = { } const BROWSER_USE_SOCKET_DIR = '/tmp/codex-browser-use' +const MAX_BROWSER_USE_FRAME_BYTES = 10 * 1024 * 1024 const CODEX_BROWSER_USE_PEER_AUTHORIZATION = '/Applications/Codex.app/Contents/Resources/native/browser-use-peer-authorization.node' -const BROWSER_USE_CLIENT_PATH = - '/Users/igor/.codex/plugins/cache/openai-bundled/browser-use/0.1.0-alpha1/scripts/browser-client.mjs' const BROWSER_USE_NATIVE_CREATE_SOURCE = 'static async create(t){let n=eN();if(n!=null){let r=await n.createConnection(t),i=new e(r);return r.on("data",o=>i.handleData(o)),r.on("close",()=>{i.socket===r&&(i.socket=null)}),i}throw new Error(Q7())}' const BROWSER_USE_CODEXUI_CREATE_SOURCE = @@ -87,7 +88,7 @@ export async function ensureBrowserUseBackendForSession(sessionId: string): Prom await ensureBrowserUseClientFallbackPatch() await mkdir(BROWSER_USE_SOCKET_DIR, { recursive: true }) - const socketPath = join(BROWSER_USE_SOCKET_DIR, `codexui-${process.pid}-${normalizedSessionId}.sock`) + const socketPath = join(BROWSER_USE_SOCKET_DIR, `codexui-${process.pid}-${hashSessionId(normalizedSessionId)}.sock`) await rm(socketPath, { force: true }) const backend: BrowserUseBackendRecord = { @@ -100,38 +101,89 @@ export async function ensureBrowserUseBackendForSession(sessionId: string): Prom } browserUseBackends.set(normalizedSessionId, backend) - await new Promise((resolve, reject) => { - const onError = (error: Error) => { - backend.server.off('listening', onListening) - reject(error) - } - const onListening = () => { - backend.server.off('error', onError) - resolve() + try { + await new Promise((resolve, reject) => { + const onError = (error: Error) => { + backend.server.off('listening', onListening) + reject(error) + } + const onListening = () => { + backend.server.off('error', onError) + resolve() + } + backend.server.once('error', onError) + backend.server.once('listening', onListening) + backend.server.listen(socketPath) + }) + } catch (error) { + browserUseBackends.delete(normalizedSessionId) + await rm(socketPath, { force: true }) + const browser = await backend.browserPromise.catch(() => null) + await browser?.close() + throw error + } +} + +export async function tryEnsureBrowserUseBackendForSession(sessionId: string): Promise { + try { + if (platform() !== 'darwin') { + return } - backend.server.once('error', onError) - backend.server.once('listening', onListening) - backend.server.listen(socketPath) - }) + await ensureBrowserUseBackendForSession(sessionId) + } catch (error) { + console.warn('[browser-use] failed to initialize backend:', error instanceof Error ? error.message : String(error)) + } } async function ensureBrowserUseClientFallbackPatch(): Promise { browserUseClientPatchPromise ??= (async () => { - const source = await readFile(BROWSER_USE_CLIENT_PATH, 'utf8') - if (source.includes(BROWSER_USE_CODEXUI_CREATE_SOURCE)) { - return - } - if (!source.includes(BROWSER_USE_NATIVE_CREATE_SOURCE)) { - throw new Error('Browser Use client transport shape changed; cannot install codexui fallback.') + try { + const clientPath = resolveBrowserUseClientPath() + if (!clientPath) { + return + } + try { + await access(clientPath) + } catch { + return + } + const source = await readFile(clientPath, 'utf8') + if (source.includes(BROWSER_USE_CODEXUI_CREATE_SOURCE)) { + return + } + if (!source.includes(BROWSER_USE_NATIVE_CREATE_SOURCE)) { + console.warn('[browser-use] client transport shape changed; codexui fallback was not installed.') + return + } + await writeFile( + clientPath, + source.replace(BROWSER_USE_NATIVE_CREATE_SOURCE, BROWSER_USE_CODEXUI_CREATE_SOURCE), + ) + } catch (error) { + console.warn('[browser-use] client fallback patch skipped:', error instanceof Error ? error.message : String(error)) } - await writeFile( - BROWSER_USE_CLIENT_PATH, - source.replace(BROWSER_USE_NATIVE_CREATE_SOURCE, BROWSER_USE_CODEXUI_CREATE_SOURCE), - ) })() await browserUseClientPatchPromise } +function resolveBrowserUseClientPath(): string | null { + const explicitPath = process.env.CODEXUI_BROWSER_USE_CLIENT_PATH?.trim() + if (explicitPath) { + return explicitPath + } + const codexHome = process.env.CODEX_HOME?.trim() || join(homedir(), '.codex') + return join( + codexHome, + 'plugins', + 'cache', + 'openai-bundled', + 'browser-use', + '0.1.0-alpha1', + 'scripts', + 'browser-client.mjs', + ) +} + export async function closeBrowserUseBackends(): Promise { const backends = Array.from(browserUseBackends.values()) browserUseBackends.clear() @@ -144,10 +196,9 @@ export async function closeBrowserUseBackends(): Promise { } async function launchBrowser(): Promise { - const dynamicImport = new Function('specifier', 'return import(specifier)') as (specifier: string) => Promise<{ + const { chromium } = require('playwright') as { chromium: { launch(options?: Record): Promise } - }> - const { chromium } = await dynamicImport('playwright') + } return await chromium.launch({ headless: false }) } @@ -167,7 +218,10 @@ function handleConnection(backend: BrowserUseBackendRecord, socket: Socket): voi socket.on('data', (chunk) => { client.pendingData = Buffer.concat([client.pendingData, chunk]) - const parsed = parseFramedMessages(client.pendingData) + const parsed = parseFramedMessages(client.pendingData, socket) + if (!parsed) { + return + } client.pendingData = parsed.remainingData for (const message of parsed.messages) { void handleMessage(client, message) @@ -190,22 +244,35 @@ function authorizeSocketPeer(socket: Socket): void { } } -function parseFramedMessages(data: Buffer): { messages: JsonRpcMessage[], remainingData: Buffer } { +function parseFramedMessages(data: Buffer, socket: Socket): { messages: JsonRpcMessage[], remainingData: Buffer } | null { const messages: JsonRpcMessage[] = [] let offset = 0 while (data.length - offset >= 4) { const size = data.readUInt32LE(offset) + if (size > MAX_BROWSER_USE_FRAME_BYTES) { + socket.destroy() + return null + } const end = offset + 4 + size if (data.length < end) { break } const text = data.subarray(offset + 4, end).toString('utf8') - messages.push(JSON.parse(text) as JsonRpcMessage) + try { + messages.push(JSON.parse(text) as JsonRpcMessage) + } catch { + socket.destroy() + return null + } offset = end } return { messages, remainingData: data.subarray(offset) } } +function hashSessionId(sessionId: string): string { + return createHash('sha256').update(sessionId).digest('hex').slice(0, 32) +} + async function handleMessage(client: BrowserUseClient, message: JsonRpcMessage): Promise { if (message.id == null || typeof message.method !== 'string') { return diff --git a/src/server/codexAppServerBridge.ts b/src/server/codexAppServerBridge.ts index cfb5a6109..9e5e8e920 100644 --- a/src/server/codexAppServerBridge.ts +++ b/src/server/codexAppServerBridge.ts @@ -11,7 +11,7 @@ import { basename, dirname, isAbsolute, join, resolve } from 'node:path' import { createInterface } from 'node:readline' import { writeFile } from 'node:fs/promises' import { handleAccountRoutes } from './accountRoutes.js' -import { ensureBrowserUseBackendForSession } from './browserUseBackend.js' +import { closeBrowserUseBackends, tryEnsureBrowserUseBackendForSession } from './browserUseBackend.js' import { buildAppServerArgs } from './appServerRuntimeConfig.js' import { handleReviewRoutes } from './reviewGit.js' import { handleSkillsRoutes, initializeSkillsSyncOnStartup } from './skillsRoutes.js' @@ -5276,7 +5276,7 @@ export function createCodexBridgeMiddleware(): CodexBridgeMiddleware { const params = asRecord(body.params) const threadId = typeof params?.threadId === 'string' ? params.threadId : '' if (threadId) { - await ensureBrowserUseBackendForSession(threadId) + await tryEnsureBrowserUseBackendForSession(threadId) } } @@ -5289,7 +5289,7 @@ export function createCodexBridgeMiddleware(): CodexBridgeMiddleware { const rpcThread = asRecord(rpcRecord?.thread) const threadId = typeof rpcThread?.id === 'string' ? rpcThread.id : '' if (threadId) { - await ensureBrowserUseBackendForSession(threadId) + await tryEnsureBrowserUseBackendForSession(threadId) } } @@ -6592,6 +6592,7 @@ export function createCodexBridgeMiddleware(): CodexBridgeMiddleware { telegramBridge.stop() terminalManager.dispose() backendQueueProcessor.dispose() + void closeBrowserUseBackends() appServer.dispose() } middleware.subscribeNotifications = ( diff --git a/tests.md b/tests.md index 8826392ce..b8258ece4 100644 --- a/tests.md +++ b/tests.md @@ -242,13 +242,16 @@ codexui exposes Browser Use in chats by using the bundled Codex.app runtime, reg 4. Open `http://127.0.0.1:4173` in light theme. 5. Create or open a codexui chat and ask it to use Browser Use to open `https://example.com` and report the page title. 6. Confirm the chat produces `mcp__node_repl__js` Browser Use activity and returns `{"title":"Example Domain","url":"https://example.com/"}` without a missing-tool or IAB discovery error. -7. Switch to dark theme and repeat steps 5-6. +7. Temporarily point `CODEX_HOME` or `CODEXUI_BROWSER_USE_CLIENT_PATH` at a location without the Browser Use client and confirm a normal non-Browser Use chat still starts instead of returning a 502. +8. Switch to dark theme and repeat steps 5-7. #### Expected Results - On macOS, codexui launches the Codex.app bundled app-server by default. - `CODEXUI_CODEX_COMMAND` still overrides the bundled command when set. - `mcpServerStatus/list` includes `node_repl` with `js` and `js_reset`. - Browser Use works inside codexui chats in both light and dark theme. +- Browser Use setup is best-effort: missing plugin files, changed client patch shape, or backend startup errors are logged and do not block normal `turn/start` or `thread/start` RPC calls. +- Browser Use socket names are derived from a bounded hash of the session id, malformed socket frames close the socket instead of throwing, and backend cleanup runs during bridge disposal. - The theme switch does not affect tool availability or pending tool-call rendering. #### Rollback/Cleanup From 2ce504790180bdd2faca4e044d2f48b1d12d80f8 Mon Sep 17 00:00:00 2001 From: Igor Date: Tue, 5 May 2026 07:18:52 +0700 Subject: [PATCH 5/5] Make Browser Use backend lazy --- src/server/browserUseBackend.ts | 16 +++++++++++----- tests.md | 6 +++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/server/browserUseBackend.ts b/src/server/browserUseBackend.ts index 765d61dd2..103b4ed13 100644 --- a/src/server/browserUseBackend.ts +++ b/src/server/browserUseBackend.ts @@ -27,7 +27,7 @@ type BrowserUseTab = { type BrowserUseBackendRecord = { server: Server socketPath: string - browserPromise: Promise + browserPromise: Promise | null tabs: Map nextTabId: number sessionId: string @@ -94,7 +94,7 @@ export async function ensureBrowserUseBackendForSession(sessionId: string): Prom const backend: BrowserUseBackendRecord = { server: createServer((socket) => handleConnection(backend, socket)), socketPath, - browserPromise: launchBrowser(), + browserPromise: null, tabs: new Map(), nextTabId: 1, sessionId: normalizedSessionId, @@ -118,7 +118,7 @@ export async function ensureBrowserUseBackendForSession(sessionId: string): Prom } catch (error) { browserUseBackends.delete(normalizedSessionId) await rm(socketPath, { force: true }) - const browser = await backend.browserPromise.catch(() => null) + const browser = await backend.browserPromise?.catch(() => null) await browser?.close() throw error } @@ -190,7 +190,7 @@ export async function closeBrowserUseBackends(): Promise { await Promise.allSettled(backends.map(async (backend) => { await new Promise((resolve) => backend.server.close(() => resolve())) await rm(backend.socketPath, { force: true }) - const browser = await backend.browserPromise.catch(() => null) + const browser = await backend.browserPromise?.catch(() => null) await browser?.close() })) } @@ -233,6 +233,7 @@ function authorizeSocketPeer(socket: Socket): void { try { const fd = (socket as Socket & { _handle?: { fd?: number } })._handle?.fd if (typeof fd !== 'number') { + socket.destroy() return } const nativeModule = require(CODEX_BROWSER_USE_PEER_AUTHORIZATION) as { @@ -343,7 +344,7 @@ async function handleRequest( } async function createTab(client: BrowserUseClient): Promise { - const browser = await client.backend.browserPromise + const browser = await getBrowser(client.backend) const context = await browser.newContext() const page = await context.newPage() const tabId = client.backend.nextTabId++ @@ -351,6 +352,11 @@ async function createTab(client: BrowserUseClient): Promise { return await serializeTab(tabId, client.backend.tabs.get(tabId), true) } +async function getBrowser(backend: BrowserUseBackendRecord): Promise { + backend.browserPromise ??= launchBrowser() + return await backend.browserPromise +} + async function getTabs(backend: BrowserUseBackendRecord): Promise { const tabs: BrowserUseTab[] = [] for (const [tabId, tab] of backend.tabs) { diff --git a/tests.md b/tests.md index b8258ece4..8c08677c5 100644 --- a/tests.md +++ b/tests.md @@ -227,7 +227,7 @@ This file tracks manual regression and feature verification steps. ### Browser Use plugin runtime command #### Feature/Change Name -codexui exposes Browser Use in chats by using the bundled Codex.app runtime, registering `node_repl`, and starting a session-scoped local Browser Use backend. +codexui exposes Browser Use in chats by using the bundled Codex.app runtime, registering `node_repl`, and registering a session-scoped local Browser Use backend whose browser launches lazily on first tab use. #### Prerequisites/Setup 1. macOS with `/Applications/Codex.app/Contents/Resources/codex` installed. @@ -249,9 +249,9 @@ codexui exposes Browser Use in chats by using the bundled Codex.app runtime, reg - On macOS, codexui launches the Codex.app bundled app-server by default. - `CODEXUI_CODEX_COMMAND` still overrides the bundled command when set. - `mcpServerStatus/list` includes `node_repl` with `js` and `js_reset`. -- Browser Use works inside codexui chats in both light and dark theme. +- Browser Use works inside codexui chats in both light and dark theme, and Chromium launches only after Browser Use requests a tab. - Browser Use setup is best-effort: missing plugin files, changed client patch shape, or backend startup errors are logged and do not block normal `turn/start` or `thread/start` RPC calls. -- Browser Use socket names are derived from a bounded hash of the session id, malformed socket frames close the socket instead of throwing, and backend cleanup runs during bridge disposal. +- Browser Use socket names are derived from a bounded hash of the session id, malformed socket frames close the socket instead of throwing, unauthorized socket peers are closed, and backend cleanup runs during bridge disposal. - The theme switch does not affect tool availability or pending tool-call rendering. #### Rollback/Cleanup