From 63f32899d3c955cd79f8947e42a4d991eb7327d0 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Wed, 10 Jun 2026 12:59:18 -0500 Subject: [PATCH 01/15] feat: add profile creation functionality --- src/@types/types.d.ts | 11 ++- src/lib/agent-client.ts | 88 +++++++++++++++++++-- src/skills/auth-profile.md | 63 +++++++++++++++ src/skills/index.ts | 8 ++ src/tools/agent.ts | 19 +++-- src/tools/schemas.ts | 145 +++++++++++++++++++++++----------- test/lib/agent-client.spec.ts | 17 ++++ test/skills/skills.spec.ts | 5 +- test/tools/schemas.spec.ts | 27 +++++++ 9 files changed, 325 insertions(+), 58 deletions(-) create mode 100644 src/skills/auth-profile.md diff --git a/src/@types/types.d.ts b/src/@types/types.d.ts index 505498e..c375faf 100644 --- a/src/@types/types.d.ts +++ b/src/@types/types.d.ts @@ -27,6 +27,7 @@ import type { CrawlParamsSchema, } from '../tools/crawl.js'; import type { AgentParamsSchema } from '../tools/agent.js'; +import type { CreateProfileParams } from '../tools/schemas.js'; import type { ProxyOptionsSchema } from '../lib/agent-client.js'; /* ------------------------------------------------------------------ */ @@ -161,6 +162,13 @@ export interface ActiveSession { readonly token: string; readonly proxy?: ProxyOptions; readonly profile?: string; + // When set, this session was opened in profile-creation mode: the WS is bound + // to a creation session from POST /profile rather than a fresh launch. Feeds + // the session-cache key (see getSessionKey), so it's readonly. + readonly createProfile?: CreateProfileParams; + // The creation session id returned by POST /profile. Reconnects attach to it + // via /chromium/agent?sessionId rather than launching a new browser. 
+ creationSessionId?: string; reconnecting?: Promise; skillState: SkillFireState; lastUsedAt: number; @@ -208,7 +216,8 @@ export type SkillId = | 'dynamic-content' | 'screenshots' | 'tabs' - | 'autonomous-login'; + | 'autonomous-login' + | 'auth-profile'; export interface DetectContext { snapshot?: SnapshotResult; diff --git a/src/lib/agent-client.ts b/src/lib/agent-client.ts index 5c24a7e..8f81de0 100644 --- a/src/lib/agent-client.ts +++ b/src/lib/agent-client.ts @@ -3,6 +3,7 @@ import WebSocket from 'ws'; import { z } from 'zod'; import { createSkillState } from '../skills/index.js'; import { hashToken, isMeaningfulBody } from './utils.js'; +import type { CreateProfileParams } from '../tools/schemas.js'; import type { ActiveSession, AgentMessage, @@ -232,10 +233,12 @@ const getSessionKey = ( token: string, proxy?: ProxyOptions, profile?: string, + createProfile?: CreateProfileParams, ): string => (mcpSessionId ?? `stdio:${hashToken(token)}`) + proxyFingerprint(proxy) + - (profile ? KEY_SEP + 'profile#' + hashToken(profile) : ''); + (profile ? KEY_SEP + 'profile#' + hashToken(profile) : '') + + (createProfile ? KEY_SEP + 'create#' + hashToken(createProfile.name) : ''); /** * Build the WebSocket URL for `/chromium/agent`: normalize trailing slashes, @@ -247,10 +250,17 @@ export const buildAgentWsUrl = ( token: string, proxy?: ProxyOptions, profile?: string, + sessionId?: string, ): string => { const base = apiUrl.replace(/^http/i, 'ws').replace(/\/+$/, ''); const url = new URL(base + '/chromium/agent'); url.searchParams.set('token', token); + // A creation session already owns its proxy/profile (baked in at POST /profile); + // the WS only needs to attach to it by id, so proxy/profile params are skipped. 
+ if (sessionId) { + url.searchParams.set('sessionId', sessionId); + return url.toString(); + } if (proxy?.proxy) url.searchParams.set('proxy', proxy.proxy); if (proxy?.proxyCountry) url.searchParams.set('proxyCountry', proxy.proxyCountry); @@ -391,14 +401,67 @@ const readUpgradeError = ( res.on('close', finish); }); +/** Result of POST /profile: a tracked, non-headless creation session. */ +interface CreationSessionInfo { + id: string; + name: string; + connect: string; + stop: string; +} + +// POST /profile launches a non-headless browser, which can take several seconds. +const CREATE_PROFILE_TIMEOUT_MS = 60_000; + +/** + * Open a profile-creation session via POST /profile. Returns the tracked + * session id the agent WS then attaches to with `?sessionId`. Non-2xx responses + * throw UpgradeError so the tool layer's retry/4xx classification applies + * uniformly with the WS-upgrade path. + */ +const postCreateProfile = async ( + apiUrl: string, + token: string, + createProfile: CreateProfileParams, +): Promise => { + const base = apiUrl.replace(/\/+$/, ''); + const url = new URL(base + '/profile'); + url.searchParams.set('token', token); + + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), CREATE_PROFILE_TIMEOUT_MS); + let res: Response; + try { + res = await fetch(url.toString(), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(createProfile), + signal: controller.signal, + }); + } catch (err) { + throw new Error( + `POST /profile failed: ${err instanceof Error ? 
err.message : String(err)}`, + { cause: err }, + ); + } finally { + clearTimeout(timer); + } + + if (!res.ok) { + const body = await res.text().catch(() => ''); + throw new UpgradeError(res.status, res.statusText, body); + } + return (await res.json()) as CreationSessionInfo; +}; + const connect = ( apiUrl: string, token: string, proxy?: ProxyOptions, profile?: string, + sessionId?: string, ): Promise => new Promise((resolve, reject) => { - const wsUrl = buildAgentWsUrl(apiUrl, token, proxy, profile); + const wsUrl = buildAgentWsUrl(apiUrl, token, proxy, profile, sessionId); const ws = new WebSocket(wsUrl); let settled = false; @@ -521,9 +584,10 @@ export const getOrCreateSession = async ( token: string, proxy?: ProxyOptions, profile?: string, + createProfile?: CreateProfileParams, ): Promise => { sweepSessions(); - const key = getSessionKey(mcpSessionId, token, proxy, profile); + const key = getSessionKey(mcpSessionId, token, proxy, profile, createProfile); const existing = sessions.get(key); if (existing && existing.ws.readyState === WebSocket.OPEN) { @@ -546,7 +610,12 @@ export const getOrCreateSession = async ( } const creation = (async (): Promise => { - const ws = await connect(apiUrl, token, proxy, profile); + // Profile-creation mode: launch a tracked session via POST /profile, then + // attach the agent WS to it by id. Otherwise launch a fresh agent browser. + const creationSessionId = createProfile + ? 
(await postCreateProfile(apiUrl, token, createProfile)).id + : undefined; + const ws = await connect(apiUrl, token, proxy, profile, creationSessionId); const session: ActiveSession = { ws, msgId: 0, @@ -554,6 +623,8 @@ export const getOrCreateSession = async ( token, proxy, profile, + createProfile, + creationSessionId, skillState: createSkillState(), lastUsedAt: Date.now(), }; @@ -595,11 +666,14 @@ export const send = async ( ): Promise => { if (session.ws.readyState !== WebSocket.OPEN) { if (!session.reconnecting) { + // A creation session must re-attach to the same browser by id — a fresh + // connect() would launch a new one and lose all auth progress. session.reconnecting = connect( session.apiUrl, session.token, session.proxy, session.profile, + session.creationSessionId, ).finally(() => { session.reconnecting = undefined; }); @@ -636,8 +710,9 @@ export const closeSession = ( token: string, proxy?: ProxyOptions, profile?: string, + createProfile?: CreateProfileParams, ): void => { - const key = getSessionKey(mcpSessionId, token, proxy, profile); + const key = getSessionKey(mcpSessionId, token, proxy, profile, createProfile); const session = sessions.get(key); if (session) { try { @@ -659,8 +734,9 @@ export const destroySession = ( token: string, proxy?: ProxyOptions, profile?: string, + createProfile?: CreateProfileParams, ): void => { - const key = getSessionKey(mcpSessionId, token, proxy, profile); + const key = getSessionKey(mcpSessionId, token, proxy, profile, createProfile); const session = sessions.get(key); if (session) { try { diff --git a/src/skills/auth-profile.md b/src/skills/auth-profile.md new file mode 100644 index 0000000..8a281bc --- /dev/null +++ b/src/skills/auth-profile.md @@ -0,0 +1,63 @@ +# Authenticated Profiles + +A **profile** is a server-side bundle of cookies, localStorage, and IndexedDB +captured from a live agent session and replayed on future sessions that connect +with `profile=<name>`. 
Use it whenever a task needs the browser to start +already signed in. + +## Recipe — creating a profile + +1. **Open a creation session.** Call `browserless_agent` with a top-level + `createProfile` object — do NOT pass `profile` (the two are mutually + exclusive). The MCP tool calls `POST /profile` for you, attaches the WS + to the creation session, and gives you a non-headless browser with a + 10-minute keepalive: + ```json + { + "createProfile": { "name": "github" }, + "commands": [ + { "method": "goto", "params": { "url": "https://github.com/login" } } + ] + } + ``` +2. **Drive the auth flow like a normal task.** Type credentials (use values + the user supplied — never invent them), submit, and handle any + MFA/CAPTCHA step. If a CAPTCHA appears, load the `captchas` skill and + run `solve`. +3. **Verify you are actually signed in before saving.** Re-snapshot and + confirm at least one of: + - an authenticated-only element (account menu, "Sign out" link, avatar) + - the URL is the post-login destination (not `/login`, `/signin`, or an + error path) + - a known auth cookie name appears in `document.cookie` + If none of these hold, do NOT save — a logged-out profile is worse than + no profile. +4. **Call `saveProfile`** as the next command (JSON-RPC, no `Browserless.` + prefix): + ```json + { "method": "saveProfile", "params": { "name": "github" } } + ``` + Pass the same `name` you opened the session with. If the same + `(token, name)` pair already exists, the server returns a `BAD_PARAMS` + error telling you to use `refreshProfile` — switch and retry once. Do + not retry `saveProfile` with the same name. +5. 
**Inspect the result.** A successful save returns: + ```json + { + "ok": true, + "profileId": "...", + "name": "github", + "cookieCount": 12, + "originCount": 3, + "skippedOriginsCount": 0, + "skippedIdbDatabasesCount": 0, + "skippedIdbStoresCount": 0 + } + ``` + + - `cookieCount === 0` is a red flag — the site likely uses session-only + cookies or storage you can't capture. Tell the user. + - Any non-zero `skipped*` count means partial capture — surface it. +6. **Close** the session. Tell the user the profile name and how to use it + ("future calls can pass `profile: \"github\"`"). Do not echo cookie + values or any captured state. diff --git a/src/skills/index.ts b/src/skills/index.ts index 342a266..1632a70 100644 --- a/src/skills/index.ts +++ b/src/skills/index.ts @@ -166,6 +166,14 @@ const SKILL_SPECS: SkillSpec[] = [ ], ], }, + { + // No auto-fire triggers: there is no snapshot/error/command signal for + // "about to create a profile". The model loads it by id via + // browserless_skill, prompted by the createProfile field description. 
+ id: 'auth-profile', + path: 'src/skills/auth-profile.md', + triggers: [], + }, { id: 'captchas', path: 'src/skills/captchas.md', diff --git a/src/tools/agent.ts b/src/tools/agent.ts index 1c1b7b0..e6876f2 100644 --- a/src/tools/agent.ts +++ b/src/tools/agent.ts @@ -176,6 +176,7 @@ export function registerAgentTools( const proxy = params.proxy; const profile = params.profile; + const createProfile = params.createProfile; const sendAnalytics = (success: boolean) => { analytics?.fireToolRequest(token, 'browserless_agent', { @@ -188,6 +189,7 @@ export function registerAgentTools( proxy_sticky: !!proxy?.proxySticky, proxy_external: !!proxy?.externalProxyServer, profile_used: !!profile, + create_profile: !!createProfile, }); }; @@ -201,7 +203,7 @@ export function registerAgentTools( } if (commands.length === 1 && commands[0].method === 'close') { - closeSession(mcpSessionId, token, proxy, profile); + closeSession(mcpSessionId, token, proxy, profile, createProfile); sendAnalytics(true); return [{ type: 'text' as const, text: 'Browser session closed.' 
}]; } @@ -215,6 +217,7 @@ export function registerAgentTools( token, proxy, profile, + createProfile, ); } catch (connErr: unknown) { // No retry when the server gave a definitive 4xx — re-attempting @@ -223,7 +226,7 @@ export function registerAgentTools( if (isRetry || !isRetryableUpgradeError(connErr)) { throw new UserError(formatConnectError(connErr)); } - destroySession(mcpSessionId, token, proxy, profile); + destroySession(mcpSessionId, token, proxy, profile, createProfile); return runCommands(true); } @@ -236,7 +239,7 @@ export function registerAgentTools( let crossOriginBaseline: string | undefined = agentSession.lastUrl; for (const cmd of commands) { if (cmd.method === 'close') { - closeSession(mcpSessionId, token, proxy, profile); + closeSession(mcpSessionId, token, proxy, profile, createProfile); results.push({ method: 'close', result: { closed: true } }); closedDuringBatch = true; break; @@ -250,7 +253,7 @@ export function registerAgentTools( try { resp = await send(agentSession, cmd.method, cmd.params); } catch (sendErr: unknown) { - destroySession(mcpSessionId, token, proxy, profile); + destroySession(mcpSessionId, token, proxy, profile, createProfile); const errMessage = sendErr instanceof Error ? sendErr.message : String(sendErr); if (!isRetry) { @@ -276,7 +279,13 @@ export function registerAgentTools( if (resp.error) { const err = resp.error; if (err.code && FATAL_CODES.has(err.code)) { - destroySession(mcpSessionId, token, proxy, profile); + destroySession( + mcpSessionId, + token, + proxy, + profile, + createProfile, + ); if (!isRetry) { return runCommands(true); } diff --git a/src/tools/schemas.ts b/src/tools/schemas.ts index e9a1eca..d052d7e 100644 --- a/src/tools/schemas.ts +++ b/src/tools/schemas.ts @@ -516,58 +516,115 @@ export const AgentCommandSchema = z.union([ GenericCommandSchema, ]); -export const AgentParamsSchema = z.object({ - method: z - .string() +// Proxy block for a profile-creation session. 
Mirrors the POST /profile body +// proxy shape (type/sticky/country/city/state/preset) so it passes straight +// through — distinct from the top-level agent proxy fields (proxy/proxyCountry…). +const CreateProfileProxySchema = z.object({ + type: z + .literal('residential') .optional() - .default('') - .describe( - 'The BQL method to execute (used for single-command calls). ' + - 'When using "commands" array, this field is ignored.', - ), - params: z - .record(z.string(), z.unknown()) + .describe('Routing tier. Only "residential" is supported today.'), + sticky: z + .boolean() .optional() - .default({}) - .describe('Parameters for the method (used for single-command calls).'), - commands: z - .array(AgentCommandSchema) + .describe('Keep the same IP for the lifetime of the creation session.'), + country: z + .string() .optional() - .describe( - 'Optional: batch multiple commands in one call. When provided, "method" and "params" ' + - 'are ignored and commands are executed sequentially. Only the final result is returned. ' + - 'Use this to batch actions that share the same page state (e.g. filling a form: ' + - 'type email + type password + click submit). Do NOT batch across navigations.', - ), - proxy: ProxyOptionsSchema.optional().describe( - 'Residential / external proxy config. Read once at session creation. ' + - 'Changing requires close() + a new session call.', - ), - profile: profileField( - 'when the agent session connects', - ' The profile is fixed for the lifetime of the agent session; ' + - 'passing a different profile value opens a separate browser session.', - ), - rationale: z + .describe('Two-letter country code (e.g. "us").'), + city: z.string().optional().describe('City-level targeting (plan-gated).'), + state: z.string().optional().describe('State/region targeting (plan-gated).'), + preset: z .string() .optional() - .describe( - 'A short user-facing reason for this call. HARD BUDGET: 50 characters. 
' + - 'Surfaced live in interactive UIs as the progress label. Write it for ' + - 'a human watching, in present-continuous form ("Logging in", "Filling ' + - 'the search form", "Checking the time", "Closing the cookie banner"). ' + - 'If your first draft is longer than 50 chars, REWORD IT to fit — ' + - 'compress to the essence; do NOT just chop. Bad: "Read page title and ' + - 'body text to determine why snapshot is empty" (64). Good: "Diagnosing ' + - 'empty snapshot" (24). Bad: "Filling out a very detailed multi-field ' + - 'signup form" (51). Good: "Filling the signup form" (23). Never use ' + - 'jargon, raw method names ("evaluate", "click"), JS, full URLs, or ' + - 'credentials. Include exactly one per `browserless_agent` call, even ' + - 'when batching commands.', - ), + .describe('Named proxy preset (plan-dependent).'), }); +const CreateProfileSchema = z + .object({ + name: z + .string() + .min(1) + .max(255) + .refine((s) => /^[^\s/?#]+$/.test(s), { + message: 'name must match /^[^\\s/?#]+$/ (no whitespace, /, ?, #)', + }) + .describe( + 'Name to save the profile under. Reused as the saveProfile name.', + ), + proxy: CreateProfileProxySchema.optional(), + browser: z.enum(['chrome', 'chromium', 'stealth']).optional(), + stealth: z.boolean().optional(), + }) + .describe( + 'Open this session in profile-creation mode. The MCP tool POSTs /profile ' + + 'with these params, attaches the agent WS to the returned creation session ' + + '(non-headless, 10-minute keepalive), and expects a saveProfile call before ' + + 'close. Mutually exclusive with `profile`. Load the `auth-profile` skill ' + + '(via browserless_skill) for the full create-then-save recipe.', + ); + +export const AgentParamsSchema = z + .object({ + method: z + .string() + .optional() + .default('') + .describe( + 'The BQL method to execute (used for single-command calls). 
' + + 'When using "commands" array, this field is ignored.', + ), + params: z + .record(z.string(), z.unknown()) + .optional() + .default({}) + .describe('Parameters for the method (used for single-command calls).'), + commands: z + .array(AgentCommandSchema) + .optional() + .describe( + 'Optional: batch multiple commands in one call. When provided, "method" and "params" ' + + 'are ignored and commands are executed sequentially. Only the final result is returned. ' + + 'Use this to batch actions that share the same page state (e.g. filling a form: ' + + 'type email + type password + click submit). Do NOT batch across navigations.', + ), + proxy: ProxyOptionsSchema.optional().describe( + 'Residential / external proxy config. Read once at session creation. ' + + 'Changing requires close() + a new session call.', + ), + profile: profileField( + 'when the agent session connects', + ' The profile is fixed for the lifetime of the agent session; ' + + 'passing a different profile value opens a separate browser session.', + ), + createProfile: CreateProfileSchema.optional(), + rationale: z + .string() + .optional() + .describe( + 'A short user-facing reason for this call. HARD BUDGET: 50 characters. ' + + 'Surfaced live in interactive UIs as the progress label. Write it for ' + + 'a human watching, in present-continuous form ("Logging in", "Filling ' + + 'the search form", "Checking the time", "Closing the cookie banner"). ' + + 'If your first draft is longer than 50 chars, REWORD IT to fit — ' + + 'compress to the essence; do NOT just chop. Bad: "Read page title and ' + + 'body text to determine why snapshot is empty" (64). Good: "Diagnosing ' + + 'empty snapshot" (24). Bad: "Filling out a very detailed multi-field ' + + 'signup form" (51). Good: "Filling the signup form" (23). Never use ' + + 'jargon, raw method names ("evaluate", "click"), JS, full URLs, or ' + + 'credentials. 
Include exactly one per `browserless_agent` call, even ' + + 'when batching commands.', + ), + }) + .refine((v) => !(v.profile && v.createProfile), { + message: + '`profile` (hydrate an existing profile) and `createProfile` (author a new ' + + 'one) cannot both be set', + }); + /** A single validated agent command. */ export type AgentCommand = z.infer; /** The full `browserless_agent` tool params (single command, batch, proxy, profile). */ export type AgentParams = z.infer; +/** Params for opening a profile-creation session (POST /profile passthrough). */ +export type CreateProfileParams = z.infer; diff --git a/test/lib/agent-client.spec.ts b/test/lib/agent-client.spec.ts index ee002c9..3836aa3 100644 --- a/test/lib/agent-client.spec.ts +++ b/test/lib/agent-client.spec.ts @@ -158,6 +158,23 @@ describe('agent-client buildAgentWsUrl', () => { expect(url.searchParams.get('proxyCountry')).to.equal('us'); expect(url.searchParams.get('profile')).to.equal('my-login'); }); + + it('attaches to a creation session by id and omits proxy/profile', () => { + const url = new URL( + buildAgentWsUrl( + 'http://localhost:3000', + 'tok', + { proxy: 'residential', proxyCountry: 'us' }, + 'my-login', + 'sess-abc123', + ), + ); + expect(url.searchParams.get('sessionId')).to.equal('sess-abc123'); + expect(url.searchParams.get('token')).to.equal('tok'); + // A creation session owns its own proxy/profile from POST /profile. 
+ expect(url.searchParams.has('proxy')).to.equal(false); + expect(url.searchParams.has('profile')).to.equal(false); + }); }); describe('agent-client proxyFingerprint', () => { diff --git a/test/skills/skills.spec.ts b/test/skills/skills.spec.ts index e7e16dd..2092833 100644 --- a/test/skills/skills.spec.ts +++ b/test/skills/skills.spec.ts @@ -36,8 +36,8 @@ const CLOUD = 'https://production.browserless.io'; const SELF_HOSTED = 'https://browserless.example.com'; describe('skills/registry', () => { - it('loads all nine skill bodies', () => { - expect(skillsRegistry).to.have.lengthOf(9); + it('loads all ten skill bodies', () => { + expect(skillsRegistry).to.have.lengthOf(10); const ids = skillsRegistry.map((s) => s.id); expect(ids).to.have.members([ 'shadow-dom', @@ -49,6 +49,7 @@ describe('skills/registry', () => { 'screenshots', 'tabs', 'autonomous-login', + 'auth-profile', ]); for (const skill of skillsRegistry) { expect(skill.body, `${skill.id} body`).to.be.a('string').and.not.empty; diff --git a/test/tools/schemas.spec.ts b/test/tools/schemas.spec.ts index e796058..e63a137 100644 --- a/test/tools/schemas.spec.ts +++ b/test/tools/schemas.spec.ts @@ -241,3 +241,30 @@ describe('profile field (shared profileField helper)', () => { expect(result.success).to.equal(false); }); }); + +describe('createProfile field', () => { + it('accepts a createProfile object on its own', () => { + const parsed = AgentParamsSchema.parse({ + createProfile: { name: 'github' }, + commands: [ + { method: 'goto', params: { url: 'https://github.com/login' } }, + ], + }); + expect(parsed.createProfile?.name).to.equal('github'); + }); + + it('rejects createProfile and profile together (mutually exclusive)', () => { + const result = AgentParamsSchema.safeParse({ + profile: 'github', + createProfile: { name: 'github' }, + }); + expect(result.success).to.equal(false); + }); + + it('rejects a createProfile name containing whitespace, /, ?, or #', () => { + for (const name of ['has space', 'a/b', 
'a?b', 'a#b']) { + const result = AgentParamsSchema.safeParse({ createProfile: { name } }); + expect(result.success, name).to.equal(false); + } + }); +}); From a0d6bb2f548a427938607562ec4c5de521c2aaf5 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Wed, 10 Jun 2026 12:59:18 -0500 Subject: [PATCH 02/15] chore: prettier --- src/skills/auth-profile.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/skills/auth-profile.md b/src/skills/auth-profile.md index 8a281bc..3faaf7e 100644 --- a/src/skills/auth-profile.md +++ b/src/skills/auth-profile.md @@ -42,6 +42,7 @@ already signed in. error telling you to use `refreshProfile` — switch and retry once. Do not retry `saveProfile` with the same name. 5. **Inspect the result.** A successful save returns: + ```json { "ok": true, @@ -58,6 +59,7 @@ already signed in. - `cookieCount === 0` is a red flag — the site likely uses session-only cookies or storage you can't capture. Tell the user. - Any non-zero `skipped*` count means partial capture — surface it. + 6. **Close** the session. Tell the user the profile name and how to use it ("future calls can pass `profile: \"github\"`"). Do not echo cookie values or any captured state. From eb350eb49314d01751a2cda16cd3c1727e257bb8 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Wed, 10 Jun 2026 14:01:16 -0500 Subject: [PATCH 03/15] fix: clarify profile binding requirements in agent session documentation --- src/skills/autonomous-login.md | 85 +++++++++++++++++----------------- src/tools/schemas.ts | 7 ++- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/src/skills/autonomous-login.md b/src/skills/autonomous-login.md index badb03b..f909560 100644 --- a/src/skills/autonomous-login.md +++ b/src/skills/autonomous-login.md @@ -1,95 +1,94 @@ # Autonomous Login -Page wants auth. **Default: don't.** Logins are intrusive and can damage account state. Proceed only when both gates pass. +Page wants auth. 
**Default: don't.** Logins are intrusive and can damage account state. Proceed only when the gates below pass. -## Gate 1 — Login required for continuing _this_ task? +## Gate 0 — Did you drop your session binding? -If the user's task is literally "log in / post / DM", or needs login to continue, gate passed. For extract/read/observe tasks, check whether the wall actually blocks the goal: +`profile` (and `proxy`) bind **each** call to its hydrated session. If an earlier call in this flow was logged in but this one looks logged out, the cause is almost certainly a missing `profile`/`proxy` param on **this** call — not stale cookies. Re-issue the call **with** the binding before treating the wall as real. Never re-authenticate to repair a parameter you forgot to pass. -- Target content already in DOM beneath the wall? Read it directly. -- Dismiss available (`Maybe later`, `Skip`, modal `×`)? Click it. -- Alternative path — public mirror, archive.org, RSS, JSON endpoint, deep link? +## Gate 1 — Login required to continue _this_ task? -If the rest of the task completes without auth → `LOGIN_NOT_NEEDED`. Wikipedia, public docs/news, public read-only profiles. +Task is literally "log in / post / DM" or needs login to proceed → pass. For read/extract tasks, check the wall actually blocks the goal: -## Gate 2 — Credentials unambiguously for _this_ site? +- Content already in DOM beneath the wall → read it. +- Dismiss available (`Maybe later`, `Skip`, `×`) → click it. +- Alt path (public mirror, archive.org, RSS, JSON endpoint, deep link) → use it. + +Task completes without auth → `LOGIN_NOT_NEEDED` (Wikipedia, public docs/news, public read-only profiles). -**Password is not required to pass Gate 2.** Many sites use magic-link / email-only / passkey auth — an email alone (or any contextually-matched identifier) can be sufficient. Don't preemptively fail Gate 2 because no password is in context; let the form tell you at runtime. 
Only fail Gate 2 if the form actually demands a credential type you don't have. +## Gate 2 — Credentials unambiguously for _this_ site? -Identified **contextually** by name-to-domain correspondence — fixed names not required. Bar is **extraordinary evidence**, not plausibility. +**Password not required** — magic-link / email-only / passkey sites accept an email (or any contextually-matched identifier) alone. Don't fail early for a missing password; let the form demand it at runtime. Fail only if the form requires a credential type you lack. -- ✅ `instagram.com` + `instagramHandle` / `instagramPassword` -- ✅ `LOGIN_USERNAME` / `LOGIN_PASSWORD` paired with `LOGIN_TARGET_URL` whose host matches -- ❌ `wikipedia.org` + `instagramHandle` (names belong to a different service) -- ❌ Bare `username` / `password` with no domain qualifier (ambiguous) +Match **contextually** by name-to-domain correspondence (fixed names not required). Bar is **extraordinary evidence**, not plausibility. -Absent / ambiguous / multiple plausible pairs → `MISSING_CONTEXT`. TOTP follows the same rule. +- ✅ `instagram.com` + `instagramHandle`/`instagramPassword` +- ✅ `LOGIN_USERNAME`/`LOGIN_PASSWORD` + `LOGIN_TARGET_URL` host matches +- ❌ `wikipedia.org` + `instagramHandle` (different service) +- ❌ bare `username`/`password`, no domain qualifier (ambiguous) ---- +Absent / ambiguous / multiple plausible pairs → `MISSING_CONTEXT`. TOTP same rule. -If either gate fails, stop and emit the matching `reason_code`. Rest runs only when both pass. +Gate 1 or 2 fails → stop, emit the matching `reason_code`. (Gate 0 isn't a stop — it means fix the call and retry.) Continue only when both pass. ## Reach the form - Password input in snapshot → continue. -- Sign-in link/button visible → click, wait, re-snapshot. -- Email-first (username only): type username, click `Continue` / `Next`, `waitForSelector` on `input[type="password"]` (10000ms), re-snapshot. 
-- After two transitions with no password input → `FORM_NOT_FOUND`. +- Sign-in link/button → click, wait, re-snapshot. +- Email-first → type username, click `Continue`/`Next`, `waitForSelector` on `input[type="password"]` (10000ms), re-snapshot. +- Two transitions, still no password → `FORM_NOT_FOUND`. ## Sanity check -Confirm login (not signup/reset): submit name is `Sign in` / `Log in` / `Continue` (not `Sign up` / `Register` / `Reset`), and exactly **one** password field present. Else `FORM_NOT_FOUND`. +Login, not signup/reset: submit reads `Sign in`/`Log in`/`Continue` (not `Sign up`/`Register`/`Reset`) and exactly **one** password field. Else `FORM_NOT_FOUND`. ## Field selection (anchor off password) -- **Password**: `input[type="password"]`. With multiples: matches `/password/i` and **not** `confirm|new password`. -- **Username** (first match): same-form `input[type="email"]` → input matching `/email|username|user|login|account/i` → visible text/email/tel input immediately preceding the password in `ref` order. -- **Submit** (first match): same-form button matching `/^(sign in|log in|login|continue|submit)$/i` → `button[type="submit"]` in form → the only non-SSO visible button (skip `Continue with Google` etc. unless context names that provider). +- **Password**: `input[type="password"]`; with multiples, matches `/password/i` and not `confirm|new password`. +- **Username** (first hit): same-form `input[type="email"]` → `/email|username|user|login|account/i` → visible text/email/tel input immediately preceding the password in `ref` order. +- **Submit** (first hit): same-form button `/^(sign in|log in|login|continue|submit)$/i` → `button[type="submit"]` in form → the only non-SSO visible button (skip `Continue with Google` etc. unless context names that provider). -Any missing → `FORM_NOT_FOUND` with what's missing. +Anything missing → `FORM_NOT_FOUND` (say what's missing). 
## Submit -Single batched call (type username, type password, click submit) with Gate-2 values. Then `waitForNavigation` (10000ms) or `waitForResponse` on `*`. If both time out, verify anyway — page may have updated in place. Re-snapshot. +One batched call (type username, type password, click submit) with Gate-2 values → `waitForNavigation` (10000ms) or `waitForResponse` on `*`. Both time out → verify anyway (page may update in place). Re-snapshot. **Never retype the same credentials to retry** — caller's call. ## Verify success (any one, priority order) 1. URL no longer matches `/login|signin|sign-in|log-in|auth|sso|account\/sign/i`. 2. Password input absent from new snapshot. -3. Authed-state element matching `/log out|sign out|my account|profile|dashboard|avatar/i`. +3. Authed element matching `/log out|sign out|my account|profile|dashboard|avatar/i`. -If none holds: +None holds: -- Form error matching `/invalid|incorrect|wrong|doesn'?t match|not recognized|please try again/i` → `INVALID_CREDENTIALS`. -- Captcha indicator → invoke `captchas` skill, re-verify. Unsolvable → `CAPTCHA_BLOCKED`. +- Error matching `/invalid|incorrect|wrong|doesn'?t match|not recognized|please try again/i` → `INVALID_CREDENTIALS`. +- Captcha → invoke `captchas` skill, re-verify; unsolvable → `CAPTCHA_BLOCKED`. - MFA prompt → MFA branch. - No change, no error → `SUBMIT_NO_FEEDBACK`. -**Never retype the same credentials to retry.** Caller's call. - ## MFA branch -Required when snapshot has `autocomplete="one-time-code"`, numeric input with `maxlength` ∈ {4, 6, 8}, or label/`name`/`placeholder` matching `/code|verification|otp|2fa|two[- ]?factor|authenticator/i`. +Triggered by `autocomplete="one-time-code"`, numeric input with `maxlength` ∈ {4,6,8}, or label/`name`/`placeholder` matching `/code|verification|otp|2fa|two[- ]?factor|authenticator/i`. -- Contextually-matched TOTP available (same Gate-2 rule) → type, click submit, re-verify. 
-- **No matching TOTP in context → ask the user for the code in plain text and STOP this turn. Do not call `close`. Do not emit the final JSON block. Leave the agent session open so the next turn can resume — the OTP input is still on the page and the cookies/state are intact.** When the user replies with a code, treat it as the TOTP value, type + click submit + re-verify. If the user declines or says they don't have one → `MFA_INPUT_MISSING`. Never attempt SMS/email/WebAuthn flows. -- TOTP rejected (`/invalid|expired|incorrect/i`) → ask user for a fresh code (same don't-close rule); after one fresh-code rejection → `MFA_FAILED`. -- Second MFA prompt after first cleared → `UNEXPECTED_STATE`. +- Contextually-matched TOTP (Gate-2 rule) → type, submit, re-verify. +- **No matching TOTP → ask the user for the code in plain text and STOP this turn. Do NOT `close`, do NOT emit the final JSON. Leave the session open so the OTP input and cookies/state survive to next turn.** User replies → treat as the TOTP, type + submit + re-verify. User declines / has none → `MFA_INPUT_MISSING`. Never attempt SMS/email/WebAuthn. +- TOTP rejected (`/invalid|expired|incorrect/i`) → ask for a fresh code (same don't-close rule); one fresh-code rejection → `MFA_FAILED`. +- Second MFA prompt after the first cleared → `UNEXPECTED_STATE`. ## Final response -Call `close`, then emit **exactly one** fenced JSON block — nothing before or after, no prose. Fields: `success`, `reason_code`, `final_url`, `evidence`, `steps_taken` (JSON-RPC call count; batched call = 1). On failure, `success: false` and `final_url` = current URL. +`close`, then emit **exactly one** fenced JSON block — nothing before or after, no prose. Fields: `success`, `reason_code`, `final_url`, `evidence`, `steps_taken` (JSON-RPC call count; batched call = 1). On failure: `success: false`, `final_url` = current URL. 
`reason_code` ∈ `SUCCESS` | `LOGIN_NOT_NEEDED` | `MISSING_CONTEXT` | `INVALID_CREDENTIALS` | `MFA_INPUT_MISSING` | `MFA_FAILED` | `CAPTCHA_BLOCKED` | `FORM_NOT_FOUND` | `SUBMIT_NO_FEEDBACK` | `FIELD_TYPE_MISMATCH` | `UNEXPECTED_STATE`. ## Don't - Log in just because a form is visible — gates first. -- Use credentials whose names don't unambiguously belong to this site. -- Guess among multiple plausible pairs — `MISSING_CONTEXT`. -- Retry with the same credentials after failure. +- Re-authenticate to fix an apparent logout before confirming you passed `profile`/`proxy` (Gate 0). +- Use credentials whose names don't unambiguously belong to this site; guess among plausible pairs (→ `MISSING_CONTEXT`); or retry the same credentials after failure. - Try SSO buttons unless the task names that provider. - `evaluate` to set input `value` — use `type` so real keystrokes fire. - Leak credentials into narration, errors, or non-`type.params.text` fields. -- Emit anything other than the final JSON block in your last _terminal_ message (ask-the-user turns are not terminal — emit plain prose and stop without `close`). -- Close the session while waiting for a user-supplied OTP — leave it open so cookies, page state, and the OTP input survive the round-trip. +- Emit anything but the final JSON in your last _terminal_ message (ask-the-user turns aren't terminal — plain prose, stop, no `close`). +- `close` while awaiting a user-supplied OTP — leave the session open so cookies, page state, and the OTP input survive the round-trip. 
diff --git a/src/tools/schemas.ts b/src/tools/schemas.ts index d052d7e..2818106 100644 --- a/src/tools/schemas.ts +++ b/src/tools/schemas.ts @@ -594,8 +594,11 @@ export const AgentParamsSchema = z ), profile: profileField( 'when the agent session connects', - ' The profile is fixed for the lifetime of the agent session; ' + - 'passing a different profile value opens a separate browser session.', + ' `profile` binds each call to its hydrated session — you MUST pass it on ' + + 'every call in a multi-call flow, not just the first. A call that omits ' + + '`profile` runs in the default, un-hydrated session and will look logged ' + + 'out; if that happens, re-issue the call WITH `profile` before concluding ' + + 'the session expired. A different `profile` value opens a separate session.', ), createProfile: CreateProfileSchema.optional(), rationale: z From d5de3e2bc06463c47ce2a8471b3c008f76457028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andy=20Mart=C3=ADnez?= <93541085+andyMrtnzP@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:12:43 -0500 Subject: [PATCH 04/15] Update src/lib/agent-client.ts Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/lib/agent-client.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib/agent-client.ts b/src/lib/agent-client.ts index 8f81de0..2b27910 100644 --- a/src/lib/agent-client.ts +++ b/src/lib/agent-client.ts @@ -450,7 +450,13 @@ const postCreateProfile = async ( const body = await res.text().catch(() => ''); throw new UpgradeError(res.status, res.statusText, body); } - return (await res.json()) as CreationSessionInfo; + const json = await res.json(); + if (!json.id || typeof json.id !== 'string') { + throw new Error( + `POST /profile returned malformed response: missing or invalid 'id' field`, + ); + } + return json as CreationSessionInfo; }; const connect = ( From 9461b4ef1ac7b43efa294a2465676f46d6d81169 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: 
Wed, 10 Jun 2026 14:17:40 -0500 Subject: [PATCH 05/15] fix: update saveProfile instructions to clarify existing profile handling --- src/skills/auth-profile.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/skills/auth-profile.md b/src/skills/auth-profile.md index 3faaf7e..59d15dc 100644 --- a/src/skills/auth-profile.md +++ b/src/skills/auth-profile.md @@ -37,10 +37,11 @@ already signed in. ```json { "method": "saveProfile", "params": { "name": "github" } } ``` - Pass the same `name` you opened the session with. If the same - `(token, name)` pair already exists, the server returns a `BAD_PARAMS` - error telling you to use `refreshProfile` — switch and retry once. Do - not retry `saveProfile` with the same name. + Pass the same `name` you opened the session with. If a profile with that + name already exists for this token, the call returns `ok: false` with an + `error` saying the profile already exists. Don't retry `saveProfile` with the + same name — choose a different name, or tell the user a profile by that name + already exists. 5. 
**Inspect the result.** A successful save returns: ```json From 5f9f7c2532cf517efdb277f2cc5a552662e95259 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Wed, 10 Jun 2026 14:31:41 -0500 Subject: [PATCH 06/15] fix: cast response JSON to Partial<CreationSessionInfo> in postCreateProfile --- src/lib/agent-client.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/agent-client.ts b/src/lib/agent-client.ts index 2b27910..1544964 100644 --- a/src/lib/agent-client.ts +++ b/src/lib/agent-client.ts @@ -450,7 +450,7 @@ const postCreateProfile = async ( const body = await res.text().catch(() => ''); throw new UpgradeError(res.status, res.statusText, body); } - const json = await res.json(); + const json = (await res.json()) as Partial<CreationSessionInfo>; if (!json.id || typeof json.id !== 'string') { throw new Error( `POST /profile returned malformed response: missing or invalid 'id' field`, From 70763c6183b5ec3d0156abc22fbee615b40659f4 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Wed, 10 Jun 2026 14:38:49 -0500 Subject: [PATCH 07/15] fix: improve error handling for UpgradeError and validate response structure in postCreateProfile --- src/lib/agent-client.ts | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/lib/agent-client.ts b/src/lib/agent-client.ts index 1544964..f4bd570 100644 --- a/src/lib/agent-client.ts +++ b/src/lib/agent-client.ts @@ -150,6 +150,9 @@ const NON_RETRYABLE_UPGRADE_STATUSES = new Set([400, 401, 403, 404]); export const isRetryableUpgradeError = (err: unknown): boolean => { if (err instanceof UpgradeError) { + // A 2xx UpgradeError is a structurally-bad success response — retrying + // can't fix the shape (and may duplicate side effects), so don't. 
+ if (err.statusCode >= 200 && err.statusCode < 300) return false; return !NON_RETRYABLE_UPGRADE_STATUSES.has(err.statusCode); } return true; @@ -450,10 +453,17 @@ const postCreateProfile = async ( const body = await res.text().catch(() => ''); throw new UpgradeError(res.status, res.statusText, body); } - const json = (await res.json()) as Partial<CreationSessionInfo>; - if (!json.id || typeof json.id !== 'string') { - throw new Error( - `POST /profile returned malformed response: missing or invalid 'id' field`, + const json: unknown = await res.json(); + if ( + typeof json !== 'object' || + json === null || + typeof (json as { id?: unknown }).id !== 'string' || + !(json as { id: string }).id + ) { + throw new UpgradeError( + res.status, + res.statusText, + `POST /profile returned a malformed response (missing or invalid "id")`, ); } return json as CreationSessionInfo; From 47f772ee1713032d464de5994e44ebbefa540290 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Thu, 11 Jun 2026 09:29:27 -0500 Subject: [PATCH 08/15] =?UTF-8?q?feat:=20AUTO-83=20autologin=20support=20?= =?UTF-8?q?=E2=80=94=20runner-owned=20session=20+=20login=20verify=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets the enterprise autologin runner own the browser session and drive the agent against it, instead of relying on the model to emit createProfile. - Thread x-browserless-session-id (header) / browserlessSessionId (query) through hybridAuthenticate → ToolRunContext → agent tool as attachSessionId; the agent opens /chromium/agent?sessionId= to attach to the runner's pre-created profile session. - Open-only fast-path: a createProfile call with no command opens the session and returns without hitting the agent route's id/method check. 
- autonomous-login skill: a signed-in account's visible display name usually won't equal the typed email/username — that is not a failure; judge success only by URL change / password-field absence / authed element, never by identity mismatch. Co-Authored-By: Claude Opus 4.8 --- src/@types/types.d.ts | 7 ++++ src/index.ts | 26 ++++++++++++-- src/lib/agent-client.ts | 49 +++++++++++++++++++++----- src/lib/define-tool.ts | 6 ++++ src/skills/autonomous-login.md | 2 ++ src/tools/agent.ts | 64 +++++++++++++++++++++++++++++++--- 6 files changed, 138 insertions(+), 16 deletions(-) diff --git a/src/@types/types.d.ts b/src/@types/types.d.ts index c375faf..25c08c6 100644 --- a/src/@types/types.d.ts +++ b/src/@types/types.d.ts @@ -37,6 +37,13 @@ import type { ProxyOptionsSchema } from '../lib/agent-client.js'; export interface BrowserlessSession extends Record { token: string; apiUrl: string; + /** + * A pre-created browser session id to ATTACH to (via /chromium/agent?sessionId), + * threaded by the caller through the `x-browserless-session-id` header. Used by + * the autologin runner, which does POST /profile itself and hands the agent the + * resulting id instead of letting the model open a `createProfile` session. + */ + attachSessionId?: string; } export interface SupabaseJwtPayload { diff --git a/src/index.ts b/src/index.ts index 7748da5..cb073a2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -117,18 +117,34 @@ const hybridAuthenticate = params.get('browserlessUrl') ?? config.browserlessApiUrl; + // A pre-created session id to attach to, threaded by the autologin + // runner. The agent tool opens /chromium/agent?sessionId= instead + // of doing its own POST /profile. + const attachSessionId = + (request.headers['x-browserless-session-id'] as string) ?? + params.get('browserlessSessionId') ?? + undefined; + // JWTs have 3 dot-separated base64url segments; plain API keys do not. const isJwt = headerToken ? headerToken.split('.').length === 3 : false; // 1. 
Authorization header with plain API key if (headerToken && !isJwt) { - return { token: headerToken, apiUrl } as BrowserlessSession; + return { + token: headerToken, + apiUrl, + attachSessionId, + } as BrowserlessSession; } // 2. ?token= query param const directToken = params.get('token') || undefined; if (directToken) { - return { token: directToken, apiUrl } as BrowserlessSession; + return { + token: directToken, + apiUrl, + attachSessionId, + } as BrowserlessSession; } // 3. Authorization header with JWT → decode Supabase token directly @@ -138,7 +154,11 @@ const hybridAuthenticate = config.supabaseServiceRoleKey, headerToken, ); - return { token: apiKey, apiUrl } as BrowserlessSession; + return { + token: apiKey, + apiUrl, + attachSessionId, + } as BrowserlessSession; } throw new Error( diff --git a/src/lib/agent-client.ts b/src/lib/agent-client.ts index f4bd570..b0b01f7 100644 --- a/src/lib/agent-client.ts +++ b/src/lib/agent-client.ts @@ -237,11 +237,13 @@ const getSessionKey = ( proxy?: ProxyOptions, profile?: string, createProfile?: CreateProfileParams, + attachSessionId?: string, ): string => (mcpSessionId ?? `stdio:${hashToken(token)}`) + proxyFingerprint(proxy) + (profile ? KEY_SEP + 'profile#' + hashToken(profile) : '') + - (createProfile ? KEY_SEP + 'create#' + hashToken(createProfile.name) : ''); + (createProfile ? KEY_SEP + 'create#' + hashToken(createProfile.name) : '') + + (attachSessionId ? 
KEY_SEP + 'attach#' + attachSessionId : ''); /** * Build the WebSocket URL for `/chromium/agent`: normalize trailing slashes, @@ -601,9 +603,17 @@ export const getOrCreateSession = async ( proxy?: ProxyOptions, profile?: string, createProfile?: CreateProfileParams, + attachSessionId?: string, ): Promise => { sweepSessions(); - const key = getSessionKey(mcpSessionId, token, proxy, profile, createProfile); + const key = getSessionKey( + mcpSessionId, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); const existing = sessions.get(key); if (existing && existing.ws.readyState === WebSocket.OPEN) { @@ -626,11 +636,16 @@ export const getOrCreateSession = async ( } const creation = (async (): Promise => { - // Profile-creation mode: launch a tracked session via POST /profile, then - // attach the agent WS to it by id. Otherwise launch a fresh agent browser. - const creationSessionId = createProfile - ? (await postCreateProfile(apiUrl, token, createProfile)).id - : undefined; + // Three modes for the session to attach to: + // - attachSessionId: a session the caller already created (autologin + // runner did POST /profile itself) — attach by id, no POST here. + // - createProfile: open a tracked session via POST /profile, then attach. + // - neither: launch a fresh agent browser. + const creationSessionId = attachSessionId + ? attachSessionId + : createProfile + ? 
(await postCreateProfile(apiUrl, token, createProfile)).id + : undefined; const ws = await connect(apiUrl, token, proxy, profile, creationSessionId); const session: ActiveSession = { ws, @@ -727,8 +742,16 @@ export const closeSession = ( proxy?: ProxyOptions, profile?: string, createProfile?: CreateProfileParams, + attachSessionId?: string, ): void => { - const key = getSessionKey(mcpSessionId, token, proxy, profile, createProfile); + const key = getSessionKey( + mcpSessionId, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); const session = sessions.get(key); if (session) { try { @@ -751,8 +774,16 @@ export const destroySession = ( proxy?: ProxyOptions, profile?: string, createProfile?: CreateProfileParams, + attachSessionId?: string, ): void => { - const key = getSessionKey(mcpSessionId, token, proxy, profile, createProfile); + const key = getSessionKey( + mcpSessionId, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); const session = sessions.get(key); if (session) { try { diff --git a/src/lib/define-tool.ts b/src/lib/define-tool.ts index 46d2dbf..7bd4aff 100644 --- a/src/lib/define-tool.ts +++ b/src/lib/define-tool.ts @@ -45,6 +45,11 @@ export interface ToolRunContext

{ }) => Promise; /** MCP session id (httpStream transport) or undefined for stdio — used by agent tool. */ sessionId: string | undefined; + /** + * Pre-created browser session id to attach to (from the `x-browserless-session-id` + * header). When set, the agent tool attaches to it instead of opening its own. + */ + attachSessionId?: string; } export interface ToolDefinition { @@ -142,6 +147,7 @@ export function defineTool( apiUrl, reportProgress, sessionId, + attachSessionId: s?.attachSessionId, }); } catch (err) { if (err instanceof ProfileNotFoundError) { diff --git a/src/skills/autonomous-login.md b/src/skills/autonomous-login.md index f909560..45f713c 100644 --- a/src/skills/autonomous-login.md +++ b/src/skills/autonomous-login.md @@ -60,6 +60,8 @@ One batched call (type username, type password, click submit) with Gate-2 values 2. Password input absent from new snapshot. 3. Authed element matching `/log out|sign out|my account|profile|dashboard|avatar/i`. +The visible account/display name will usually NOT equal the email or username you typed (it's the profile's display name, often a real name) — that's expected, NOT a mismatch. Never mark a login failed because the shown identity differs from the credential; judge only by the three signals above. + None holds: - Error matching `/invalid|incorrect|wrong|doesn'?t match|not recognized|please try again/i` → `INVALID_CREDENTIALS`. 
diff --git a/src/tools/agent.ts b/src/tools/agent.ts index e6876f2..478f019 100644 --- a/src/tools/agent.ts +++ b/src/tools/agent.ts @@ -162,6 +162,7 @@ export function registerAgentTools( token, apiUrl, sessionId: mcpSessionId, + attachSessionId, }) => { const commands: Array<{ method: string; @@ -203,11 +204,44 @@ export function registerAgentTools( } if (commands.length === 1 && commands[0].method === 'close') { - closeSession(mcpSessionId, token, proxy, profile, createProfile); + closeSession( + mcpSessionId, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); sendAnalytics(true); return [{ type: 'text' as const, text: 'Browser session closed.' }]; } + // Open-only call: no real command (e.g. `createProfile`/`profile`/`proxy` + // set with no method/commands). Dispatching the empty-method default would + // make the agent route reject it as `Missing required id/method`, so just + // open (or reuse) the session and report it's ready for follow-up commands. + if (commands.length === 1 && !commands[0].method) { + try { + await getOrCreateSession( + mcpSessionId, + apiUrl, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); + } catch (connErr: unknown) { + sendAnalytics(false); + throw new UserError(formatConnectError(connErr)); + } + sendAnalytics(true); + const text = createProfile + ? `Profile-creation session "${createProfile.name}" is open (non-headless). Send commands to drive the login, then call saveProfile.` + : 'Browser session is open. 
Send commands to drive it.'; + return [{ type: 'text' as const, text }]; + } + const runCommands = async (isRetry: boolean): Promise => { let agentSession; try { @@ -218,6 +252,7 @@ export function registerAgentTools( proxy, profile, createProfile, + attachSessionId, ); } catch (connErr: unknown) { // No retry when the server gave a definitive 4xx — re-attempting @@ -226,7 +261,14 @@ export function registerAgentTools( if (isRetry || !isRetryableUpgradeError(connErr)) { throw new UserError(formatConnectError(connErr)); } - destroySession(mcpSessionId, token, proxy, profile, createProfile); + destroySession( + mcpSessionId, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); return runCommands(true); } @@ -239,7 +281,14 @@ export function registerAgentTools( let crossOriginBaseline: string | undefined = agentSession.lastUrl; for (const cmd of commands) { if (cmd.method === 'close') { - closeSession(mcpSessionId, token, proxy, profile, createProfile); + closeSession( + mcpSessionId, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); results.push({ method: 'close', result: { closed: true } }); closedDuringBatch = true; break; @@ -253,7 +302,14 @@ export function registerAgentTools( try { resp = await send(agentSession, cmd.method, cmd.params); } catch (sendErr: unknown) { - destroySession(mcpSessionId, token, proxy, profile, createProfile); + destroySession( + mcpSessionId, + token, + proxy, + profile, + createProfile, + attachSessionId, + ); const errMessage = sendErr instanceof Error ? 
sendErr.message : String(sendErr); if (!isRetry) { From e8e27ee8c02ba13db8c3fd80934f352bf2c28684 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Fri, 19 Jun 2026 23:52:24 -0500 Subject: [PATCH 09/15] feat: add LoadSecretCommandSchema for secure credential handling --- src/tools/schemas.ts | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/tools/schemas.ts b/src/tools/schemas.ts index 2818106..1d9169c 100644 --- a/src/tools/schemas.ts +++ b/src/tools/schemas.ts @@ -162,6 +162,27 @@ const TypeCommandSchema = z.object({ }), }); +const LoadSecretCommandSchema = z.object({ + method: z.literal('loadSecret'), + params: z.object({ + ref: z + .string() + .describe( + 'The credential reference/alias to inject (e.g. an op:// reference). ' + + 'The secret value is resolved server-side and typed into the field — ' + + 'you never see it. Use this for ALL passwords and usernames from a ' + + 'secrets vault; never put a secret value in `type`.', + ), + selector: z + .string() + .optional() + .describe( + 'CSS selector of the input to fill. 
If omitted, the secret is injected ' + + 'into the currently focused element (click/focus the field first).', + ), + }), +}); + const SelectCommandSchema = z.object({ method: z.literal('select'), params: z.object({ @@ -474,6 +495,7 @@ const specificCommandSchemas = [ CloseTabCommandSchema, ClickCommandSchema, TypeCommandSchema, + LoadSecretCommandSchema, SelectCommandSchema, CheckboxCommandSchema, HoverCommandSchema, From 01de01312a874f380940b3d7f52e7aaba472e4aa Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Sat, 20 Jun 2026 00:49:44 -0500 Subject: [PATCH 10/15] feat: add tests for loadSecret command handling in AgentParamsSchema --- test/tools/schemas.spec.ts | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/test/tools/schemas.spec.ts b/test/tools/schemas.spec.ts index e63a137..ad84e5e 100644 --- a/test/tools/schemas.spec.ts +++ b/test/tools/schemas.spec.ts @@ -242,6 +242,45 @@ describe('profile field (shared profileField helper)', () => { }); }); +describe('loadSecret command', () => { + it('accepts a loadSecret command with ref + selector', () => { + const parsed = AgentParamsSchema.parse({ + commands: [ + { + method: 'loadSecret', + params: { + ref: 'op://Automation/imdb/password', + selector: 'input#ap_password', + }, + }, + ], + }); + const cmd = parsed.commands?.[0]; + expect(cmd?.method).to.equal('loadSecret'); + expect((cmd?.params as { ref?: string })?.ref).to.equal( + 'op://Automation/imdb/password', + ); + }); + + it('accepts a loadSecret command with ref only (selector optional)', () => { + const result = AgentParamsSchema.safeParse({ + commands: [ + { method: 'loadSecret', params: { ref: 'op://Automation/imdb/username' } }, + ], + }); + expect(result.success).to.equal(true); + }); + + it('rejects a loadSecret command missing ref', () => { + const result = AgentParamsSchema.safeParse({ + commands: [ + { method: 'loadSecret', params: { selector: 'input#ap_email' } }, + ], + }); + 
expect(result.success).to.equal(false); + }); +}); + describe('createProfile field', () => { it('accepts a createProfile object on its own', () => { const parsed = AgentParamsSchema.parse({ From ee7eb1d48e87da35182d0b502c0a3800c2c5753e Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Sat, 20 Jun 2026 00:49:44 -0500 Subject: [PATCH 11/15] chore: prettier --- test/tools/schemas.spec.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/tools/schemas.spec.ts b/test/tools/schemas.spec.ts index ad84e5e..5902115 100644 --- a/test/tools/schemas.spec.ts +++ b/test/tools/schemas.spec.ts @@ -265,7 +265,10 @@ describe('loadSecret command', () => { it('accepts a loadSecret command with ref only (selector optional)', () => { const result = AgentParamsSchema.safeParse({ commands: [ - { method: 'loadSecret', params: { ref: 'op://Automation/imdb/username' } }, + { + method: 'loadSecret', + params: { ref: 'op://Automation/imdb/username' }, + }, ], }); expect(result.success).to.equal(true); From c279cac8e47415eb60d8a4c7467edbf577d0a5a4 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Sat, 20 Jun 2026 01:07:30 -0500 Subject: [PATCH 12/15] feat: refactor session handling in hybridAuthenticate and getOrCreateSession functions --- src/index.ts | 23 ++++++++--------------- src/lib/agent-client.ts | 12 +++++++----- src/tools/agent.ts | 1 + 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/index.ts b/src/index.ts index cb073a2..7798a7d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -128,23 +128,20 @@ const hybridAuthenticate = // JWTs have 3 dot-separated base64url segments; plain API keys do not. const isJwt = headerToken ? headerToken.split('.').length === 3 : false; + // apiUrl/attachSessionId are the same across every auth path; only the + // resolved token differs. + const session = (token: string): BrowserlessSession => + ({ token, apiUrl, attachSessionId }) as BrowserlessSession; + // 1. 
Authorization header with plain API key if (headerToken && !isJwt) { - return { - token: headerToken, - apiUrl, - attachSessionId, - } as BrowserlessSession; + return session(headerToken); } // 2. ?token= query param const directToken = params.get('token') || undefined; if (directToken) { - return { - token: directToken, - apiUrl, - attachSessionId, - } as BrowserlessSession; + return session(directToken); } // 3. Authorization header with JWT → decode Supabase token directly @@ -154,11 +151,7 @@ const hybridAuthenticate = config.supabaseServiceRoleKey, headerToken, ); - return { - token: apiKey, - apiUrl, - attachSessionId, - } as BrowserlessSession; + return session(apiKey); } throw new Error( diff --git a/src/lib/agent-client.ts b/src/lib/agent-client.ts index b0b01f7..aef6bd4 100644 --- a/src/lib/agent-client.ts +++ b/src/lib/agent-client.ts @@ -641,11 +641,13 @@ export const getOrCreateSession = async ( // runner did POST /profile itself) — attach by id, no POST here. // - createProfile: open a tracked session via POST /profile, then attach. // - neither: launch a fresh agent browser. - const creationSessionId = attachSessionId - ? attachSessionId - : createProfile - ? 
(await postCreateProfile(apiUrl, token, createProfile)).id - : undefined; + let creationSessionId: string | undefined; + if (attachSessionId) { + creationSessionId = attachSessionId; + } else if (createProfile) { + creationSessionId = (await postCreateProfile(apiUrl, token, createProfile)) + .id; + } const ws = await connect(apiUrl, token, proxy, profile, creationSessionId); const session: ActiveSession = { ws, diff --git a/src/tools/agent.ts b/src/tools/agent.ts index 478f019..f6d34b9 100644 --- a/src/tools/agent.ts +++ b/src/tools/agent.ts @@ -341,6 +341,7 @@ export function registerAgentTools( proxy, profile, createProfile, + attachSessionId, ); if (!isRetry) { return runCommands(true); From d3dd0c22ede7e51423a8bf2cfdb134af0ef4f56f Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Sat, 20 Jun 2026 01:07:30 -0500 Subject: [PATCH 13/15] chore: prettier --- src/lib/agent-client.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib/agent-client.ts b/src/lib/agent-client.ts index aef6bd4..7a92948 100644 --- a/src/lib/agent-client.ts +++ b/src/lib/agent-client.ts @@ -645,8 +645,9 @@ export const getOrCreateSession = async ( if (attachSessionId) { creationSessionId = attachSessionId; } else if (createProfile) { - creationSessionId = (await postCreateProfile(apiUrl, token, createProfile)) - .id; + creationSessionId = ( + await postCreateProfile(apiUrl, token, createProfile) + ).id; } const ws = await connect(apiUrl, token, proxy, profile, creationSessionId); const session: ActiveSession = { From 6211829e050646f37b65c4735dad0077d0348ec5 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Mon, 22 Jun 2026 18:51:42 -0500 Subject: [PATCH 14/15] feat: enhance iframe handling --- src/@types/types.d.ts | 9 +++++++++ src/lib/agent-format.ts | 35 ++++++++++++++++++++++++++++++++--- src/skills/shadow-dom.md | 10 +++++++++- src/skills/system-prompt.ts | 5 ++++- 4 files changed, 54 insertions(+), 5 deletions(-) diff --git a/src/@types/types.d.ts 
b/src/@types/types.d.ts index 25c08c6..9cdcbf4 100644 --- a/src/@types/types.d.ts +++ b/src/@types/types.d.ts @@ -141,6 +141,7 @@ export interface SnapshotElement { focused?: boolean; required?: boolean; ariaLabel?: string; + frameId?: string; } export interface TabInfo { @@ -150,6 +151,13 @@ export interface TabInfo { active: boolean; } +// for iframe handling +export interface FrameInfo { + frameId: string; + url: string; + crossOrigin: boolean; +} + export interface SnapshotResult { url: string; title: string; @@ -158,6 +166,7 @@ export interface SnapshotResult { tabs?: TabInfo[]; activeTargetId?: string | null; detectedChallenges?: string[]; + frames?: FrameInfo[]; } export interface ActiveSession { diff --git a/src/lib/agent-format.ts b/src/lib/agent-format.ts index b1ff2dd..0c99265 100644 --- a/src/lib/agent-format.ts +++ b/src/lib/agent-format.ts @@ -5,6 +5,7 @@ export type { SnapshotResult, SnapshotElement, TabInfo, + FrameInfo, } from '../@types/types.js'; const safeOrigin = (url: string): string | undefined => { @@ -118,10 +119,16 @@ export const formatConnectError = (err: unknown): string => { /** * Format a single snapshot element as a compact one-liner: - * [ref] tag role "name" ref=selector value="…" (state) + * [ref] tag role "name" ref=selector value="…" (state) [frame#N] * e.g. [7] input checkbox "Remember me" ref=input#remember (checked, required) + * `frameLabels` maps a frameId to its display label (frame#1, …); when an + * element carries a frameId, the label is appended so the agent sees which + * iframe it lives in. 
*/ -const formatElement = (el: SnapshotElement): string => { +const formatElement = ( + el: SnapshotElement, + frameLabels?: Map<string, string>, +): string => { const parts: string[] = [`[${el.ref}]`, el.tag, el.role]; const name = el.name || el.text || ''; if (name) parts.push(`"${name}"`); @@ -141,6 +148,9 @@ const formatElement = (el: SnapshotElement): string => { if (el.required) flags.push('required'); if (flags.length) parts.push(`(${flags.join(', ')})`); + const frameLabel = el.frameId && frameLabels?.get(el.frameId); + if (frameLabel) parts.push(`[${frameLabel}]`); + return parts.join(' '); }; @@ -166,10 +176,29 @@ export const formatSnapshot = (snapshot: SnapshotResult): string => { } } + // Label every iframe (frame#1, …) and list them so the agent knows + // which elements live in a frame and that their deep-ref selectors pierce it. + const frameLabels = new Map<string, string>(); + if (snapshot.frames?.length) { + snapshot.frames.forEach((frame, i) => + frameLabels.set(frame.frameId, `frame#${i + 1}`), + ); + lines.push(`Frames (${snapshot.frames.length} iframes):`); + for (const frame of snapshot.frames) { + const origin = frame.crossOrigin ? 'cross-origin' : 'same-origin'; + lines.push( + ` ${frameLabels.get(frame.frameId)} ${frame.url} (${origin})`, + ); + } + lines.push( + 'Elements tagged [frame#N] live in that iframe; their deep-ref selectors pierce it — pass as-is to click/type/hover.', + ); + } + lines.push(''); for (const el of snapshot.elements) { - lines.push(formatElement(el)); + lines.push(formatElement(el, frameLabels)); } lines.push('--- END SNAPSHOT ---'); diff --git a/src/skills/shadow-dom.md b/src/skills/shadow-dom.md index a589285..83a2aa6 100644 --- a/src/skills/shadow-dom.md +++ b/src/skills/shadow-dom.md @@ -2,6 +2,14 @@ Snapshot contains `deep-ref=` selectors, or you hit `SELECTOR_NOT_FOUND` on regular selector. Page using shadow DOM or iframes — read before next action. 
+## Iframes in the snapshot + +Iframes (same-origin and cross-origin) are now snapshotted too. When present: +- Snapshot shows a `Frames (N iframes):` block listing each frame's label, URL, and origin. +- Elements inside a frame are tagged `[frame#N]` and carry a ready `deep-ref=` selector — cross-origin uses `< *url* css`, same-origin uses `< css`. Pass it as-is to `click`/`type`/`hover`/`checkbox` — no frame switching, no hand-construction. + +Only build a deep selector by hand (below) when a frame element wasn't surfaced (a11y-empty widget, capped snapshot). + ## Deep selectors: `< ` prefix Browserless deep selectors start with `< ` (less-than, space). Space mandatory. Format: @@ -20,7 +28,7 @@ When snapshot lists `deep-ref=< button#deny`, pass to `click` / `type` / `hover` ## Constructing deep selectors for iframes snapshot didn't surface -Snapshots only include accessible content. Iframes (captcha/payment widgets) often have nothing meaningful in accessibility tree. Build selector by hand: +Fallback only — most cross-origin iframes are now in the snapshot (see above). Some widgets still have nothing meaningful in the accessibility tree. Build selector by hand: - `< *google.com/recaptcha* #recaptcha-anchor` — reCAPTCHA checkbox - `< *hcaptcha.com* #checkbox` — hCaptcha checkbox diff --git a/src/skills/system-prompt.ts b/src/skills/system-prompt.ts index e79b640..eac67d3 100644 --- a/src/skills/system-prompt.ts +++ b/src/skills/system-prompt.ts @@ -50,7 +50,10 @@ Load manually via **browserless_skill** if suspected but not injected: ## Selectors - Use **ref=** (CSS) or **deep-ref=** (starts \`< \`) exactly as shown in snapshot - Example: \`[3] button "Sign In" ref=button#submit\` → \`"button#submit"\` -- deep-ref for shadow DOM — see \`shadow-dom\` skill +- deep-ref for shadow DOM / iframes — see \`shadow-dom\` skill + +## Iframes +Snapshots include a \`Frames\` list (same-origin and cross-origin iframes) when present.
Elements inside a frame are tagged \`[frame#N]\` and carry a ready \`deep-ref=\` selector (cross-origin: \`< *url* css\`; same-origin: \`< css\`) that already pierces the frame — pass it as-is to \`click\`/\`type\`/\`hover\`/\`checkbox\`. No frame switching needed. Captcha/payment widgets (reCAPTCHA, hCaptcha, Stripe, Turnstile) show up here. \`shadow-dom\` skill auto-loads when frames present. ## Tabs Snapshots include \`tabs\` + \`activeTargetId\` — no getTabs needed. Multi-tab / \`snapshot { targetId }\` in \`tabs\` skill (auto-loads when >1 tab). From 0d5e25785ed9c3aa8cff43d47ccd93366a6320a1 Mon Sep 17 00:00:00 2001 From: Anderson Martinez Date: Mon, 22 Jun 2026 18:51:42 -0500 Subject: [PATCH 15/15] chore: prettier --- src/skills/shadow-dom.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/skills/shadow-dom.md b/src/skills/shadow-dom.md index 83a2aa6..4de70ed 100644 --- a/src/skills/shadow-dom.md +++ b/src/skills/shadow-dom.md @@ -5,6 +5,7 @@ Snapshot contains `deep-ref=` selectors, or you hit `SELECTOR_NOT_FOUND` on regu ## Iframes in the snapshot Iframes (same-origin and cross-origin) are now snapshotted too. When present: + - Snapshot shows a `Frames (N iframes):` block listing each frame's label, URL, and origin. - Elements inside a frame are tagged `[frame#N]` and carry a ready `deep-ref=` selector — cross-origin uses `< *url* css`, same-origin uses `< css`. Pass it as-is to `click`/`type`/`hover`/`checkbox` — no frame switching, no hand-construction.