From 0c2c000bdbc47e247464685f74a00d7fc399dbf5 Mon Sep 17 00:00:00 2001 From: MagMueller Date: Fri, 10 Apr 2026 19:20:56 -0700 Subject: [PATCH 1/2] fix: bump Python to 3.10, drop archived selenium-wire, note webhook example needs Python 3.12 - browser-use-python: bump requires-python from 3.9 to 3.10 (generated models use PEP 604 `str | None` union syntax which breaks on 3.9, contradicting metadata) - docs(playwright): remove archived selenium-wire reference (github.com/wkeeling/selenium-wire has been read-only since Jan 2024) - docs(webhooks): note that the Python verify_webhook example requires Python 3.12+ because of f-string nested quotes in `separators=(',', ':')` - regenerate llms.txt / llms-full.txt Co-Authored-By: Claude Opus 4.6 (1M context) --- browser-use-python/pyproject.toml | 4 +- browser-use-python/uv.lock | 78 +----- .../browser/playwright-puppeteer-selenium.mdx | 2 +- docs/cloud/guides/webhooks.mdx | 4 + docs/cloud/llms-full.txt | 263 ++++++++++++++++-- docs/cloud/llms.txt | 20 +- docs/llms-full.txt | 244 +++++++++++++++- docs/llms.txt | 2 + docs/open-source/llms-full.txt | 193 ++++++++++--- 9 files changed, 667 insertions(+), 143 deletions(-) diff --git a/browser-use-python/pyproject.toml b/browser-use-python/pyproject.toml index 2b04eb63..eb1a7a25 100644 --- a/browser-use-python/pyproject.toml +++ b/browser-use-python/pyproject.toml @@ -8,7 +8,7 @@ version = "3.4.2" description = "Python SDK for the Browser Use cloud API" readme = "README.md" license = "MIT" -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = ["httpx>=0.24", "pydantic>=2.0"] [project.optional-dependencies] @@ -25,7 +25,7 @@ markers = [ ] [tool.pyright] -pythonVersion = "3.9" +pythonVersion = "3.10" typeCheckingMode = "basic" include = ["src", "tests", "examples"] diff --git a/browser-use-python/uv.lock b/browser-use-python/uv.lock index 93aedad4..1f5054e7 100644 --- a/browser-use-python/uv.lock +++ b/browser-use-python/uv.lock @@ -1,10 +1,6 @@ version = 1 
revision = 3 -requires-python = ">=3.9" -resolution-markers = [ - "python_full_version >= '3.10'", - "python_full_version < '3.10'", -] +requires-python = ">=3.10" [[package]] name = "annotated-types" @@ -41,8 +37,7 @@ dependencies = [ [package.optional-dependencies] dev = [ { name = "pyright" }, - { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "pytest", version = "9.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pytest" }, ] examples = [ { name = "python-dotenv" }, @@ -51,8 +46,7 @@ examples = [ [package.dev-dependencies] dev = [ { name = "pyright" }, - { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "pytest", version = "9.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "pytest" }, ] [package.metadata] @@ -147,25 +141,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] -[[package]] -name = "iniconfig" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = 
"sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, -] - [[package]] name = "iniconfig" version = "2.3.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.10'", -] sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, @@ -305,19 +284,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, - { url = 
"https://files.pythonhosted.org/packages/54/db/160dffb57ed9a3705c4cbcbff0ac03bdae45f1ca7d58ab74645550df3fbd/pydantic_core-2.41.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8bfeaf8735be79f225f3fefab7f941c712aaca36f1128c9d7e2352ee1aa87bdf", size = 2107999, upload-time = "2025-11-04T13:42:03.885Z" }, - { url = "https://files.pythonhosted.org/packages/a3/7d/88e7de946f60d9263cc84819f32513520b85c0f8322f9b8f6e4afc938383/pydantic_core-2.41.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:346285d28e4c8017da95144c7f3acd42740d637ff41946af5ce6e5e420502dd5", size = 1929745, upload-time = "2025-11-04T13:42:06.075Z" }, - { url = "https://files.pythonhosted.org/packages/d5/c2/aef51e5b283780e85e99ff19db0f05842d2d4a8a8cd15e63b0280029b08f/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a75dafbf87d6276ddc5b2bf6fae5254e3d0876b626eb24969a574fff9149ee5d", size = 1920220, upload-time = "2025-11-04T13:42:08.457Z" }, - { url = "https://files.pythonhosted.org/packages/c7/97/492ab10f9ac8695cd76b2fdb24e9e61f394051df71594e9bcc891c9f586e/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7b93a4d08587e2b7e7882de461e82b6ed76d9026ce91ca7915e740ecc7855f60", size = 2067296, upload-time = "2025-11-04T13:42:10.817Z" }, - { url = "https://files.pythonhosted.org/packages/ec/23/984149650e5269c59a2a4c41d234a9570adc68ab29981825cfaf4cfad8f4/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8465ab91a4bd96d36dde3263f06caa6a8a6019e4113f24dc753d79a8b3a3f82", size = 2231548, upload-time = "2025-11-04T13:42:13.843Z" }, - { url = "https://files.pythonhosted.org/packages/71/0c/85bcbb885b9732c28bec67a222dbed5ed2d77baee1f8bba2002e8cd00c5c/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:299e0a22e7ae2b85c1a57f104538b2656e8ab1873511fd718a1c1c6f149b77b5", size = 2362571, upload-time = "2025-11-04T13:42:16.208Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/4a/412d2048be12c334003e9b823a3fa3d038e46cc2d64dd8aab50b31b65499/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:707625ef0983fcfb461acfaf14de2067c5942c6bb0f3b4c99158bed6fedd3cf3", size = 2068175, upload-time = "2025-11-04T13:42:18.911Z" }, - { url = "https://files.pythonhosted.org/packages/73/f4/c58b6a776b502d0a5540ad02e232514285513572060f0d78f7832ca3c98b/pydantic_core-2.41.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f41eb9797986d6ebac5e8edff36d5cef9de40def462311b3eb3eeded1431e425", size = 2177203, upload-time = "2025-11-04T13:42:22.578Z" }, - { url = "https://files.pythonhosted.org/packages/ed/ae/f06ea4c7e7a9eead3d165e7623cd2ea0cb788e277e4f935af63fc98fa4e6/pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0384e2e1021894b1ff5a786dbf94771e2986ebe2869533874d7e43bc79c6f504", size = 2148191, upload-time = "2025-11-04T13:42:24.89Z" }, - { url = "https://files.pythonhosted.org/packages/c1/57/25a11dcdc656bf5f8b05902c3c2934ac3ea296257cc4a3f79a6319e61856/pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:f0cd744688278965817fd0839c4a4116add48d23890d468bc436f78beb28abf5", size = 2343907, upload-time = "2025-11-04T13:42:27.683Z" }, - { url = "https://files.pythonhosted.org/packages/96/82/e33d5f4933d7a03327c0c43c65d575e5919d4974ffc026bc917a5f7b9f61/pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:753e230374206729bf0a807954bcc6c150d3743928a73faffee51ac6557a03c3", size = 2322174, upload-time = "2025-11-04T13:42:30.776Z" }, - { url = "https://files.pythonhosted.org/packages/81/45/4091be67ce9f469e81656f880f3506f6a5624121ec5eb3eab37d7581897d/pydantic_core-2.41.5-cp39-cp39-win32.whl", hash = "sha256:873e0d5b4fb9b89ef7c2d2a963ea7d02879d9da0da8d9d4933dee8ee86a8b460", size = 1990353, upload-time = "2025-11-04T13:42:33.111Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/8a/a98aede18db6e9cd5d66bcacd8a409fcf8134204cdede2e7de35c5a2c5ef/pydantic_core-2.41.5-cp39-cp39-win_amd64.whl", hash = "sha256:e4f4a984405e91527a0d62649ee21138f8e3d0ef103be488c1dc11a80d7f184b", size = 2015698, upload-time = "2025-11-04T13:42:35.484Z" }, { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, @@ -366,42 +332,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/82/a2c93e32800940d9573fb28c346772a14778b84ba7524e691b324620ab89/pyright-1.1.408-py3-none-any.whl", hash = "sha256:090b32865f4fdb1e0e6cd82bf5618480d48eecd2eb2e70f960982a3d9a4c17c1", size = 6399144, upload-time = "2026-01-08T08:07:37.082Z" }, ] -[[package]] -name = "pytest" -version = "8.4.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < 
'3.10'" }, - { name = "iniconfig", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "packaging", marker = "python_full_version < '3.10'" }, - { name = "pluggy", marker = "python_full_version < '3.10'" }, - { name = "pygments", marker = "python_full_version < '3.10'" }, - { name = "tomli", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, -] - [[package]] name = "pytest" version = "9.0.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.10'", -] dependencies = [ - { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" }, - { name = "iniconfig", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "packaging", marker = "python_full_version >= '3.10'" }, - { name = "pluggy", marker = "python_full_version >= '3.10'" }, - { name = "pygments", marker = "python_full_version >= '3.10'" }, - { name = "tomli", marker = "python_full_version == '3.10.*'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, + { name = 
"tomli", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } wheels = [ diff --git a/docs/cloud/browser/playwright-puppeteer-selenium.mdx b/docs/cloud/browser/playwright-puppeteer-selenium.mdx index 85134290..e9de754a 100644 --- a/docs/cloud/browser/playwright-puppeteer-selenium.mdx +++ b/docs/cloud/browser/playwright-puppeteer-selenium.mdx @@ -56,7 +56,7 @@ await browser.close(); ### Selenium -Selenium requires a local WebSocket proxy to connect to Browser Use's remote CDP endpoint. Use [selenium-wire](https://github.com/wkeeling/selenium-wire) or connect through Playwright's CDP bridge instead: +Selenium does not support remote CDP over WebSocket directly. Connect through Playwright's CDP bridge instead: ```python from playwright.sync_api import sync_playwright diff --git a/docs/cloud/guides/webhooks.mdx b/docs/cloud/guides/webhooks.mdx index 212a7cca..3ae28819 100644 --- a/docs/cloud/guides/webhooks.mdx +++ b/docs/cloud/guides/webhooks.mdx @@ -36,6 +36,10 @@ Every webhook request includes two headers: The signature is computed over `{timestamp}.{body}`, where `body` is the JSON-serialized payload with keys sorted alphabetically and no extra whitespace. Verify it to ensure the request is authentic and to prevent replay attacks. + + The Python example below uses an f-string with nested quotes that requires **Python 3.12+**. On older versions, extract `separators=(',', ':')` into a variable first. 
+ + ```python Python import hashlib diff --git a/docs/cloud/llms-full.txt b/docs/cloud/llms-full.txt index 68954b88..4acca447 100644 --- a/docs/cloud/llms-full.txt +++ b/docs/cloud/llms-full.txt @@ -1199,7 +1199,7 @@ await browser.close(); ### Selenium -Selenium requires a local WebSocket proxy to connect to Browser Use's remote CDP endpoint. Use [selenium-wire](https://github.com/wkeeling/selenium-wire) or connect through Playwright's CDP bridge instead: +Selenium does not support remote CDP over WebSocket directly. Connect through Playwright's CDP bridge instead: ```python from playwright.sync_api import sync_playwright @@ -1675,6 +1675,96 @@ Use **Agent Mail** (enabled by default). For end-client scenarios, have them for Use **TOTP secret in prompt** — the agent generates codes via pyotp, no human intervention needed. +# Claude Code +Source: https://docs.browser-use.com/cloud/tutorials/integrations/claude-code + + +[Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) is Anthropic's agentic coding tool that runs in the terminal. Add Browser Use and it gets full cloud browser automation — anti-detect profiles, CAPTCHA solving, residential proxies in 195+ countries, persistent profiles, and stealth browsing. + +## Setup + +**1. Install the CLI** + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +**2. Verify the installation** + +```bash +browser-use doctor +``` + +**3. Install the skill** + +The Browser Use skill teaches Claude Code the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): + +```bash +npx skills add https://github.com/browser-use/browser-use --skill browser-use +``` + +**4. 
Connect to cloud browsers** + +Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and connect: + +```bash +browser-use cloud login +browser-use cloud connect +``` + +Or let Claude Code provision a free API key itself — see [Agent Self-Registration](#agent-self-registration) below. + +**5. Use it** + +Claude Code uses its bash tool to run CLI commands directly: + +``` +> Use browser-use to open github.com/trending and summarize the top repos +``` + +For the complete CLI reference and advanced features like sessions, tunnels, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). + +## Agent Self-Registration + +Claude Code can provision its own Browser Use API key autonomously — no human interaction needed. The free tier includes unlimited browser hours, free proxies in 195+ countries, persistent browser profiles, CAPTCHA solving, and stealth browsing at zero cost. + +Install the Browser Use CLI: + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +Claude Code runs two CLI commands using its bash tool: + +**1. Request a challenge** + +```bash +browser-use cloud signup +``` + +Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. + +**2. Solve the challenge and verify** + +Claude Code solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): + +```bash +browser-use cloud signup --verify +``` + +The CLI saves the API key to `~/.browser-use/config.json` automatically. No extra configuration needed — Claude Code can start using cloud browser commands immediately. + +### Claim the account (optional) + +If the human wants to see the account in the dashboard later: + +```bash +browser-use cloud signup --claim +``` + +Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. 
+ + # OpenClaw Source: https://docs.browser-use.com/cloud/tutorials/integrations/openclaw @@ -1771,6 +1861,156 @@ Once the skill is loaded, OpenClaw agents can use the `browser-use` CLI to navig For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [README](https://github.com/browser-use/browser-use/blob/main/browser_use/skill_cli/README.md) and the [Browser Use docs](https://docs.browser-use.com). +# Hermes Agent +Source: https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent + + +[Hermes Agent](https://github.com/nousresearch/hermes-agent) is an open-source, self-improving AI agent by Nous Research. It has built-in browser automation tools that work with local Chromium out of the box. Add Browser Use and those tools run on cloud browsers with anti-detect profiles, residential proxies in 195+ countries, and stealth browsing. + +Two ways to set it up: configure Browser Use as Hermes's cloud browser backend, or install the Browser Use CLI and let Hermes drive it directly. + +## Option 1: Cloud Browser Backend + +Hermes has built-in browser tools (`browser_navigate`, `browser_click`, `browser_snapshot`, etc.) that default to local Chromium. Point them at Browser Use cloud browsers instead — no extra dependencies, same Hermes experience. + +### Setup + +**1. Get your API key** + +Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and copy your API key from [Settings → API Keys](https://cloud.browser-use.com/settings?tab=api-keys&new=1). + +Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. + +**2. Configure Hermes** + +Run the setup wizard: + +```bash +hermes setup tools +``` + +Select **Browser Automation**, then **Browser Use**, and paste your API key when prompted. 
+ +Or configure manually — add your key to `~/.hermes/.env`: + +```bash +BROWSER_USE_API_KEY=your_key_here +``` + +And set the provider in `~/.hermes/config.yaml`: + +```yaml +browser: + cloud_provider: browser-use +``` + +**3. Use it** + +Just chat with Hermes — any browsing tasks automatically route through Browser Use cloud browsers: + +``` +> Find the top trending repositories on GitHub today and summarize them +``` + +## Option 2: Browser Use CLI + +The [Browser Use CLI](https://docs.browser-use.com/open-source/browser-use-cli) is a standalone tool that gives Hermes browser automation through terminal commands. Hermes drives the browser directly via its terminal tool — giving you shared browser sessions across agents, persistent logins and cookies, profile management, and access to Browser Use's full command surface. + +### Setup + +**1. Install the CLI** + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +**2. Verify the installation** + +```bash +browser-use doctor +``` + +**3. Install the skill** + +The Browser Use skill teaches Hermes the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): + +```bash +hermes skills install skills-sh/browser-use/browser-use/browser-use +``` + +Or ask Hermes directly in chat to install it. + +**4. Connect to cloud browsers** + +Log in with your API key: + +```bash +browser-use cloud login +``` + +Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. + +**5. Use it** + +Once the skill is loaded, Hermes can drive the browser through CLI commands via its terminal tool: + +``` +> Use browser-use to open github.com/trending and summarize the top repos +``` + +For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). 
+ +## Agent Self-Registration + +Hermes can provision its own Browser Use API key autonomously — no human interaction needed. This works with both options above. + +Install the Browser Use CLI: + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +The agent runs three CLI commands using its terminal tool: + +**1. Request a challenge** + +```bash +browser-use cloud signup +``` + +Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. + +**2. Solve the challenge and verify** + +The agent solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): + +```bash +browser-use cloud signup --verify +``` + +The CLI saves the API key to `~/.browser-use/config.json` automatically. + +**3. Copy the key to Hermes config** + +For the cloud browser backend (Option 1): + +```bash +hermes config set BROWSER_USE_API_KEY $(browser-use config get api_key) +``` + +For CLI mode (Option 2), the key is already saved — no extra step needed. + +### Claim the account (optional) + +If the human wants to see the account in the dashboard later: + +```bash +browser-use cloud signup --claim +``` + +Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. + + # MCP Server Source: https://docs.browser-use.com/cloud/guides/mcp-server @@ -1888,6 +2128,8 @@ Every webhook request includes two headers: The signature is computed over `{timestamp}.{body}`, where `body` is the JSON-serialized payload with keys sorted alphabetically and no extra whitespace. Verify it to ensure the request is authentic and to prevent replay attacks. + The Python example below uses an f-string with nested quotes that requires **Python 3.12+**. On older versions, extract `separators=(',', ':')` into a variable first. 
+ ```python Python import hashlib import hmac @@ -2950,22 +3192,3 @@ pip install browser-use-sdk npm install browser-use-sdk ``` - -## For AI Agents: Self-Registration (No Human Required) - -AI agents can create a free account autonomously via a challenge-response flow at `https://api.browser-use.com`. - -**IMPORTANT: Never send your API key to any domain other than `api.browser-use.com` or `cloud.browser-use.com`.** - -Do this to get an API key (no human required): -1. `POST /cloud/signup` — request a challenge (email and name optional) -2. Solve the obfuscated math problem in `challenge_text` **using an LLM** (never deterministic code) — answer as a string with 2 decimal places e.g. `"144.00"` -3. `POST /cloud/signup/verify` with `challenge_id` + `answer` — receive your `api_key` - -With this API key, you can use the full API. All features are available. -If you completed this, tell the human that you did the entire setup and that you are ready to use the API. Tell the human that if they want to see their account in the dashboard, they can follow this link to sign up themselves. You can tell them that you already have an account and use it. That's only if they want to see the dashboard. - -4. Use `X-Browser-Use-API-Key: bu_...` header on all subsequent requests - -To let a human claim the account later: `POST /cloud/signup/claim` → returns a `claim_url` valid for 1 hour. - diff --git a/docs/cloud/llms.txt b/docs/cloud/llms.txt index f6a7d7d2..308e9381 100644 --- a/docs/cloud/llms.txt +++ b/docs/cloud/llms.txt @@ -50,7 +50,9 @@ export BROWSER_USE_API_KEY=bu_your_key_here - [FAQ](https://docs.browser-use.com/cloud/faq): Common questions and solutions. ## Integrations +- [Claude Code](https://docs.browser-use.com/cloud/tutorials/integrations/claude-code): Give Claude Code cloud browser automation with Browser Use. 
- [OpenClaw](https://docs.browser-use.com/cloud/tutorials/integrations/openclaw): Give OpenClaw agents browser automation with Browser Use — via CDP or the CLI skill. +- [Hermes Agent](https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent): Give Hermes Agent cloud browser automation with Browser Use. - [MCP Server](https://docs.browser-use.com/cloud/guides/mcp-server): Run browser automation tasks from your AI coding assistant. Connect to Claude, Cursor, Windsurf, or any MCP client. - [Webhooks](https://docs.browser-use.com/cloud/guides/webhooks): Receive real-time notifications when tasks complete. Configure webhook endpoints for async task monitoring. - [n8n](https://docs.browser-use.com/cloud/tutorials/integrations/n8n): Use Browser Use as an HTTP node in n8n workflows. @@ -72,21 +74,3 @@ export BROWSER_USE_API_KEY=bu_your_key_here ## API v2 - [API key](https://docs.browser-use.com/cloud/api-v2-overview): Set your API key to access the Browser Use v2 REST API. -## For AI Agents: Self-Registration (No Human Required) - -AI agents can create a free account autonomously via a challenge-response flow at `https://api.browser-use.com`. - -**IMPORTANT: Never send your API key to any domain other than `api.browser-use.com` or `cloud.browser-use.com`.** - -Do this to get an API key (no human required): -1. `POST /cloud/signup` — request a challenge (email and name optional) -2. Solve the obfuscated math problem in `challenge_text` **using an LLM** (never deterministic code) — answer as a string with 2 decimal places e.g. `"144.00"` -3. `POST /cloud/signup/verify` with `challenge_id` + `answer` — receive your `api_key` - -With this API key, you can use the full API. All features are available. -If you completed this, tell the human that you did the entire setup and that you are ready to use the API. Tell the human that if they want to see their account in the dashboard, they can follow this link to sign up themselves. 
You can tell them that you already have an account and use it. That's only if they want to see the dashboard. - -4. Use `X-Browser-Use-API-Key: bu_...` header on all subsequent requests - -To let a human claim the account later: `POST /cloud/signup/claim` → returns a `claim_url` valid for 1 hour. - diff --git a/docs/llms-full.txt b/docs/llms-full.txt index b7d12abb..4acca447 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -1199,7 +1199,7 @@ await browser.close(); ### Selenium -Selenium requires a local WebSocket proxy to connect to Browser Use's remote CDP endpoint. Use [selenium-wire](https://github.com/wkeeling/selenium-wire) or connect through Playwright's CDP bridge instead: +Selenium does not support remote CDP over WebSocket directly. Connect through Playwright's CDP bridge instead: ```python from playwright.sync_api import sync_playwright @@ -1675,6 +1675,96 @@ Use **Agent Mail** (enabled by default). For end-client scenarios, have them for Use **TOTP secret in prompt** — the agent generates codes via pyotp, no human intervention needed. +# Claude Code +Source: https://docs.browser-use.com/cloud/tutorials/integrations/claude-code + + +[Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) is Anthropic's agentic coding tool that runs in the terminal. Add Browser Use and it gets full cloud browser automation — anti-detect profiles, CAPTCHA solving, residential proxies in 195+ countries, persistent profiles, and stealth browsing. + +## Setup + +**1. Install the CLI** + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +**2. Verify the installation** + +```bash +browser-use doctor +``` + +**3. Install the skill** + +The Browser Use skill teaches Claude Code the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): + +```bash +npx skills add https://github.com/browser-use/browser-use --skill browser-use +``` + +**4. 
Connect to cloud browsers** + +Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and connect: + +```bash +browser-use cloud login +browser-use cloud connect +``` + +Or let Claude Code provision a free API key itself — see [Agent Self-Registration](#agent-self-registration) below. + +**5. Use it** + +Claude Code uses its bash tool to run CLI commands directly: + +``` +> Use browser-use to open github.com/trending and summarize the top repos +``` + +For the complete CLI reference and advanced features like sessions, tunnels, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). + +## Agent Self-Registration + +Claude Code can provision its own Browser Use API key autonomously — no human interaction needed. The free tier includes unlimited browser hours, free proxies in 195+ countries, persistent browser profiles, CAPTCHA solving, and stealth browsing at zero cost. + +Install the Browser Use CLI: + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +Claude Code runs two CLI commands using its bash tool: + +**1. Request a challenge** + +```bash +browser-use cloud signup +``` + +Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. + +**2. Solve the challenge and verify** + +Claude Code solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): + +```bash +browser-use cloud signup --verify +``` + +The CLI saves the API key to `~/.browser-use/config.json` automatically. No extra configuration needed — Claude Code can start using cloud browser commands immediately. + +### Claim the account (optional) + +If the human wants to see the account in the dashboard later: + +```bash +browser-use cloud signup --claim +``` + +Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. 
+ + # OpenClaw Source: https://docs.browser-use.com/cloud/tutorials/integrations/openclaw @@ -1771,6 +1861,156 @@ Once the skill is loaded, OpenClaw agents can use the `browser-use` CLI to navig For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [README](https://github.com/browser-use/browser-use/blob/main/browser_use/skill_cli/README.md) and the [Browser Use docs](https://docs.browser-use.com). +# Hermes Agent +Source: https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent + + +[Hermes Agent](https://github.com/nousresearch/hermes-agent) is an open-source, self-improving AI agent by Nous Research. It has built-in browser automation tools that work with local Chromium out of the box. Add Browser Use and those tools run on cloud browsers with anti-detect profiles, residential proxies in 195+ countries, and stealth browsing. + +Two ways to set it up: configure Browser Use as Hermes's cloud browser backend, or install the Browser Use CLI and let Hermes drive it directly. + +## Option 1: Cloud Browser Backend + +Hermes has built-in browser tools (`browser_navigate`, `browser_click`, `browser_snapshot`, etc.) that default to local Chromium. Point them at Browser Use cloud browsers instead — no extra dependencies, same Hermes experience. + +### Setup + +**1. Get your API key** + +Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and copy your API key from [Settings → API Keys](https://cloud.browser-use.com/settings?tab=api-keys&new=1). + +Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. + +**2. Configure Hermes** + +Run the setup wizard: + +```bash +hermes setup tools +``` + +Select **Browser Automation**, then **Browser Use**, and paste your API key when prompted. 
+ +Or configure manually — add your key to `~/.hermes/.env`: + +```bash +BROWSER_USE_API_KEY=your_key_here +``` + +And set the provider in `~/.hermes/config.yaml`: + +```yaml +browser: + cloud_provider: browser-use +``` + +**3. Use it** + +Just chat with Hermes — any browsing tasks automatically route through Browser Use cloud browsers: + +``` +> Find the top trending repositories on GitHub today and summarize them +``` + +## Option 2: Browser Use CLI + +The [Browser Use CLI](https://docs.browser-use.com/open-source/browser-use-cli) is a standalone tool that gives Hermes browser automation through terminal commands. Hermes drives the browser directly via its terminal tool — giving you shared browser sessions across agents, persistent logins and cookies, profile management, and access to Browser Use's full command surface. + +### Setup + +**1. Install the CLI** + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +**2. Verify the installation** + +```bash +browser-use doctor +``` + +**3. Install the skill** + +The Browser Use skill teaches Hermes the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): + +```bash +hermes skills install skills-sh/browser-use/browser-use/browser-use +``` + +Or ask Hermes directly in chat to install it. + +**4. Connect to cloud browsers** + +Log in with your API key: + +```bash +browser-use cloud login +``` + +Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. + +**5. Use it** + +Once the skill is loaded, Hermes can drive the browser through CLI commands via its terminal tool: + +``` +> Use browser-use to open github.com/trending and summarize the top repos +``` + +For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). 
+ +## Agent Self-Registration + +Hermes can provision its own Browser Use API key autonomously — no human interaction needed. This works with both options above. + +Install the Browser Use CLI: + +```bash +curl -fsSL https://browser-use.com/cli/install.sh | bash +``` + +The agent runs three CLI commands using its terminal tool: + +**1. Request a challenge** + +```bash +browser-use cloud signup +``` + +Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. + +**2. Solve the challenge and verify** + +The agent solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): + +```bash +browser-use cloud signup --verify +``` + +The CLI saves the API key to `~/.browser-use/config.json` automatically. + +**3. Copy the key to Hermes config** + +For the cloud browser backend (Option 1): + +```bash +hermes config set BROWSER_USE_API_KEY $(browser-use config get api_key) +``` + +For CLI mode (Option 2), the key is already saved — no extra step needed. + +### Claim the account (optional) + +If the human wants to see the account in the dashboard later: + +```bash +browser-use cloud signup --claim +``` + +Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. + + # MCP Server Source: https://docs.browser-use.com/cloud/guides/mcp-server @@ -1888,6 +2128,8 @@ Every webhook request includes two headers: The signature is computed over `{timestamp}.{body}`, where `body` is the JSON-serialized payload with keys sorted alphabetically and no extra whitespace. Verify it to ensure the request is authentic and to prevent replay attacks. + The Python example below uses an f-string with nested quotes that requires **Python 3.12+**. On older versions, extract `separators=(',', ':')` into a variable first. 
+ ```python Python import hashlib import hmac diff --git a/docs/llms.txt b/docs/llms.txt index 44069870..308e9381 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -50,7 +50,9 @@ export BROWSER_USE_API_KEY=bu_your_key_here - [FAQ](https://docs.browser-use.com/cloud/faq): Common questions and solutions. ## Integrations +- [Claude Code](https://docs.browser-use.com/cloud/tutorials/integrations/claude-code): Give Claude Code cloud browser automation with Browser Use. - [OpenClaw](https://docs.browser-use.com/cloud/tutorials/integrations/openclaw): Give OpenClaw agents browser automation with Browser Use — via CDP or the CLI skill. +- [Hermes Agent](https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent): Give Hermes Agent cloud browser automation with Browser Use. - [MCP Server](https://docs.browser-use.com/cloud/guides/mcp-server): Run browser automation tasks from your AI coding assistant. Connect to Claude, Cursor, Windsurf, or any MCP client. - [Webhooks](https://docs.browser-use.com/cloud/guides/webhooks): Receive real-time notifications when tasks complete. Configure webhook endpoints for async task monitoring. - [n8n](https://docs.browser-use.com/cloud/tutorials/integrations/n8n): Use Browser Use as an HTTP node in n8n workflows. diff --git a/docs/open-source/llms-full.txt b/docs/open-source/llms-full.txt index 45fb223b..6666fc1d 100644 --- a/docs/open-source/llms-full.txt +++ b/docs/open-source/llms-full.txt @@ -184,6 +184,10 @@ Source: https://docs.browser-use.com/open-source/vibecoding Source: https://docs.browser-use.com/open-source/supported-models +Browser Use natively supports 15+ LLM providers. Most providers accept any model string. Check each provider's docs to see which models are available. + +> **Which model should I use?** See our [benchmark results and recommendations](https://browser-use.com/posts/what-model-to-use) for detailed comparisons across real-world browser tasks. 
+ ### Browser Use [example](https://github.com/browser-use/browser-use/blob/main/examples/models/browser_use_llm.py) `ChatBrowserUse()` is our optimized in-house model, matching the accuracy of top models while completing tasks **3-5x** faster. [See our blog post→](https://browser-use.com/posts/speed-matters) @@ -238,7 +242,9 @@ ChatBrowserUse offers competitive pricing per 1 million tokens: | Output tokens | $3.50 | -### Google Gemini [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gemini.py) +### Google Gemini [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gemini.py) {#google-gemini} + +[Available models](https://ai.google.dev/api/models). Also supports Gemma models and Vertex AI via `ChatGoogle(model="...", vertexai=True)`. `GEMINI_API_KEY` is deprecated and should be named `GOOGLE_API_KEY` as of 2025-05. @@ -250,7 +256,7 @@ from dotenv import load_dotenv load_dotenv() # Initialize the model -llm = ChatGoogle(model='gemini-flash-latest') +llm = ChatGoogle(model='gemini-2.5-flash') # Create agent with the model agent = Agent( @@ -266,16 +272,16 @@ GOOGLE_API_KEY= ``` -### OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gpt-4.1.py) +### OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gpt-5-mini.py) -`O3` model is recommended for best accuracy. +[Available models](https://platform.openai.com/docs/models) ```python from browser_use import Agent, ChatOpenAI # Initialize the model llm = ChatOpenAI( -model="o3", +model="gpt-5", ) # Create agent with the model @@ -297,12 +303,14 @@ OPENAI_API_KEY= ### Anthropic [example](https://github.com/browser-use/browser-use/blob/main/examples/models/claude-4-sonnet.py) +[Available models](https://docs.anthropic.com/en/docs/about-claude/models). Coordinate clicking is automatically enabled for `claude-sonnet-4-*` and `claude-opus-4-*` models. 
+ ```python from browser_use import Agent, ChatAnthropic # Initialize the model llm = ChatAnthropic( -model="claude-sonnet-4-0", +model="claude-sonnet-4-6", ) # Create agent with the model @@ -318,7 +326,9 @@ And add the variable: ANTHROPIC_API_KEY= ``` -### Azure OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/azure_openai.py) +### Azure OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/azure_openai.py) {#azure-openai} + +[Available models](https://learn.microsoft.com/en-us/azure/foundry/foundry-models/concepts/models-sold-directly-by-azure) ```python from browser_use import Agent, ChatAzureOpenAI @@ -381,9 +391,9 @@ The `use_responses_api` parameter accepts: - `True`: Force use of the Responses API - `False`: Force use of the Chat Completions API -### AWS Bedrock [example](https://github.com/browser-use/browser-use/blob/main/examples/models/aws.py) +### AWS Bedrock [example](https://github.com/browser-use/browser-use/blob/main/examples/models/aws.py) {#aws-bedrock} -AWS Bedrock provides access to multiple model providers through a single API. We support both a general AWS Bedrock client and provider-specific convenience classes. +[Available models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html). AWS Bedrock provides access to multiple model providers through a single API. We support both a general AWS Bedrock client and provider-specific convenience classes. Install with `pip install "browser-use[aws]"`. #### General AWS Bedrock (supports all providers) @@ -441,7 +451,9 @@ You can also use AWS profiles or IAM roles instead of environment variables. 
The - Session tokens for temporary credentials - AWS SSO authentication (`aws_sso_auth=True`) -## Groq [example](https://github.com/browser-use/browser-use/blob/main/examples/models/llama4-groq.py) +## Groq [example](https://github.com/browser-use/browser-use/blob/main/examples/models/llama4-groq.py) {#groq} + +[Available models](https://console.groq.com/docs/models) ```python from browser_use import Agent, ChatGroq @@ -460,9 +472,9 @@ Required environment variables: GROQ_API_KEY= ``` -## Oracle Cloud Infrastructure (OCI) [example](https://github.com/browser-use/browser-use/blob/main/examples/models/oci_models.py) +## Oracle Cloud Infrastructure (OCI) [example](https://github.com/browser-use/browser-use/blob/main/examples/models/oci_models.py) {#oci} -OCI provides access to various generative AI models including Meta Llama, Cohere, and other providers through their Generative AI service. +[Available models](https://docs.oracle.com/en-us/iaas/Content/generative-ai/imported-models.htm). OCI provides access to various generative AI models including Meta Llama, Cohere, and other providers through their Generative AI service. Install with `pip install "browser-use[oci]"`. ```python from browser_use import Agent, ChatOCIRaw @@ -499,6 +511,8 @@ Authentication methods supported: ## Ollama +[Available models](https://ollama.com/library). + 1. Install Ollama: https://github.com/ollama/ollama 2. Run `ollama serve` to start the server 3. In a new terminal, install the model you want to use: `ollama pull llama3.1:8b` (this has 4.9GB) @@ -572,11 +586,9 @@ Required environment variables: MODELSCOPE_API_KEY= ``` -### Vercel AI Gateway [example](https://github.com/browser-use/browser-use/blob/main/examples/models/vercel_ai_gateway.py) - -Vercel AI Gateway provides an OpenAI-compatible API endpoint that acts as a proxy to various AI providers, with features like rate limiting, caching, and monitoring. 
+### Vercel AI Gateway [example](https://github.com/browser-use/browser-use/blob/main/examples/models/vercel_ai_gateway.py) {#vercel} -To see all available models, visit: https://ai-gateway.vercel.sh/v1/models +[Available models](https://vercel.com/ai-gateway/models). Vercel AI Gateway provides an OpenAI-compatible API endpoint that acts as a proxy to various AI providers, with features like rate limiting, caching, and monitoring. ```python from browser_use import Agent, ChatVercel @@ -620,14 +632,112 @@ Required environment variables: VERCEL_API_KEY= ``` -## Other models (DeepSeek, Novita, X...) +## DeepSeek [example](https://github.com/browser-use/browser-use/blob/main/examples/models/deepseek-chat.py) {#deepseek} + +[Available models](https://api-docs.deepseek.com/quick_start/pricing) + +```python +from browser_use import Agent, ChatDeepSeek + +llm = ChatDeepSeek(model="deepseek-chat") + +agent = Agent( +task="Your task here", +llm=llm +) +``` + +Required environment variables: + +```bash .env +DEEPSEEK_API_KEY= +``` + +## Mistral [example](https://github.com/browser-use/browser-use/blob/main/examples/models/mistral.py) {#mistral} -We support all other models that can be called via OpenAI compatible API. We are open to PRs for more providers. 
+[Available models](https://docs.mistral.ai/getting-started/models/models_overview/) + +```python +from browser_use import Agent, ChatMistral + +llm = ChatMistral(model="mistral-large-latest") + +agent = Agent( +task="Your task here", +llm=llm +) +``` + +Required environment variables: + +```bash .env +MISTRAL_API_KEY= +``` + +## Cerebras [example](https://github.com/browser-use/browser-use/blob/main/examples/models/cerebras_example.py) {#cerebras} + +[Available models](https://inference-docs.cerebras.ai/models/overview) + +```python +from browser_use import Agent, ChatCerebras + +llm = ChatCerebras(model="llama3.3-70b") + +agent = Agent( +task="Your task here", +llm=llm +) +``` + +Required environment variables: + +```bash .env +CEREBRAS_API_KEY= +``` + +## OpenRouter [example](https://github.com/browser-use/browser-use/blob/main/examples/models/openrouter.py) {#openrouter} + +[Available models](https://openrouter.ai/models). Access 300+ models from any provider through a single API. + +```python +from browser_use import Agent, ChatOpenRouter + +llm = ChatOpenRouter(model="anthropic/claude-sonnet-4-6") + +agent = Agent( +task="Your task here", +llm=llm +) +``` + +Required environment variables: + +```bash .env +OPENROUTER_API_KEY= +``` + +## LiteLLM {#litellm} + +Requires separate install (`pip install litellm`). Supports any [LiteLLM model string](https://docs.litellm.ai/docs/providers) — useful when you need a provider not covered by the native integrations above. 
+ +```python +from browser_use import Agent +from browser_use.llm.litellm import ChatLiteLLM + +llm = ChatLiteLLM(model="openai/gpt-5") + +agent = Agent( +task="Your task here", +llm=llm +) +``` + +## Other OpenAI-Compatible Providers + +Any provider with an OpenAI-compatible endpoint works via `ChatOpenAI` with a custom `base_url`: **Examples available:** -- [DeepSeek](https://github.com/browser-use/browser-use/blob/main/examples/models/deepseek-chat.py) - [Novita](https://github.com/browser-use/browser-use/blob/main/examples/models/novita.py) -- [OpenRouter](https://github.com/browser-use/browser-use/blob/main/examples/models/openrouter.py) # Browser Use CLI @@ -732,14 +842,14 @@ browser-use open https://example.com # Visible browser window browser-use --headed open https://example.com -# Use your real Chrome with Default profile (with existing logins/cookies) -browser-use --profile "Default" open https://gmail.com +# Connect to user's Chrome (preserves logins/cookies) +browser-use connect # Use a specific Chrome profile -browser-use --profile "Profile 1" open https://gmail.com +browser-use --profile "Default" open https://gmail.com -# Auto-discover and connect to running Chrome -browser-use --connect open https://example.com +# Cloud browser (zero-config, requires API key) +browser-use cloud connect # Connect to an existing browser via CDP URL browser-use --cdp-url http://localhost:9222 open https://example.com @@ -748,6 +858,8 @@ browser-use --cdp-url http://localhost:9222 open https://example.com browser-use --cdp-url ws://localhost:9222/devtools/browser/... state ``` +After `connect` or `cloud connect`, all subsequent commands go to that browser — no extra flags needed. + ## All Commands ### Navigation @@ -784,9 +896,10 @@ browser-use --cdp-url ws://localhost:9222/devtools/browser/... 
state ### Tabs | Command | Description | |---------|-------------| -| `switch ` | Switch to tab by index | -| `close-tab` | Close current tab | -| `close-tab ` | Close specific tab | +| `tab list` | List all tabs | +| `tab new [url]` | Open new tab (blank or with URL) | +| `tab switch ` | Switch to tab by index | +| `tab close [index...]` | Close tab(s) (current if no index) | ### Cookies | Command | Description | @@ -843,12 +956,10 @@ Generic REST passthrough to the Browser-Use Cloud API, plus cloud browser provis | Command | Description | |---------|-------------| -| `cloud connect` | Provision cloud browser and connect | -| `cloud connect --timeout 120` | Cloud browser with custom timeout | -| `cloud connect --proxy-country US` | Cloud browser with proxy | -| `cloud connect --profile-id ` | Cloud browser with profile | +| `cloud connect` | Provision cloud browser and connect (zero-config, auto-manages profile) | | `cloud login ` | Save API key | | `cloud logout` | Remove API key | +| `cloud close` | Disconnect and stop cloud browser | | `cloud v2 GET ` | GET request to API v2 | | `cloud v2 POST ''` | POST request to API v2 | | `cloud v3 POST ''` | POST request to API v3 | @@ -856,6 +967,8 @@ Generic REST passthrough to the Browser-Use Cloud API, plus cloud browser provis | `cloud v2 --help` | Show API v2 endpoints (from OpenAPI spec) | | `cloud v3 --help` | Show API v3 endpoints | +`cloud connect` provisions a cloud browser with a persistent profile (auto-created on first use), connects via CDP, and prints a live URL. For custom browser settings (proxy, timeout, specific profile), use `cloud v2 POST /browsers` directly. + ```bash # Save API key (or set BROWSER_USE_API_KEY env var) browser-use cloud login sk-abc123... 
@@ -953,12 +1066,25 @@ BROWSER_USE_SESSION=work browser-use state |--------|-------------| | `--headed` | Show browser window | | `--profile [NAME]` | Use real Chrome (bare `--profile` uses "Default") | -| `--connect` | Auto-discover and connect to running Chrome via CDP | +| `--connect` | Auto-discover and connect to running Chrome via CDP (prefer `connect` command instead) | | `--cdp-url ` | Connect to existing browser via CDP URL (`http://` or `ws://`) | | `--session NAME` | Target a named session (default: "default", env: `BROWSER_USE_SESSION`) | | `--json` | Output as JSON | | `--mcp` | Run as MCP server via stdin/stdout | +## Configuration + +```bash +browser-use config list # Show all config values +browser-use config set cloud_connect_proxy jp # Set a value +browser-use config get cloud_connect_proxy # Get a value +browser-use config unset cloud_connect_timeout # Remove a value +browser-use doctor # Shows config + diagnostics +browser-use setup # Interactive post-install setup +``` + +Config stored in `~/.browser-use/config.json`. + ## Examples ### Fill a Form @@ -1022,6 +1148,7 @@ All CLI-managed files live under `~/.browser-use/` (override with `BROWSER_USE_H ├── tunnels/ │ ├── {port}.json # Tunnel metadata │ └── {port}.log # Tunnel logs +├── default.state.json # Daemon lifecycle state (phase, PID, config) ├── default.sock # Daemon socket (ephemeral) ├── default.pid # Daemon PID (ephemeral) └── cli.log # Daemon log From ed01472922f23d67ede197019c7251a67816c259 Mon Sep 17 00:00:00 2001 From: MagMueller Date: Fri, 10 Apr 2026 19:28:31 -0700 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20cubic=20review=20=E2=80=94?= =?UTF-8?q?=20remove=20incorrect=20webhook=20Python=20version=20note,=20re?= =?UTF-8?q?vert=20llms.txt=20regen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - webhooks.mdx: the f-string uses different outer/inner quote chars (`"` vs `'`), so it works on Python 3.9+ — the 3.12+ claim was wrong. 
Remove the Note. - Revert llms.txt / llms-full.txt regeneration. Regenerating surfaced a pre-existing duplicate `## Agent Self-Registration` heading in two tutorial MDX files (hermes-agent and claude-code), which made the `#agent-self-registration` anchor ambiguous. That's out of scope for this PR; the docs team can regenerate llms.txt in a follow-up. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/cloud/guides/webhooks.mdx | 4 - docs/cloud/llms-full.txt | 263 +++------------------------------ docs/cloud/llms.txt | 20 ++- docs/llms-full.txt | 244 +----------------------------- docs/llms.txt | 2 - docs/open-source/llms-full.txt | 193 +++++------------------- 6 files changed, 72 insertions(+), 654 deletions(-) diff --git a/docs/cloud/guides/webhooks.mdx b/docs/cloud/guides/webhooks.mdx index 3ae28819..212a7cca 100644 --- a/docs/cloud/guides/webhooks.mdx +++ b/docs/cloud/guides/webhooks.mdx @@ -36,10 +36,6 @@ Every webhook request includes two headers: The signature is computed over `{timestamp}.{body}`, where `body` is the JSON-serialized payload with keys sorted alphabetically and no extra whitespace. Verify it to ensure the request is authentic and to prevent replay attacks. - - The Python example below uses an f-string with nested quotes that requires **Python 3.12+**. On older versions, extract `separators=(',', ':')` into a variable first. - - ```python Python import hashlib diff --git a/docs/cloud/llms-full.txt b/docs/cloud/llms-full.txt index 4acca447..68954b88 100644 --- a/docs/cloud/llms-full.txt +++ b/docs/cloud/llms-full.txt @@ -1199,7 +1199,7 @@ await browser.close(); ### Selenium -Selenium does not support remote CDP over WebSocket directly. Connect through Playwright's CDP bridge instead: +Selenium requires a local WebSocket proxy to connect to Browser Use's remote CDP endpoint. 
Use [selenium-wire](https://github.com/wkeeling/selenium-wire) or connect through Playwright's CDP bridge instead: ```python from playwright.sync_api import sync_playwright @@ -1675,96 +1675,6 @@ Use **Agent Mail** (enabled by default). For end-client scenarios, have them for Use **TOTP secret in prompt** — the agent generates codes via pyotp, no human intervention needed. -# Claude Code -Source: https://docs.browser-use.com/cloud/tutorials/integrations/claude-code - - -[Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) is Anthropic's agentic coding tool that runs in the terminal. Add Browser Use and it gets full cloud browser automation — anti-detect profiles, CAPTCHA solving, residential proxies in 195+ countries, persistent profiles, and stealth browsing. - -## Setup - -**1. Install the CLI** - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -**2. Verify the installation** - -```bash -browser-use doctor -``` - -**3. Install the skill** - -The Browser Use skill teaches Claude Code the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): - -```bash -npx skills add https://github.com/browser-use/browser-use --skill browser-use -``` - -**4. Connect to cloud browsers** - -Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and connect: - -```bash -browser-use cloud login -browser-use cloud connect -``` - -Or let Claude Code provision a free API key itself — see [Agent Self-Registration](#agent-self-registration) below. - -**5. Use it** - -Claude Code uses its bash tool to run CLI commands directly: - -``` -> Use browser-use to open github.com/trending and summarize the top repos -``` - -For the complete CLI reference and advanced features like sessions, tunnels, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). 
- -## Agent Self-Registration - -Claude Code can provision its own Browser Use API key autonomously — no human interaction needed. The free tier includes unlimited browser hours, free proxies in 195+ countries, persistent browser profiles, CAPTCHA solving, and stealth browsing at zero cost. - -Install the Browser Use CLI: - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -Claude Code runs two CLI commands using its bash tool: - -**1. Request a challenge** - -```bash -browser-use cloud signup -``` - -Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. - -**2. Solve the challenge and verify** - -Claude Code solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): - -```bash -browser-use cloud signup --verify -``` - -The CLI saves the API key to `~/.browser-use/config.json` automatically. No extra configuration needed — Claude Code can start using cloud browser commands immediately. - -### Claim the account (optional) - -If the human wants to see the account in the dashboard later: - -```bash -browser-use cloud signup --claim -``` - -Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. - - # OpenClaw Source: https://docs.browser-use.com/cloud/tutorials/integrations/openclaw @@ -1861,156 +1771,6 @@ Once the skill is loaded, OpenClaw agents can use the `browser-use` CLI to navig For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [README](https://github.com/browser-use/browser-use/blob/main/browser_use/skill_cli/README.md) and the [Browser Use docs](https://docs.browser-use.com). -# Hermes Agent -Source: https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent - - -[Hermes Agent](https://github.com/nousresearch/hermes-agent) is an open-source, self-improving AI agent by Nous Research. 
It has built-in browser automation tools that work with local Chromium out of the box. Add Browser Use and those tools run on cloud browsers with anti-detect profiles, residential proxies in 195+ countries, and stealth browsing. - -Two ways to set it up: configure Browser Use as Hermes's cloud browser backend, or install the Browser Use CLI and let Hermes drive it directly. - -## Option 1: Cloud Browser Backend - -Hermes has built-in browser tools (`browser_navigate`, `browser_click`, `browser_snapshot`, etc.) that default to local Chromium. Point them at Browser Use cloud browsers instead — no extra dependencies, same Hermes experience. - -### Setup - -**1. Get your API key** - -Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and copy your API key from [Settings → API Keys](https://cloud.browser-use.com/settings?tab=api-keys&new=1). - -Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. - -**2. Configure Hermes** - -Run the setup wizard: - -```bash -hermes setup tools -``` - -Select **Browser Automation**, then **Browser Use**, and paste your API key when prompted. - -Or configure manually — add your key to `~/.hermes/.env`: - -```bash -BROWSER_USE_API_KEY=your_key_here -``` - -And set the provider in `~/.hermes/config.yaml`: - -```yaml -browser: - cloud_provider: browser-use -``` - -**3. Use it** - -Just chat with Hermes — any browsing tasks automatically route through Browser Use cloud browsers: - -``` -> Find the top trending repositories on GitHub today and summarize them -``` - -## Option 2: Browser Use CLI - -The [Browser Use CLI](https://docs.browser-use.com/open-source/browser-use-cli) is a standalone tool that gives Hermes browser automation through terminal commands. Hermes drives the browser directly via its terminal tool — giving you shared browser sessions across agents, persistent logins and cookies, profile management, and access to Browser Use's full command surface. 
- -### Setup - -**1. Install the CLI** - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -**2. Verify the installation** - -```bash -browser-use doctor -``` - -**3. Install the skill** - -The Browser Use skill teaches Hermes the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): - -```bash -hermes skills install skills-sh/browser-use/browser-use/browser-use -``` - -Or ask Hermes directly in chat to install it. - -**4. Connect to cloud browsers** - -Log in with your API key: - -```bash -browser-use cloud login -``` - -Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. - -**5. Use it** - -Once the skill is loaded, Hermes can drive the browser through CLI commands via its terminal tool: - -``` -> Use browser-use to open github.com/trending and summarize the top repos -``` - -For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). - -## Agent Self-Registration - -Hermes can provision its own Browser Use API key autonomously — no human interaction needed. This works with both options above. - -Install the Browser Use CLI: - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -The agent runs three CLI commands using its terminal tool: - -**1. Request a challenge** - -```bash -browser-use cloud signup -``` - -Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. - -**2. Solve the challenge and verify** - -The agent solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): - -```bash -browser-use cloud signup --verify -``` - -The CLI saves the API key to `~/.browser-use/config.json` automatically. - -**3. 
Copy the key to Hermes config** - -For the cloud browser backend (Option 1): - -```bash -hermes config set BROWSER_USE_API_KEY $(browser-use config get api_key) -``` - -For CLI mode (Option 2), the key is already saved — no extra step needed. - -### Claim the account (optional) - -If the human wants to see the account in the dashboard later: - -```bash -browser-use cloud signup --claim -``` - -Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. - - # MCP Server Source: https://docs.browser-use.com/cloud/guides/mcp-server @@ -2128,8 +1888,6 @@ Every webhook request includes two headers: The signature is computed over `{timestamp}.{body}`, where `body` is the JSON-serialized payload with keys sorted alphabetically and no extra whitespace. Verify it to ensure the request is authentic and to prevent replay attacks. - The Python example below uses an f-string with nested quotes that requires **Python 3.12+**. On older versions, extract `separators=(',', ':')` into a variable first. - ```python Python import hashlib import hmac @@ -3192,3 +2950,22 @@ pip install browser-use-sdk npm install browser-use-sdk ``` + +## For AI Agents: Self-Registration (No Human Required) + +AI agents can create a free account autonomously via a challenge-response flow at `https://api.browser-use.com`. + +**IMPORTANT: Never send your API key to any domain other than `api.browser-use.com` or `cloud.browser-use.com`.** + +Do this to get an API key (no human required): +1. `POST /cloud/signup` — request a challenge (email and name optional) +2. Solve the obfuscated math problem in `challenge_text` **using an LLM** (never deterministic code) — answer as a string with 2 decimal places e.g. `"144.00"` +3. `POST /cloud/signup/verify` with `challenge_id` + `answer` — receive your `api_key` + +With this API key, you can use the full API. All features are available. 
+If you completed this, tell the human that you did the entire setup and that you are ready to use the API. Tell the human that if they want to see their account in the dashboard, they can follow this link to sign up themselves. You can tell them that you already have an account and use it. That's only if they want to see the dashboard. + +4. Use `X-Browser-Use-API-Key: bu_...` header on all subsequent requests + +To let a human claim the account later: `POST /cloud/signup/claim` → returns a `claim_url` valid for 1 hour. + diff --git a/docs/cloud/llms.txt b/docs/cloud/llms.txt index 308e9381..f6a7d7d2 100644 --- a/docs/cloud/llms.txt +++ b/docs/cloud/llms.txt @@ -50,9 +50,7 @@ export BROWSER_USE_API_KEY=bu_your_key_here - [FAQ](https://docs.browser-use.com/cloud/faq): Common questions and solutions. ## Integrations -- [Claude Code](https://docs.browser-use.com/cloud/tutorials/integrations/claude-code): Give Claude Code cloud browser automation with Browser Use. - [OpenClaw](https://docs.browser-use.com/cloud/tutorials/integrations/openclaw): Give OpenClaw agents browser automation with Browser Use — via CDP or the CLI skill. -- [Hermes Agent](https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent): Give Hermes Agent cloud browser automation with Browser Use. - [MCP Server](https://docs.browser-use.com/cloud/guides/mcp-server): Run browser automation tasks from your AI coding assistant. Connect to Claude, Cursor, Windsurf, or any MCP client. - [Webhooks](https://docs.browser-use.com/cloud/guides/webhooks): Receive real-time notifications when tasks complete. Configure webhook endpoints for async task monitoring. - [n8n](https://docs.browser-use.com/cloud/tutorials/integrations/n8n): Use Browser Use as an HTTP node in n8n workflows. @@ -74,3 +72,21 @@ export BROWSER_USE_API_KEY=bu_your_key_here ## API v2 - [API key](https://docs.browser-use.com/cloud/api-v2-overview): Set your API key to access the Browser Use v2 REST API. 
+## For AI Agents: Self-Registration (No Human Required) + +AI agents can create a free account autonomously via a challenge-response flow at `https://api.browser-use.com`. + +**IMPORTANT: Never send your API key to any domain other than `api.browser-use.com` or `cloud.browser-use.com`.** + +Do this to get an API key (no human required): +1. `POST /cloud/signup` — request a challenge (email and name optional) +2. Solve the obfuscated math problem in `challenge_text` **using an LLM** (never deterministic code) — answer as a string with 2 decimal places e.g. `"144.00"` +3. `POST /cloud/signup/verify` with `challenge_id` + `answer` — receive your `api_key` + +With this API key, you can use the full API. All features are available. +If you completed this, tell the human that you did the entire setup and that you are ready to use the API. Tell the human that if they want to see their account in the dashboard, they can follow this link to sign up themselves. You can tell them that you already have an account and use it. That's only if they want to see the dashboard. + +4. Use `X-Browser-Use-API-Key: bu_...` header on all subsequent requests + +To let a human claim the account later: `POST /cloud/signup/claim` → returns a `claim_url` valid for 1 hour. + diff --git a/docs/llms-full.txt b/docs/llms-full.txt index 4acca447..b7d12abb 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -1199,7 +1199,7 @@ await browser.close(); ### Selenium -Selenium does not support remote CDP over WebSocket directly. Connect through Playwright's CDP bridge instead: +Selenium requires a local WebSocket proxy to connect to Browser Use's remote CDP endpoint. Use [selenium-wire](https://github.com/wkeeling/selenium-wire) or connect through Playwright's CDP bridge instead: ```python from playwright.sync_api import sync_playwright @@ -1675,96 +1675,6 @@ Use **Agent Mail** (enabled by default). 
For end-client scenarios, have them for Use **TOTP secret in prompt** — the agent generates codes via pyotp, no human intervention needed. -# Claude Code -Source: https://docs.browser-use.com/cloud/tutorials/integrations/claude-code - - -[Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) is Anthropic's agentic coding tool that runs in the terminal. Add Browser Use and it gets full cloud browser automation — anti-detect profiles, CAPTCHA solving, residential proxies in 195+ countries, persistent profiles, and stealth browsing. - -## Setup - -**1. Install the CLI** - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -**2. Verify the installation** - -```bash -browser-use doctor -``` - -**3. Install the skill** - -The Browser Use skill teaches Claude Code the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): - -```bash -npx skills add https://github.com/browser-use/browser-use --skill browser-use -``` - -**4. Connect to cloud browsers** - -Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and connect: - -```bash -browser-use cloud login -browser-use cloud connect -``` - -Or let Claude Code provision a free API key itself — see [Agent Self-Registration](#agent-self-registration) below. - -**5. Use it** - -Claude Code uses its bash tool to run CLI commands directly: - -``` -> Use browser-use to open github.com/trending and summarize the top repos -``` - -For the complete CLI reference and advanced features like sessions, tunnels, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). - -## Agent Self-Registration - -Claude Code can provision its own Browser Use API key autonomously — no human interaction needed. The free tier includes unlimited browser hours, free proxies in 195+ countries, persistent browser profiles, CAPTCHA solving, and stealth browsing at zero cost. 
- -Install the Browser Use CLI: - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -Claude Code runs two CLI commands using its bash tool: - -**1. Request a challenge** - -```bash -browser-use cloud signup -``` - -Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. - -**2. Solve the challenge and verify** - -Claude Code solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): - -```bash -browser-use cloud signup --verify -``` - -The CLI saves the API key to `~/.browser-use/config.json` automatically. No extra configuration needed — Claude Code can start using cloud browser commands immediately. - -### Claim the account (optional) - -If the human wants to see the account in the dashboard later: - -```bash -browser-use cloud signup --claim -``` - -Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. - - # OpenClaw Source: https://docs.browser-use.com/cloud/tutorials/integrations/openclaw @@ -1861,156 +1771,6 @@ Once the skill is loaded, OpenClaw agents can use the `browser-use` CLI to navig For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [README](https://github.com/browser-use/browser-use/blob/main/browser_use/skill_cli/README.md) and the [Browser Use docs](https://docs.browser-use.com). -# Hermes Agent -Source: https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent - - -[Hermes Agent](https://github.com/nousresearch/hermes-agent) is an open-source, self-improving AI agent by Nous Research. It has built-in browser automation tools that work with local Chromium out of the box. Add Browser Use and those tools run on cloud browsers with anti-detect profiles, residential proxies in 195+ countries, and stealth browsing. 
- -Two ways to set it up: configure Browser Use as Hermes's cloud browser backend, or install the Browser Use CLI and let Hermes drive it directly. - -## Option 1: Cloud Browser Backend - -Hermes has built-in browser tools (`browser_navigate`, `browser_click`, `browser_snapshot`, etc.) that default to local Chromium. Point them at Browser Use cloud browsers instead — no extra dependencies, same Hermes experience. - -### Setup - -**1. Get your API key** - -Sign up at [cloud.browser-use.com](https://cloud.browser-use.com) and copy your API key from [Settings → API Keys](https://cloud.browser-use.com/settings?tab=api-keys&new=1). - -Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. - -**2. Configure Hermes** - -Run the setup wizard: - -```bash -hermes setup tools -``` - -Select **Browser Automation**, then **Browser Use**, and paste your API key when prompted. - -Or configure manually — add your key to `~/.hermes/.env`: - -```bash -BROWSER_USE_API_KEY=your_key_here -``` - -And set the provider in `~/.hermes/config.yaml`: - -```yaml -browser: - cloud_provider: browser-use -``` - -**3. Use it** - -Just chat with Hermes — any browsing tasks automatically route through Browser Use cloud browsers: - -``` -> Find the top trending repositories on GitHub today and summarize them -``` - -## Option 2: Browser Use CLI - -The [Browser Use CLI](https://docs.browser-use.com/open-source/browser-use-cli) is a standalone tool that gives Hermes browser automation through terminal commands. Hermes drives the browser directly via its terminal tool — giving you shared browser sessions across agents, persistent logins and cookies, profile management, and access to Browser Use's full command surface. - -### Setup - -**1. Install the CLI** - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -**2. Verify the installation** - -```bash -browser-use doctor -``` - -**3. 
Install the skill** - -The Browser Use skill teaches Hermes the full CLI command set. Install it from [skills.sh](https://skills.sh/browser-use/browser-use/browser-use): - -```bash -hermes skills install skills-sh/browser-use/browser-use/browser-use -``` - -Or ask Hermes directly in chat to install it. - -**4. Connect to cloud browsers** - -Log in with your API key: - -```bash -browser-use cloud login -``` - -Or let the agent provision one itself — see [Agent Self-Registration](#agent-self-registration) below. - -**5. Use it** - -Once the skill is loaded, Hermes can drive the browser through CLI commands via its terminal tool: - -``` -> Use browser-use to open github.com/trending and summarize the top repos -``` - -For the complete CLI reference and advanced features like cloud browsers, tunnels, sessions, and Python execution, see the [Browser Use CLI docs](https://docs.browser-use.com/open-source/browser-use-cli). - -## Agent Self-Registration - -Hermes can provision its own Browser Use API key autonomously — no human interaction needed. This works with both options above. - -Install the Browser Use CLI: - -```bash -curl -fsSL https://browser-use.com/cli/install.sh | bash -``` - -The agent runs three CLI commands using its terminal tool: - -**1. Request a challenge** - -```bash -browser-use cloud signup -``` - -Returns a `Challenge ID` and `Challenge` — an obfuscated math problem that requires LLM reasoning to solve. - -**2. Solve the challenge and verify** - -The agent solves the math problem, then verifies with the answer (a string with 2 decimal places, e.g. `"13.60"`): - -```bash -browser-use cloud signup --verify -``` - -The CLI saves the API key to `~/.browser-use/config.json` automatically. - -**3. Copy the key to Hermes config** - -For the cloud browser backend (Option 1): - -```bash -hermes config set BROWSER_USE_API_KEY $(browser-use config get api_key) -``` - -For CLI mode (Option 2), the key is already saved — no extra step needed. 
- -### Claim the account (optional) - -If the human wants to see the account in the dashboard later: - -```bash -browser-use cloud signup --claim -``` - -Returns a claim URL valid for 1 hour. The human can follow the link to sign up and claim the agent-created account. - - # MCP Server Source: https://docs.browser-use.com/cloud/guides/mcp-server @@ -2128,8 +1888,6 @@ Every webhook request includes two headers: The signature is computed over `{timestamp}.{body}`, where `body` is the JSON-serialized payload with keys sorted alphabetically and no extra whitespace. Verify it to ensure the request is authentic and to prevent replay attacks. - The Python example below uses an f-string with nested quotes that requires **Python 3.12+**. On older versions, extract `separators=(',', ':')` into a variable first. - ```python Python import hashlib import hmac diff --git a/docs/llms.txt b/docs/llms.txt index 308e9381..44069870 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -50,9 +50,7 @@ export BROWSER_USE_API_KEY=bu_your_key_here - [FAQ](https://docs.browser-use.com/cloud/faq): Common questions and solutions. ## Integrations -- [Claude Code](https://docs.browser-use.com/cloud/tutorials/integrations/claude-code): Give Claude Code cloud browser automation with Browser Use. - [OpenClaw](https://docs.browser-use.com/cloud/tutorials/integrations/openclaw): Give OpenClaw agents browser automation with Browser Use — via CDP or the CLI skill. -- [Hermes Agent](https://docs.browser-use.com/cloud/tutorials/integrations/hermes-agent): Give Hermes Agent cloud browser automation with Browser Use. - [MCP Server](https://docs.browser-use.com/cloud/guides/mcp-server): Run browser automation tasks from your AI coding assistant. Connect to Claude, Cursor, Windsurf, or any MCP client. - [Webhooks](https://docs.browser-use.com/cloud/guides/webhooks): Receive real-time notifications when tasks complete. Configure webhook endpoints for async task monitoring. 
- [n8n](https://docs.browser-use.com/cloud/tutorials/integrations/n8n): Use Browser Use as an HTTP node in n8n workflows. diff --git a/docs/open-source/llms-full.txt b/docs/open-source/llms-full.txt index 6666fc1d..45fb223b 100644 --- a/docs/open-source/llms-full.txt +++ b/docs/open-source/llms-full.txt @@ -184,10 +184,6 @@ Source: https://docs.browser-use.com/open-source/vibecoding Source: https://docs.browser-use.com/open-source/supported-models -Browser Use natively supports 15+ LLM providers. Most providers accept any model string. Check each provider's docs to see which models are available. - -> **Which model should I use?** See our [benchmark results and recommendations](https://browser-use.com/posts/what-model-to-use) for detailed comparisons across real-world browser tasks. - ### Browser Use [example](https://github.com/browser-use/browser-use/blob/main/examples/models/browser_use_llm.py) `ChatBrowserUse()` is our optimized in-house model, matching the accuracy of top models while completing tasks **3-5x** faster. [See our blog post→](https://browser-use.com/posts/speed-matters) @@ -242,9 +238,7 @@ ChatBrowserUse offers competitive pricing per 1 million tokens: | Output tokens | $3.50 | -### Google Gemini [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gemini.py) {#google-gemini} - -[Available models](https://ai.google.dev/api/models). Also supports Gemma models and Vertex AI via `ChatGoogle(model="...", vertexai=True)`. +### Google Gemini [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gemini.py) `GEMINI_API_KEY` is deprecated and should be named `GOOGLE_API_KEY` as of 2025-05. 
@@ -256,7 +250,7 @@ from dotenv import load_dotenv load_dotenv() # Initialize the model -llm = ChatGoogle(model='gemini-2.5-flash') +llm = ChatGoogle(model='gemini-flash-latest') # Create agent with the model agent = Agent( @@ -272,16 +266,16 @@ GOOGLE_API_KEY= ``` -### OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gpt-5-mini.py) +### OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/gpt-4.1.py) -[Available models](https://platform.openai.com/docs/models) +`O3` model is recommended for best accuracy. ```python from browser_use import Agent, ChatOpenAI # Initialize the model llm = ChatOpenAI( -model="gpt-5", +model="o3", ) # Create agent with the model @@ -303,14 +297,12 @@ OPENAI_API_KEY= ### Anthropic [example](https://github.com/browser-use/browser-use/blob/main/examples/models/claude-4-sonnet.py) -[Available models](https://docs.anthropic.com/en/docs/about-claude/models). Coordinate clicking is automatically enabled for `claude-sonnet-4-*` and `claude-opus-4-*` models. 
- ```python from browser_use import Agent, ChatAnthropic # Initialize the model llm = ChatAnthropic( -model="claude-sonnet-4-6", +model="claude-sonnet-4-0", ) # Create agent with the model @@ -326,9 +318,7 @@ And add the variable: ANTHROPIC_API_KEY= ``` -### Azure OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/azure_openai.py) {#azure-openai} - -[Available models](https://learn.microsoft.com/en-us/azure/foundry/foundry-models/concepts/models-sold-directly-by-azure) +### Azure OpenAI [example](https://github.com/browser-use/browser-use/blob/main/examples/models/azure_openai.py) ```python from browser_use import Agent, ChatAzureOpenAI @@ -391,9 +381,9 @@ The `use_responses_api` parameter accepts: - `True`: Force use of the Responses API - `False`: Force use of the Chat Completions API -### AWS Bedrock [example](https://github.com/browser-use/browser-use/blob/main/examples/models/aws.py) {#aws-bedrock} +### AWS Bedrock [example](https://github.com/browser-use/browser-use/blob/main/examples/models/aws.py) -[Available models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html). AWS Bedrock provides access to multiple model providers through a single API. We support both a general AWS Bedrock client and provider-specific convenience classes. Install with `pip install "browser-use[aws]"`. +AWS Bedrock provides access to multiple model providers through a single API. We support both a general AWS Bedrock client and provider-specific convenience classes. #### General AWS Bedrock (supports all providers) @@ -451,9 +441,7 @@ You can also use AWS profiles or IAM roles instead of environment variables. 
The - Session tokens for temporary credentials - AWS SSO authentication (`aws_sso_auth=True`) -## Groq [example](https://github.com/browser-use/browser-use/blob/main/examples/models/llama4-groq.py) {#groq} - -[Available models](https://console.groq.com/docs/models) +## Groq [example](https://github.com/browser-use/browser-use/blob/main/examples/models/llama4-groq.py) ```python from browser_use import Agent, ChatGroq @@ -472,9 +460,9 @@ Required environment variables: GROQ_API_KEY= ``` -## Oracle Cloud Infrastructure (OCI) [example](https://github.com/browser-use/browser-use/blob/main/examples/models/oci_models.py) {#oci} +## Oracle Cloud Infrastructure (OCI) [example](https://github.com/browser-use/browser-use/blob/main/examples/models/oci_models.py) -[Available models](https://docs.oracle.com/en-us/iaas/Content/generative-ai/imported-models.htm). OCI provides access to various generative AI models including Meta Llama, Cohere, and other providers through their Generative AI service. Install with `pip install "browser-use[oci]"`. +OCI provides access to various generative AI models including Meta Llama, Cohere, and other providers through their Generative AI service. ```python from browser_use import Agent, ChatOCIRaw @@ -511,8 +499,6 @@ Authentication methods supported: ## Ollama -[Available models](https://ollama.com/library). - 1. Install Ollama: https://github.com/ollama/ollama 2. Run `ollama serve` to start the server 3. 
In a new terminal, install the model you want to use: `ollama pull llama3.1:8b` (this has 4.9GB) @@ -586,9 +572,11 @@ Required environment variables: MODELSCOPE_API_KEY= ``` -### Vercel AI Gateway [example](https://github.com/browser-use/browser-use/blob/main/examples/models/vercel_ai_gateway.py) {#vercel} +### Vercel AI Gateway [example](https://github.com/browser-use/browser-use/blob/main/examples/models/vercel_ai_gateway.py) + +Vercel AI Gateway provides an OpenAI-compatible API endpoint that acts as a proxy to various AI providers, with features like rate limiting, caching, and monitoring. -[Available models](https://vercel.com/ai-gateway/models). Vercel AI Gateway provides an OpenAI-compatible API endpoint that acts as a proxy to various AI providers, with features like rate limiting, caching, and monitoring. +To see all available models, visit: https://ai-gateway.vercel.sh/v1/models ```python from browser_use import Agent, ChatVercel @@ -632,112 +620,14 @@ Required environment variables: VERCEL_API_KEY= ``` -## DeepSeek [example](https://github.com/browser-use/browser-use/blob/main/examples/models/deepseek-chat.py) {#deepseek} - -[Available models](https://api-docs.deepseek.com/quick_start/pricing) - -```python -from browser_use import Agent, ChatDeepSeek - -llm = ChatDeepSeek(model="deepseek-chat") - -agent = Agent( -task="Your task here", -llm=llm -) -``` - -Required environment variables: - -```bash .env -DEEPSEEK_API_KEY= -``` - -## Mistral [example](https://github.com/browser-use/browser-use/blob/main/examples/models/mistral.py) {#mistral} +## Other models (DeepSeek, Novita, X...) 
-[Available models](https://docs.mistral.ai/getting-started/models/models_overview/) - -```python -from browser_use import Agent, ChatMistral - -llm = ChatMistral(model="mistral-large-latest") - -agent = Agent( -task="Your task here", -llm=llm -) -``` - -Required environment variables: - -```bash .env -MISTRAL_API_KEY= -``` - -## Cerebras [example](https://github.com/browser-use/browser-use/blob/main/examples/models/cerebras_example.py) {#cerebras} - -[Available models](https://inference-docs.cerebras.ai/models/overview) - -```python -from browser_use import Agent, ChatCerebras - -llm = ChatCerebras(model="llama3.3-70b") - -agent = Agent( -task="Your task here", -llm=llm -) -``` - -Required environment variables: - -```bash .env -CEREBRAS_API_KEY= -``` - -## OpenRouter [example](https://github.com/browser-use/browser-use/blob/main/examples/models/openrouter.py) {#openrouter} - -[Available models](https://openrouter.ai/models). Access 300+ models from any provider through a single API. - -```python -from browser_use import Agent, ChatOpenRouter - -llm = ChatOpenRouter(model="anthropic/claude-sonnet-4-6") - -agent = Agent( -task="Your task here", -llm=llm -) -``` - -Required environment variables: - -```bash .env -OPENROUTER_API_KEY= -``` - -## LiteLLM {#litellm} - -Requires separate install (`pip install litellm`). Supports any [LiteLLM model string](https://docs.litellm.ai/docs/providers) — useful when you need a provider not covered by the native integrations above. - -```python -from browser_use import Agent -from browser_use.llm.litellm import ChatLiteLLM - -llm = ChatLiteLLM(model="openai/gpt-5") - -agent = Agent( -task="Your task here", -llm=llm -) -``` - -## Other OpenAI-Compatible Providers - -Any provider with an OpenAI-compatible endpoint works via `ChatOpenAI` with a custom `base_url`: +We support all other models that can be called via OpenAI compatible API. We are open to PRs for more providers. 
**Examples available:** +- [DeepSeek](https://github.com/browser-use/browser-use/blob/main/examples/models/deepseek-chat.py) - [Novita](https://github.com/browser-use/browser-use/blob/main/examples/models/novita.py) +- [OpenRouter](https://github.com/browser-use/browser-use/blob/main/examples/models/openrouter.py) # Browser Use CLI @@ -842,14 +732,14 @@ browser-use open https://example.com # Visible browser window browser-use --headed open https://example.com -# Connect to user's Chrome (preserves logins/cookies) -browser-use connect +# Use your real Chrome with Default profile (with existing logins/cookies) +browser-use --profile "Default" open https://gmail.com # Use a specific Chrome profile -browser-use --profile "Default" open https://gmail.com +browser-use --profile "Profile 1" open https://gmail.com -# Cloud browser (zero-config, requires API key) -browser-use cloud connect +# Auto-discover and connect to running Chrome +browser-use --connect open https://example.com # Connect to an existing browser via CDP URL browser-use --cdp-url http://localhost:9222 open https://example.com @@ -858,8 +748,6 @@ browser-use --cdp-url http://localhost:9222 open https://example.com browser-use --cdp-url ws://localhost:9222/devtools/browser/... state ``` -After `connect` or `cloud connect`, all subsequent commands go to that browser — no extra flags needed. 
- ## All Commands ### Navigation @@ -896,10 +784,9 @@ After `connect` or `cloud connect`, all subsequent commands go to that browser ### Tabs | Command | Description | |---------|-------------| -| `tab list` | List all tabs | -| `tab new [url]` | Open new tab (blank or with URL) | -| `tab switch <index>` | Switch to tab by index | -| `tab close [index...]` | Close tab(s) (current if no index) | +| `switch <index>` | Switch to tab by index | +| `close-tab` | Close current tab | +| `close-tab <index>` | Close specific tab | ### Cookies | Command | Description | @@ -956,10 +843,12 @@ Generic REST passthrough to the Browser-Use Cloud API, plus cloud browser provis | Command | Description | |---------|-------------| -| `cloud connect` | Provision cloud browser and connect (zero-config, auto-manages profile) | +| `cloud connect` | Provision cloud browser and connect | +| `cloud connect --timeout 120` | Cloud browser with custom timeout | +| `cloud connect --proxy-country US` | Cloud browser with proxy | +| `cloud connect --profile-id <id>` | Cloud browser with profile | | `cloud login <api-key>` | Save API key | | `cloud logout` | Remove API key | -| `cloud close` | Disconnect and stop cloud browser | | `cloud v2 GET <path>` | GET request to API v2 | | `cloud v2 POST <path> '<json>'` | POST request to API v2 | | `cloud v3 POST <path> '<json>'` | POST request to API v3 | @@ -967,8 +856,6 @@ Generic REST passthrough to the Browser-Use Cloud API, plus cloud browser provis | `cloud v2 --help` | Show API v2 endpoints (from OpenAPI spec) | | `cloud v3 --help` | Show API v3 endpoints | -`cloud connect` provisions a cloud browser with a persistent profile (auto-created on first use), connects via CDP, and prints a live URL. For custom browser settings (proxy, timeout, specific profile), use `cloud v2 POST /browsers` directly. - ```bash # Save API key (or set BROWSER_USE_API_KEY env var) browser-use cloud login sk-abc123...
@@ -1066,25 +953,12 @@ BROWSER_USE_SESSION=work browser-use state |--------|-------------| | `--headed` | Show browser window | | `--profile [NAME]` | Use real Chrome (bare `--profile` uses "Default") | -| `--connect` | Auto-discover and connect to running Chrome via CDP (prefer `connect` command instead) | +| `--connect` | Auto-discover and connect to running Chrome via CDP | | `--cdp-url <url>` | Connect to existing browser via CDP URL (`http://` or `ws://`) | | `--session NAME` | Target a named session (default: "default", env: `BROWSER_USE_SESSION`) | | `--json` | Output as JSON | | `--mcp` | Run as MCP server via stdin/stdout | -## Configuration - -```bash -browser-use config list # Show all config values -browser-use config set cloud_connect_proxy jp # Set a value -browser-use config get cloud_connect_proxy # Get a value -browser-use config unset cloud_connect_timeout # Remove a value -browser-use doctor # Shows config + diagnostics -browser-use setup # Interactive post-install setup -``` - -Config stored in `~/.browser-use/config.json`. - ## Examples ### Fill a Form @@ -1148,7 +1022,6 @@ All CLI-managed files live under `~/.browser-use/` (override with `BROWSER_USE_H ├── tunnels/ │ ├── {port}.json # Tunnel metadata │ └── {port}.log # Tunnel logs -├── default.state.json # Daemon lifecycle state (phase, PID, config) ├── default.sock # Daemon socket (ephemeral) ├── default.pid # Daemon PID (ephemeral) └── cli.log # Daemon log