diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f0f7381..bf86fa4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,3 +100,31 @@ jobs: - name: Run integration tests run: uv run pytest tests/integration -v --tb=short -m "not smoke" timeout-minutes: 15 + + packaging-smoke: + name: Packaging Smoke + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + with: + python-version-file: .python-version + enable-cache: true + + - name: Sync dependencies + run: uv sync --locked --dev + + - name: Build wheel + run: uv build --wheel --no-sources + + - name: Verify wheel-packaged skills + run: uv run pytest tests/unit/test_release_artifacts.py -v + + - name: Smoke install-skills from built wheel + run: | + python -m venv .pkg-smoke-venv + .pkg-smoke-venv/bin/pip install dist/*.whl + target_dir="$(mktemp -d)" + .pkg-smoke-venv/bin/browser-cli install-skills --dry-run --target "$target_dir" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 995b91a..363d46d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,5 +24,13 @@ jobs: - name: Build package run: uv build --no-sources + - name: Verify wheel-packaged skills + run: | + uv run pytest tests/unit/test_release_artifacts.py -v + python -m venv .pkg-smoke-venv + .pkg-smoke-venv/bin/pip install dist/*.whl + target_dir="$(mktemp -d)" + .pkg-smoke-venv/bin/browser-cli install-skills --dry-run --target "$target_dir" + - name: Publish to PyPI run: uv publish diff --git a/AGENTS.md b/AGENTS.md index e95c3e2..e036bd5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -54,6 +54,8 @@ the implementation, and where should a change land first. 
`src/browser_cli/cli/main.py` - Install and runtime diagnostics: `src/browser_cli/commands/doctor.py` +- Packaged skill installation and `--target` handling: + `src/browser_cli/commands/install_skills.py` - Runtime path discovery: `src/browser_cli/commands/paths.py` - Daemon-backed command catalog, arguments, aliases, and request builders: @@ -146,6 +148,8 @@ the implementation, and where should a change land first. - User-facing output rendering: `src/browser_cli/outputs/render.py`, `src/browser_cli/outputs/json.py` +- Packaged Browser CLI skill assets shipped with the installed wheel: + `src/browser_cli/packaged_skills/*` - Error taxonomy and exit codes: `src/browser_cli/errors.py`, `src/browser_cli/error_codes.py`, `src/browser_cli/exit_codes.py` @@ -154,7 +158,7 @@ the implementation, and where should a change land first. - Example tasks and packaged automations: `tasks/*` - Browser-CLI-specific agent delivery guidance: - `skills/browser-cli-explore-delivery/SKILL.md` + `skills/browser-cli-delivery/SKILL.md` - Tests for behavior and contracts: `tests/unit/*`, `tests/integration/*` @@ -219,6 +223,7 @@ the implementation, and where should a change land first. - `browser_cli.drivers` owns the explicit backend contract plus `playwright_driver` and `extension_driver`. Drivers consume daemon-built locator specs, not raw refs. - `browser_cli.extension` owns the extension transport, handshake, heartbeat, required-capability checks, and artifact assembly from WebSocket chunks. - `browser_cli.outputs` owns final rendering for content-first and JSON-first surfaces. +- `browser_cli.packaged_skills` owns the Browser CLI skill assets that are shipped in installed distributions and consumed by `browser-cli install-skills`. - `browser_cli.profiles` owns Chrome executable discovery, managed profile directories, profile naming, and lock detection. - `browser_cli.refs` owns semantic ref models, snapshot generation, latest-snapshot registry state, and locator reconstruction. 
- `browser_cli.tabs` owns agent-visible tab state, active-tab tracking, and busy-state conflict rules. @@ -231,6 +236,7 @@ public interactive commands. ## Implementation Conventions - Top-level parser registration lives in `src/browser_cli/cli/main.py`. `read`, `doctor`, `paths`, `task`, `automation`, `status`, and lifecycle `reload` are hand-wired there; the rest come from `get_action_specs()`. +- `browser-cli install-skills` installs the packaged Browser CLI skills into `~/.agents/skills` by default and `--target` overrides the destination root. - Public daemon-backed actions should be added through `ActionSpec`, not by manually bolting ad hoc parsers into `main.py`. - The lifecycle command `browser-cli reload` and the page action `browser-cli page-reload` are intentionally different surfaces. Do not collapse them. - Public daemon commands return JSON payloads. Preserve `ok/data/meta` shape and machine-readable error codes. diff --git a/docs/superpowers/plans/2026-04-11-browser-cli-network-response-body-implementation-plan.md b/docs/superpowers/plans/2026-04-11-browser-cli-network-response-body-implementation-plan.md index dc84e70..f5ff321 100644 --- a/docs/superpowers/plans/2026-04-11-browser-cli-network-response-body-implementation-plan.md +++ b/docs/superpowers/plans/2026-04-11-browser-cli-network-response-body-implementation-plan.md @@ -103,7 +103,9 @@ docs/ superpowers/specs/ superpowers/plans/ skills/ - browser-cli-explore-delivery/ + browser-cli-delivery/ + browser-cli-explore/ + browser-cli-converge/ ``` The exact file split may evolve during implementation, but the old page-runtime @@ -315,7 +317,7 @@ network patch should not survive the final migration. - `/Users/hongv/workspace/m-projects/browser-cli/docs/smoke-checklist.md` - task examples - Browser CLI docs that mention network observation -2. Update the explore-delivery skill references to prefer: +2. 
Update the delivery/explore/converge skill references to prefer: - `network-wait` for response-dependent tasks - `network-start/network/network-stop` for broader collection 3. Remove obsolete wording that says Browser CLI only captures network requests. @@ -350,4 +352,3 @@ Before calling the work complete, verify: - large bodies do not cause uncontrolled memory growth - extension and Playwright backends pass the same public expectations - no old request-only `network` semantics remain in code or docs - diff --git a/docs/superpowers/plans/2026-04-13-browser-cli-task-automation-implementation-plan.md b/docs/superpowers/plans/2026-04-13-browser-cli-task-automation-implementation-plan.md index 6a34e95..bfe78d0 100644 --- a/docs/superpowers/plans/2026-04-13-browser-cli-task-automation-implementation-plan.md +++ b/docs/superpowers/plans/2026-04-13-browser-cli-task-automation-implementation-plan.md @@ -85,10 +85,12 @@ - Reframe the product around task and automation. - `AGENTS.md` - Update durable navigation guidance, code map entries, and debugging paths. -- `skills/browser-cli-explore-delivery/SKILL.md` - - Require canonical task templates and `browser-cli task validate`. -- `skills/browser-cli-explore-delivery/references/preflight-and-runtime.md` - - Update task execution/publish guidance. +- `skills/browser-cli-delivery/SKILL.md` + - Define orchestration, validation rollback, and optional automation gating. +- `skills/browser-cli-explore/SKILL.md` + - Capture task execution and exploration guidance. +- `skills/browser-cli-converge/SKILL.md` + - Capture task-code convergence and validation guidance. - `scripts/guards/product_contracts.py` - Freeze the new top-level `task` and `automation` surfaces. 
- `scripts/guards/architecture.py` @@ -1125,8 +1127,9 @@ git commit -m "refactor: remove legacy workflow surface" **Files:** - Modify: `README.md` - Modify: `AGENTS.md` -- Modify: `skills/browser-cli-explore-delivery/SKILL.md` -- Modify: `skills/browser-cli-explore-delivery/references/preflight-and-runtime.md` +- Create: `skills/browser-cli-delivery/SKILL.md` +- Create: `skills/browser-cli-explore/SKILL.md` +- Create: `skills/browser-cli-converge/SKILL.md` - Modify: `docs/smoke-checklist.md` - Create: `docs/examples/task-and-automation.md` - Delete: `docs/examples/task-and-workflow.md` @@ -1237,7 +1240,7 @@ Expected: PASS - [ ] **Step 7: Commit** ```bash -git add README.md AGENTS.md skills/browser-cli-explore-delivery/SKILL.md skills/browser-cli-explore-delivery/references/preflight-and-runtime.md docs/smoke-checklist.md docs/examples/task-and-automation.md scripts/guards/product_contracts.py scripts/guards/architecture.py scripts/guards/docs_sync.py +git add README.md AGENTS.md skills/browser-cli-delivery/SKILL.md skills/browser-cli-explore/SKILL.md skills/browser-cli-converge/SKILL.md docs/smoke-checklist.md docs/examples/task-and-automation.md scripts/guards/product_contracts.py scripts/guards/architecture.py scripts/guards/docs_sync.py git rm docs/examples/task-and-workflow.md git commit -m "docs: update task and automation guidance" ``` diff --git a/docs/superpowers/plans/2026-04-14-browser-cli-delivery-skills-implementation-plan.md b/docs/superpowers/plans/2026-04-14-browser-cli-delivery-skills-implementation-plan.md new file mode 100644 index 0000000..9665ad1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-14-browser-cli-delivery-skills-implementation-plan.md @@ -0,0 +1,634 @@ +# Browser CLI Delivery Skills Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Replace the single delivery-oriented skill with a three-skill Browser CLI delivery stack that captures exploration feedback into `task.meta.json`, converges validated paths into `task.py`, and keeps `automation.toml` plus publish optional. + +**Architecture:** Add three new skills under `skills/` with clear role boundaries: `browser-cli-delivery` as the orchestrator, `browser-cli-explore` as the metadata-first exploration skill, and `browser-cli-converge` as the task-code convergence skill. Lock the new topology with repo text-contract tests, update `AGENTS.md` to point maintainers at the new entrypoint, and remove the legacy single-skill directory after its references are cleaned up. + +**Tech Stack:** Markdown skill docs, pytest repo text-contract tests, AGENTS.md repository guidance, Browser CLI task and automation contracts + +--- + +## File Map + +- Create: `skills/browser-cli-delivery/SKILL.md` + Responsibility: user-facing orchestration rules, stage model, rollback rules, optional `automation.toml` and publish branch. +- Create: `skills/browser-cli-explore/SKILL.md` + Responsibility: Browser CLI exploration rules, task-mode selection, durable feedback capture into `task.meta.json`. +- Create: `skills/browser-cli-converge/SKILL.md` + Responsibility: convergence rules for `task.py`, `Flow` usage, metadata-code alignment, task validation. +- Create: `tests/unit/test_repo_skill_docs.py` + Responsibility: lock the new skill topology and the required contract text so future edits do not drift back to the old single-skill model. +- Modify: `AGENTS.md` + Responsibility: point Browser CLI maintainers to the new top-level skill instead of the old one. +- Modify: `docs/superpowers/plans/2026-04-14-browser-cli-delivery-skills-implementation-plan.md` + Responsibility: update checkbox state during execution if you are using this plan as the working log. 
+ +## Task 1: Lock The New Skill Topology With A Repo Text Contract + +**Files:** +- Create: `tests/unit/test_repo_skill_docs.py` +- Test: `tests/unit/test_repo_skill_docs.py` + +- [ ] **Step 1: Write the failing topology test** + +```python +from __future__ import annotations + +from pathlib import Path + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[2] + + +def _read(path: str) -> str: + return (_repo_root() / path).read_text(encoding="utf-8") + + +def test_browser_cli_skill_topology_exists() -> None: + root = _repo_root() + + assert (root / "skills" / "browser-cli-delivery" / "SKILL.md").exists() + assert (root / "skills" / "browser-cli-explore" / "SKILL.md").exists() + assert (root / "skills" / "browser-cli-converge" / "SKILL.md").exists() + + +def test_agents_points_to_browser_cli_delivery_skill() -> None: + agents_text = _read("AGENTS.md") + + assert "skills/browser-cli-delivery/SKILL.md" in agents_text +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_skill_topology_exists -v +``` + +Expected: FAIL because the new skill directories do not exist yet. + +- [ ] **Step 3: Add the minimal files and AGENTS pointer needed to make the topology real** + +Create `skills/browser-cli-delivery/SKILL.md`: + +```markdown +--- +name: browser-cli-delivery +description: Orchestrate Browser CLI exploration, convergence, validation, and optional automation packaging for reusable web tasks. +--- + +# Browser CLI Delivery +``` + +Create `skills/browser-cli-explore/SKILL.md`: + +```markdown +--- +name: browser-cli-explore +description: Explore real websites with Browser CLI, validate task mode, and distill durable feedback into task metadata. 
+--- + +# Browser CLI Explore +``` + +Create `skills/browser-cli-converge/SKILL.md`: + +```markdown +--- +name: browser-cli-converge +description: Turn validated Browser CLI exploration into stable task.py execution logic and task validation. +--- + +# Browser CLI Converge +``` + +Update the Browser-CLI-specific guidance line in `AGENTS.md`: + +```markdown +- Browser-CLI-specific agent delivery guidance: + `skills/browser-cli-delivery/SKILL.md` +``` + +- [ ] **Step 4: Run the test to verify the topology passes** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_skill_topology_exists tests/unit/test_repo_skill_docs.py::test_agents_points_to_browser_cli_delivery_skill -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add tests/unit/test_repo_skill_docs.py skills/browser-cli-delivery/SKILL.md skills/browser-cli-explore/SKILL.md skills/browser-cli-converge/SKILL.md AGENTS.md +git commit -m "test: lock browser-cli delivery skill topology" +``` + +## Task 2: Implement The Metadata-First Exploration Skill + +**Files:** +- Modify: `skills/browser-cli-explore/SKILL.md` +- Modify: `tests/unit/test_repo_skill_docs.py` +- Test: `tests/unit/test_repo_skill_docs.py` + +- [ ] **Step 1: Extend the repo text-contract test for exploration requirements** + +Append to `tests/unit/test_repo_skill_docs.py`: + +```python +def test_browser_cli_explore_skill_records_feedback_into_task_metadata() -> None: + skill_text = _read("skills/browser-cli-explore/SKILL.md") + + assert "task.meta.json" in skill_text + assert "browser-cli is the primary browser execution path" in skill_text + assert "environment" in skill_text + assert "success_path" in skill_text + assert "recovery_hints" in skill_text + assert "failures" in skill_text + assert "knowledge" in skill_text + assert "Do not record raw logs" in skill_text +``` + +- [ ] **Step 2: Run the exploration contract test to verify it fails** + +Run: + +```bash +pytest 
tests/unit/test_repo_skill_docs.py::test_browser_cli_explore_skill_records_feedback_into_task_metadata -v +``` + +Expected: FAIL because the file only contains the stub header. + +- [ ] **Step 3: Replace the stub header with the full exploration skill** + +Write `skills/browser-cli-explore/SKILL.md`: + +```markdown +--- +name: browser-cli-explore +description: Explore real websites with Browser CLI, validate task mode, and distill durable feedback into task metadata. +--- + +# Browser CLI Explore + +## Overview + +Use `browser-cli` to explore a site, test candidate paths, and distill only the +durable findings needed to build a reusable task. The primary output of this +skill is structured knowledge in `task.meta.json`, not final task code. + +## When to Use + +Use this skill when: + +- a web task still needs exploration or validation +- the page depends on real browser state, cookies, login, or rendering +- the next useful artifact is better task metadata, not yet final `task.py` + +Do not use this skill when: + +- the success path is already validated end to end +- the work is only task-code refactoring with no evidence gap +- the task is pure API work with no Browser CLI dependency + +## Hard Rules + +- browser-cli is the primary browser execution path +- choose the task mode before broad exploration +- capture only observations that change the next decision +- update `task.meta.json` as a rolling feedback sink +- treat these metadata sections as required destinations for durable knowledge: + `environment`, `success_path`, `recovery_hints`, `failures`, `knowledge` +- stop once the evidence is strong enough for deterministic implementation +- Do not record raw logs, chat transcripts, or exploratory dead ends in metadata +- Do not turn one lucky run into stable knowledge without a verification step + +## Phase Order + +1. Confirm the site-specific preflight assumptions: + login state, cookies, locale, browser profile, writable artifacts, Python env +2. 
Choose the task mode: + `ref-driven`, `content-first`, `lazy-scroll`, `login-state-first`, or + `browser-state/network-assisted` +3. Explore with the smallest reliable Browser CLI signal +4. Capture durable findings into `task.meta.json` +5. Stop when the success path, waits, refs, and failure lessons are clear enough + for `task.py` + +## Metadata Capture Rules + +- `environment`: site, entry URL, login requirements, profile assumptions, + browser assumptions +- `success_path`: validated steps, key refs, assertions, artifacts +- `recovery_hints`: retryable steps, alternate paths, stale-ref strategy, wait + points, anti-bot recovery +- `failures`: repeatable failure modes and the lesson each one teaches +- `knowledge`: stable selectors/roles, semantic-ref notes, pagination, + lazy-load, anti-bot, and output interpretation rules + +## Done Criteria + +This skill is complete when: + +- the task mode is known +- the stable path is understood +- the fragile points are documented +- `task.meta.json` contains enough evidence for `browser-cli-converge` + +## Common Mistakes + +- exploring with direct Playwright instead of Browser CLI +- jumping straight from browsing to `task.py` +- keeping the useful lessons only in chat +- recording logs instead of reusable metadata +``` + +- [ ] **Step 4: Run the exploration contract test to verify it passes** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_explore_skill_records_feedback_into_task_metadata -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add tests/unit/test_repo_skill_docs.py skills/browser-cli-explore/SKILL.md +git commit -m "docs: add browser-cli explore skill" +``` + +## Task 3: Implement The Task-Code Convergence Skill + +**Files:** +- Modify: `skills/browser-cli-converge/SKILL.md` +- Modify: `tests/unit/test_repo_skill_docs.py` +- Test: `tests/unit/test_repo_skill_docs.py` + +- [ ] **Step 1: Extend the repo text-contract test for convergence requirements** + 
+Append to `tests/unit/test_repo_skill_docs.py`: + +```python +def test_browser_cli_converge_skill_centers_task_py_and_flow_validation() -> None: + skill_text = _read("skills/browser-cli-converge/SKILL.md") + + assert "task.py is the single source of execution logic" in skill_text + assert "browser_cli.task_runtime.Flow" in skill_text + assert "browser-cli task validate" in skill_text + assert "browser-cli task run" in skill_text + assert "must stay aligned with task.meta.json" in skill_text +``` + +- [ ] **Step 2: Run the convergence contract test to verify it fails** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_converge_skill_centers_task_py_and_flow_validation -v +``` + +Expected: FAIL because the file only contains the stub header. + +- [ ] **Step 3: Replace the stub header with the full convergence skill** + +Write `skills/browser-cli-converge/SKILL.md`: + +```markdown +--- +name: browser-cli-converge +description: Turn validated Browser CLI exploration into stable task.py execution logic and task validation. +--- + +# Browser CLI Converge + +## Overview + +Use this skill after exploration has already validated the success path. +Its job is to encode that evidence into stable `task.py` logic and keep the +implementation aligned with `task.meta.json`. 
+ +## When to Use + +Use this skill when: + +- the success path is already validated +- the task now needs executable Browser CLI task code +- waits, assertions, and artifacts are known well enough to encode + +Do not use this skill when: + +- the site still has unresolved evidence gaps +- the task mode is still unclear +- validation failures show the metadata is incomplete + +## Hard Rules + +- task.py is the single source of execution logic +- browser interactions must go through `browser_cli.task_runtime.Flow` +- task code must stay aligned with task.meta.json +- keep exploration-only retries, branches, and debug logic out of the final task +- validate with `browser-cli task validate` +- use `browser-cli task run` when runtime proof is needed +- if validation exposes an evidence gap, go back to `browser-cli-explore` + +## Phase Order + +1. Read the validated `task.meta.json` +2. Encode the stable success path in `task.py` +3. Add explicit waits, assertions, and artifact writes +4. Verify metadata-code alignment +5. Run `browser-cli task validate` +6. Run `browser-cli task run` if the task shape requires live proof +7. 
If evidence is missing, return to exploration instead of guessing + +## Done Criteria + +This skill is complete when: + +- `task.py` replays the validated path +- waits and assertions are explicit +- the code and metadata describe the same workflow +- task validation passes + +## Common Mistakes + +- bypassing the task runtime with direct Playwright +- encoding guesses instead of validated waits or refs +- letting metadata and code drift apart +- patching around a missing exploration lesson instead of going back +``` + +- [ ] **Step 4: Run the convergence contract test to verify it passes** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_converge_skill_centers_task_py_and_flow_validation -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add tests/unit/test_repo_skill_docs.py skills/browser-cli-converge/SKILL.md +git commit -m "docs: add browser-cli converge skill" +``` + +## Task 4: Implement The Orchestrator Skill And Rollback Rules + +**Files:** +- Modify: `skills/browser-cli-delivery/SKILL.md` +- Modify: `tests/unit/test_repo_skill_docs.py` +- Test: `tests/unit/test_repo_skill_docs.py` + +- [ ] **Step 1: Extend the repo text-contract test for orchestration requirements** + +Append to `tests/unit/test_repo_skill_docs.py`: + +```python +def test_browser_cli_delivery_skill_orchestrates_explore_converge_and_optional_automation() -> None: + skill_text = _read("skills/browser-cli-delivery/SKILL.md") + + assert "browser-cli-explore" in skill_text + assert "browser-cli-converge" in skill_text + assert "task.py + task.meta.json" in skill_text + assert "automation.toml" in skill_text + assert "publish" in skill_text + assert "If validation fails because evidence is missing, go back to explore" in skill_text +``` + +- [ ] **Step 2: Run the orchestration contract test to verify it fails** + +Run: + +```bash +pytest 
tests/unit/test_repo_skill_docs.py::test_browser_cli_delivery_skill_orchestrates_explore_converge_and_optional_automation -v +``` + +Expected: FAIL because the file only contains the stub header. + +- [ ] **Step 3: Replace the stub header with the full orchestration skill** + +Write `skills/browser-cli-delivery/SKILL.md`: + +```markdown +--- +name: browser-cli-delivery +description: Orchestrate Browser CLI exploration, convergence, validation, and optional automation packaging for reusable web tasks. +--- + +# Browser CLI Delivery + +## Overview + +Use this as the main entrypoint when the user wants a reusable Browser CLI web +task rather than one-off browsing. The default endpoint is stable +`task.py + task.meta.json`. `automation.toml` generation and publish are +optional user-driven branches. + +## When to Use + +Use this skill when: + +- the user wants a reusable browser task +- the work may require exploration, iteration, and validation +- the final deliverable should match Browser CLI task artifacts + +Do not use this skill when: + +- one-off browsing is enough +- the task is not Browser CLI based +- the work is already scoped to only one lower-level skill + +## Hard Rules + +- this is the main user-facing skill +- call `browser-cli-explore` when evidence is missing +- call `browser-cli-converge` when the success path is validated +- default completion is `task.py + task.meta.json` +- `automation.toml` and publish are optional and require user choice +- If validation fails because evidence is missing, go back to explore +- do not publish by default + +## Phase Order + +1. Preflight: confirm Browser CLI, Python environment, login/profile, and site assumptions +2. Explore: call `browser-cli-explore` to validate the task mode and capture feedback +3. Converge: call `browser-cli-converge` to encode the stable path in `task.py` +4. Validate: run task validation and decide whether to fix code or return to explore +5. 
Optional automation: ask whether to create `automation.toml` +6. Optional publish: ask whether to run Browser CLI automation publish + +## Done Criteria + +This skill is complete when: + +- `task.py + task.meta.json` are stable +- validation passed +- optional automation work is either completed or intentionally skipped by the user + +## Common Mistakes + +- skipping metadata capture +- converging before the success path is real +- generating automation packaging too early +- treating one successful page run as enough evidence +``` + +- [ ] **Step 4: Run the orchestration contract test to verify it passes** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_delivery_skill_orchestrates_explore_converge_and_optional_automation -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add tests/unit/test_repo_skill_docs.py skills/browser-cli-delivery/SKILL.md +git commit -m "docs: add browser-cli delivery skill" +``` + +## Task 5: Remove The Legacy Single-Skill Directory + +**Files:** +- Delete: the legacy Browser CLI single-skill directory and its helper files +- Modify: `tests/unit/test_repo_skill_docs.py` +- Test: `tests/unit/test_repo_skill_docs.py` + +- [ ] **Step 1: Extend the repo text-contract test so only the new skill directories remain** + +Replace the existing `test_browser_cli_skill_topology_exists` from Task 1 in `tests/unit/test_repo_skill_docs.py` with this stricter version (do not append a second function with the same name): + +```python +def test_browser_cli_skill_topology_exists() -> None: + skills_dir = _repo_root() / "skills" + actual = { + path.name + for path in skills_dir.iterdir() + if path.is_dir() and path.name.startswith("browser-cli-") + } + + assert actual == { + "browser-cli-delivery", + "browser-cli-explore", + "browser-cli-converge", + } +``` + +- [ ] **Step 2: Run the topology test to verify it fails** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_skill_topology_exists -v +``` + +Expected: FAIL because the legacy skill directory still exists.
+ +- [ ] **Step 3: Delete the legacy single-skill directory and its reference files** + +Delete: + +- the legacy Browser CLI single-skill directory `skills/browser-cli-explore-delivery/` +- its helper files under `agents/` and `references/` + +- [ ] **Step 4: Run the topology test to verify it passes** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py::test_browser_cli_skill_topology_exists -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add tests/unit/test_repo_skill_docs.py +git rm -r skills/browser-cli-explore-delivery +git commit -m "docs: remove legacy browser-cli delivery skill" +``` + +## Task 6: Run Full Validation And Record The Final State + +**Files:** +- Modify: `docs/superpowers/plans/2026-04-14-browser-cli-delivery-skills-implementation-plan.md` +- Test: `tests/unit/test_repo_skill_docs.py` + +- [ ] **Step 1: Run the focused unit test file** + +Run: + +```bash +pytest tests/unit/test_repo_skill_docs.py -v +``` + +Expected: PASS + +- [ ] **Step 2: Run repository lint** + +Run: + +```bash +scripts/lint.sh +``` + +Expected: exit code 0 + +- [ ] **Step 3: Run repository tests** + +Run: + +```bash +scripts/test.sh +``` + +Expected: exit code 0 + +- [ ] **Step 4: Run repository guards** + +Run: + +```bash +scripts/guard.sh +``` + +Expected: exit code 0 + +- [ ] **Step 5: Commit the completed delivery-skill migration** + +```bash +git add skills/browser-cli-delivery/SKILL.md skills/browser-cli-explore/SKILL.md skills/browser-cli-converge/SKILL.md AGENTS.md tests/unit/test_repo_skill_docs.py docs/superpowers/plans/2026-04-14-browser-cli-delivery-skills-implementation-plan.md +git commit -m "docs: add browser-cli delivery skill stack" +``` + +## Self-Review + +- Spec coverage: + - three-skill topology is covered in Tasks 1, 4, and 5 + - metadata-first exploration is covered in Task 2 + - `task.py` convergence and validation rules are covered in Task 3 + - optional `automation.toml` and publish are covered in Task 4 + - maintainer navigation update
is covered in Task 1 +- Placeholder scan: + - no deferred implementation markers remain + - every file path and command is explicit +- Type and contract consistency: + - all tests reference the final short names + - all skill docs use `task.meta.json`, `task.py`, and `automation.toml` + - AGENTS points to `skills/browser-cli-delivery/SKILL.md` diff --git a/docs/superpowers/plans/2026-04-14-browser-cli-install-skills-implementation-plan.md b/docs/superpowers/plans/2026-04-14-browser-cli-install-skills-implementation-plan.md new file mode 100644 index 0000000..61a19a3 --- /dev/null +++ b/docs/superpowers/plans/2026-04-14-browser-cli-install-skills-implementation-plan.md @@ -0,0 +1,620 @@ +# Browser CLI Install Skills Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `browser-cli install-skills` install exactly the three Browser CLI skills from wheel-packaged assets, with `--target` support and release-artifact validation. + +**Architecture:** Move the release-backed skill assets under `src/browser_cli/packaged_skills/` so the installed wheel owns the runtime source of truth. Replace the current path-guessing logic in `install_skills.py` with `importlib.resources`-based discovery plus a fixed public whitelist, document the new package and `install-skills --target` contract in `AGENTS.md`, and lock the behavior with command tests, guard expectations, and a build-artifact smoke check. + +**Tech Stack:** Python 3.10, `importlib.resources`, `argparse`, `shutil`, `pytest`, uv build/install workflows, GitHub Actions + +--- + +## File Map + +- Create: `src/browser_cli/packaged_skills/__init__.py` + Responsibility: mark packaged skill assets as a Python package addressable through `importlib.resources`. 
+- Create: `src/browser_cli/packaged_skills/browser-cli-delivery/SKILL.md` + Responsibility: packaged runtime copy of the public delivery skill. +- Create: `src/browser_cli/packaged_skills/browser-cli-explore/SKILL.md` + Responsibility: packaged runtime copy of the public explore skill. +- Create: `src/browser_cli/packaged_skills/browser-cli-converge/SKILL.md` + Responsibility: packaged runtime copy of the public converge skill. +- Modify: `src/browser_cli/commands/install_skills.py` + Responsibility: replace repository/pip heuristics with packaged whitelist discovery, `--target` support, and fail-fast validation. +- Modify: `src/browser_cli/cli/main.py` + Responsibility: expose `--target` on the top-level command and keep help text aligned with the new contract. +- Modify: `AGENTS.md` + Responsibility: document the `browser_cli.packaged_skills` package and the public `browser-cli install-skills --target` behavior in the repo navigation guide. +- Modify: `pyproject.toml` + Responsibility: ensure packaged skill assets are included in the wheel. +- Modify: `scripts/guards/architecture.py` + Responsibility: whitelist the new `browser_cli.packaged_skills` top-level package boundary. +- Modify: `scripts/guards/docs_sync.py` + Responsibility: require the maintained `install-skills --target` AGENTS.md contract text. +- Create: `tests/unit/test_install_skills_command.py` + Responsibility: cover whitelist discovery, install/update behavior, `--target`, and failure paths. +- Modify: `tests/unit/test_cli.py` + Responsibility: assert `install-skills --help` exposes `--target`. +- Modify: `tests/unit/test_repo_skill_docs.py` + Responsibility: lock the `packaged_skills` architecture entry and packaged skill doc sync expectations. +- Modify: `tests/unit/test_repo_metadata.py` + Responsibility: lock packaging metadata needed for packaged skill assets. 
+- Create: `tests/unit/test_release_artifacts.py` + Responsibility: inspect the built wheel and verify it contains the three packaged skills. +- Modify: `.github/workflows/ci.yml` + Responsibility: run the artifact smoke test in CI so wheel regressions fail before release. +- Modify: `.github/workflows/release.yml` + Responsibility: run the same build-artifact smoke before `uv publish`. +- Modify: `docs/superpowers/plans/2026-04-14-browser-cli-install-skills-implementation-plan.md` + Responsibility: update checkbox state during execution if this plan is used as the live log. + +## Task 1: Package The Three Public Skills Inside `browser_cli` + +**Files:** +- Create: `src/browser_cli/packaged_skills/__init__.py` +- Create: `src/browser_cli/packaged_skills/browser-cli-delivery/SKILL.md` +- Create: `src/browser_cli/packaged_skills/browser-cli-explore/SKILL.md` +- Create: `src/browser_cli/packaged_skills/browser-cli-converge/SKILL.md` +- Modify: `pyproject.toml` +- Modify: `tests/unit/test_repo_metadata.py` +- Test: `tests/unit/test_repo_metadata.py` + +- [ ] **Step 1: Write the failing packaging metadata test** + +Append to `tests/unit/test_repo_metadata.py`: + +```python +def test_repo_includes_packaged_browser_cli_skills_in_wheel_config() -> None: + data = _load_pyproject() + + package_data = data["tool"]["setuptools"].get("package-data", {}) + assert "browser_cli.packaged_skills" in package_data + assert package_data["browser_cli.packaged_skills"] == ["**/SKILL.md"] +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: + +```bash +uv run pytest tests/unit/test_repo_metadata.py::test_repo_includes_packaged_browser_cli_skills_in_wheel_config -v +``` + +Expected: FAIL because `tool.setuptools.package-data` is not defined yet. 
+ +- [ ] **Step 3: Add the packaged skill package and wheel metadata** + +Create `src/browser_cli/packaged_skills/__init__.py`: + +```python +"""Packaged Browser CLI skills shipped with the installed distribution.""" +``` + +Create `src/browser_cli/packaged_skills/browser-cli-delivery/SKILL.md` by copying the current public source skill: + +```markdown +--- +name: browser-cli-delivery +description: Orchestrate Browser CLI exploration, convergence, validation, and optional automation packaging for reusable web tasks. +--- + +# Browser CLI Delivery + +## Overview + +Use this as the main entrypoint when the user wants a reusable Browser CLI web +task rather than one-off browsing. The default endpoint is stable +`task.py + task.meta.json`. `automation.toml` generation and publish are +optional user-driven branches. +``` + +Create `src/browser_cli/packaged_skills/browser-cli-explore/SKILL.md`: + +```markdown +--- +name: browser-cli-explore +description: Explore real websites with Browser CLI, validate task mode, and distill durable feedback into task metadata. +--- + +# Browser CLI Explore + +## Overview + +Use `browser-cli` to explore a site, test candidate paths, and distill only the +durable findings needed to build a reusable task. The primary output of this +skill is structured knowledge in `task.meta.json`, not final task code. +``` + +Create `src/browser_cli/packaged_skills/browser-cli-converge/SKILL.md`: + +```markdown +--- +name: browser-cli-converge +description: Turn validated Browser CLI exploration into stable task.py execution logic and task validation. +--- + +# Browser CLI Converge + +## Overview + +Use this skill after exploration has already validated the success path. Its +job is to encode that evidence into stable `task.py` logic and keep the +implementation aligned with `task.meta.json`. 
+``` + +Update `pyproject.toml`: + +```toml +[tool.setuptools] +package-dir = {"" = "src"} +include-package-data = true + +[tool.setuptools.package-data] +"browser_cli.packaged_skills" = ["**/SKILL.md"] +``` + +- [ ] **Step 4: Run the metadata test to verify it passes** + +Run: + +```bash +uv run pytest tests/unit/test_repo_metadata.py::test_repo_includes_packaged_browser_cli_skills_in_wheel_config -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add pyproject.toml tests/unit/test_repo_metadata.py src/browser_cli/packaged_skills +git commit -m "build: package browser-cli skills in wheel" +``` + +## Task 2: Replace Runtime Skill Discovery With A Packaged Whitelist + +**Files:** +- Modify: `src/browser_cli/commands/install_skills.py` +- Create: `tests/unit/test_install_skills_command.py` +- Test: `tests/unit/test_install_skills_command.py` + +- [ ] **Step 1: Write the failing command tests** + +Create `tests/unit/test_install_skills_command.py`: + +```python +from __future__ import annotations + +from argparse import Namespace +from pathlib import Path + +import pytest + +from browser_cli.commands import install_skills as install_skills_module +from browser_cli.errors import InvalidInputError + + +def test_get_skills_target_path_defaults_to_agents_skills(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + monkeypatch.setattr(Path, "home", lambda: tmp_path) + assert install_skills_module.get_skills_target_path(None) == tmp_path / ".agents" / "skills" + + +def test_get_skills_target_path_honors_explicit_target(tmp_path: Path) -> None: + target = tmp_path / "custom-skills" + assert install_skills_module.get_skills_target_path(str(target)) == target.resolve() + + +def test_discover_packaged_skills_returns_three_public_skills() -> None: + discovered = install_skills_module.discover_packaged_skills() + assert [item.name for item in discovered] == [ + "browser-cli-converge", + "browser-cli-delivery", + "browser-cli-explore", + ] + + +def 
test_install_skills_reports_install_and_update(tmp_path: Path) -> None: + source_root = tmp_path / "source" + target_root = tmp_path / "target" + for name in ("browser-cli-delivery", "browser-cli-explore", "browser-cli-converge"): + skill_dir = source_root / name + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + (target_root / "browser-cli-delivery").mkdir(parents=True) + + results = install_skills_module.install_skills_from_paths( + [ + install_skills_module.PackagedSkill(name="browser-cli-delivery", path=source_root / "browser-cli-delivery"), + install_skills_module.PackagedSkill(name="browser-cli-explore", path=source_root / "browser-cli-explore"), + install_skills_module.PackagedSkill(name="browser-cli-converge", path=source_root / "browser-cli-converge"), + ], + target_root, + dry_run=True, + ) + + assert results == [ + ("browser-cli-delivery", "would update"), + ("browser-cli-explore", "would install"), + ("browser-cli-converge", "would install"), + ] + + +def test_run_install_skills_command_uses_explicit_target(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + source_root = tmp_path / "source" + packaged = [] + for name in ("browser-cli-delivery", "browser-cli-explore", "browser-cli-converge"): + skill_dir = source_root / name + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text(f"# {name}\n", encoding="utf-8") + packaged.append(install_skills_module.PackagedSkill(name=name, path=skill_dir)) + monkeypatch.setattr(install_skills_module, "discover_packaged_skills", lambda: packaged) + + output = install_skills_module.run_install_skills_command( + Namespace(dry_run=True, target=str(tmp_path / "custom")) + ) + + assert "Installing skills to" in output + assert str((tmp_path / "custom").resolve()) in output + assert "Total: 3 skill(s)" in output +``` + +- [ ] **Step 2: Run the command tests to verify they fail** + +Run: + +```bash +uv run pytest 
tests/unit/test_install_skills_command.py -v +``` + +Expected: FAIL because `get_skills_target_path(None)`, `discover_packaged_skills()`, `PackagedSkill`, and `install_skills_from_paths()` do not exist yet. + +- [ ] **Step 3: Rewrite `install_skills.py` around packaged skill discovery** + +Replace `src/browser_cli/commands/install_skills.py` with: + +```python +"""Install packaged Browser CLI skills into a target skills directory.""" + +from __future__ import annotations + +import argparse +import shutil +import tempfile +from dataclasses import dataclass +from importlib import resources +from pathlib import Path + +from browser_cli.errors import InvalidInputError + +PUBLIC_SKILL_NAMES = ( + "browser-cli-converge", + "browser-cli-delivery", + "browser-cli-explore", +) + + +@dataclass(frozen=True, slots=True) +class PackagedSkill: + name: str + path: Path + + +def discover_packaged_skills() -> list[PackagedSkill]: + root = resources.files("browser_cli.packaged_skills") + discovered: list[PackagedSkill] = [] + for name in PUBLIC_SKILL_NAMES: + skill_root = root.joinpath(name) + if not skill_root.is_dir(): + raise InvalidInputError(f"Packaged skill is missing from this build: {name}") + with resources.as_file(skill_root) as skill_path: + skill_dir = skill_path.resolve() + if not (skill_dir / "SKILL.md").exists(): + raise InvalidInputError(f"Packaged skill is incomplete in this build: {name}") + discovered.append(PackagedSkill(name=name, path=skill_dir)) + return discovered + + +def get_skills_target_path(target: str | None) -> Path: + if target: + return Path(target).expanduser().resolve() + return Path.home() / ".agents" / "skills" + + +def install_skills_from_paths( + skills: list[PackagedSkill], + target: Path, + *, + dry_run: bool = False, +) -> list[tuple[str, str]]: + results: list[tuple[str, str]] = [] + if not dry_run: + target.mkdir(parents=True, exist_ok=True) + for skill in skills: + destination = target / skill.name + if destination.exists(): + status = 
"would update" if dry_run else "updated" + else: + status = "would install" if dry_run else "installed" + if not dry_run: + if destination.exists(): + shutil.rmtree(destination) + with tempfile.TemporaryDirectory(prefix=f"{skill.name}-") as tmp_dir: + staged = Path(tmp_dir) / skill.name + shutil.copytree(skill.path, staged) + shutil.move(str(staged), destination) + results.append((skill.name, status)) + return results + + +def run_install_skills_command(args: argparse.Namespace) -> str: + skills = discover_packaged_skills() + target = get_skills_target_path(getattr(args, "target", None)) + results = install_skills_from_paths(skills, target, dry_run=bool(args.dry_run)) + mode = "(dry-run) " if args.dry_run else "" + lines = [f"{mode}Installing skills to {target}:", ""] + for skill_name, status in results: + lines.append(f" {skill_name}: {status}") + lines.append("") + lines.append(f"Total: {len(results)} skill(s)") + return "\n".join(lines) + "\n" +``` + +- [ ] **Step 4: Run the command tests to verify they pass** + +Run: + +```bash +uv run pytest tests/unit/test_install_skills_command.py -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/browser_cli/commands/install_skills.py tests/unit/test_install_skills_command.py +git commit -m "feat: install packaged browser-cli skills" +``` + +## Task 3: Expose `--target` And Lock The CLI Contract + +**Files:** +- Modify: `src/browser_cli/cli/main.py` +- Modify: `tests/unit/test_cli.py` +- Test: `tests/unit/test_cli.py` + +- [ ] **Step 1: Write the failing CLI help test** + +Append to `tests/unit/test_cli.py`: + +```python +def test_install_skills_help_mentions_target(capsys) -> None: + exit_code = main(["install-skills", "--help"]) + captured = capsys.readouterr() + assert exit_code == 0 + assert "--dry-run" in captured.out + assert "--target" in captured.out + assert "packaged skills" in captured.out.lower() +``` + +- [ ] **Step 2: Run the CLI help test to verify it fails** + +Run: + +```bash +uv 
run pytest tests/unit/test_cli.py::test_install_skills_help_mentions_target -v +``` + +Expected: FAIL because `--target` is not registered yet. + +- [ ] **Step 3: Add the CLI argument and keep help text aligned** + +Update the `install-skills` parser block in `src/browser_cli/cli/main.py`: + +```python + skills_parser = subparsers.add_parser( + "install-skills", + help="Install packaged Browser CLI skills to a skills directory.", + description="Copy packaged Browser CLI skills from the installed package to the target skills directory.", + ) + skills_parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be installed without making changes.", + ) + skills_parser.add_argument( + "--target", + help="Optional target directory. Defaults to ~/.agents/skills.", + ) +``` + +- [ ] **Step 4: Run the CLI help test to verify it passes** + +Run: + +```bash +uv run pytest tests/unit/test_cli.py::test_install_skills_help_mentions_target -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/browser_cli/cli/main.py tests/unit/test_cli.py +git commit -m "feat: add target override for install-skills" +``` + +## Task 4: Add Wheel-Artifact Verification + +**Files:** +- Create: `tests/unit/test_release_artifacts.py` +- Modify: `.github/workflows/ci.yml` +- Modify: `.github/workflows/release.yml` +- Test: `tests/unit/test_release_artifacts.py` + +- [ ] **Step 1: Write the failing wheel inspection test** + +Create `tests/unit/test_release_artifacts.py`: + +```python +from __future__ import annotations + +import zipfile +from pathlib import Path + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[2] + + +def test_built_wheel_contains_packaged_browser_cli_skills() -> None: + wheels = sorted((_repo_root() / "dist").glob("*.whl")) + assert wheels, "Build a wheel before running this test: uv build --wheel" + wheel_path = wheels[-1] + with zipfile.ZipFile(wheel_path) as archive: + names = set(archive.namelist()) + assert 
"browser_cli/packaged_skills/browser-cli-delivery/SKILL.md" in names + assert "browser_cli/packaged_skills/browser-cli-explore/SKILL.md" in names + assert "browser_cli/packaged_skills/browser-cli-converge/SKILL.md" in names +``` + +- [ ] **Step 2: Build the wheel and run the test to verify it fails** + +Run: + +```bash +rm -rf dist +uv build --wheel +uv run pytest tests/unit/test_release_artifacts.py::test_built_wheel_contains_packaged_browser_cli_skills -v +``` + +Expected: FAIL until the wheel metadata and packaged files are fully wired up. + +- [ ] **Step 3: Add CI and release smoke steps for built artifacts** + +Add this step near the end of `.github/workflows/ci.yml` after the existing unit and integration coverage: + +```yaml + packaging-smoke: + name: Packaging Smoke + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + with: + python-version-file: .python-version + enable-cache: true + + - name: Sync dependencies + run: uv sync --locked --dev + + - name: Build wheel + run: uv build --wheel --no-sources + + - name: Verify wheel-packaged skills + run: uv run pytest tests/unit/test_release_artifacts.py -v +``` + +Insert this step in `.github/workflows/release.yml` before `uv publish`: + +```yaml + - name: Verify wheel-packaged skills + run: | + uv build --wheel --no-sources + uv run pytest tests/unit/test_release_artifacts.py -v +``` + +- [ ] **Step 4: Rebuild and rerun the artifact test to verify it passes** + +Run: + +```bash +rm -rf dist +uv build --wheel +uv run pytest tests/unit/test_release_artifacts.py::test_built_wheel_contains_packaged_browser_cli_skills -v +``` + +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add tests/unit/test_release_artifacts.py .github/workflows/ci.yml .github/workflows/release.yml +git commit -m "test: verify packaged skills in built wheel" +``` + +## Task 5: Run Full Validation + +**Files:** +- Modify: 
`docs/superpowers/plans/2026-04-14-browser-cli-install-skills-implementation-plan.md` +- Test: `tests/unit/test_install_skills_command.py` +- Test: `tests/unit/test_cli.py` +- Test: `tests/unit/test_repo_metadata.py` +- Test: `tests/unit/test_release_artifacts.py` + +- [ ] **Step 1: Run the focused unit and artifact tests** + +Run: + +```bash +uv run pytest tests/unit/test_repo_metadata.py::test_repo_includes_packaged_browser_cli_skills_in_wheel_config -v +uv run pytest tests/unit/test_install_skills_command.py -v +uv run pytest tests/unit/test_cli.py::test_install_skills_help_mentions_target -v +rm -rf dist +uv build --wheel +uv run pytest tests/unit/test_release_artifacts.py -v +``` + +Expected: PASS for all commands. + +- [ ] **Step 2: Run the repository validation scripts** + +Run: + +```bash +./scripts/lint.sh +./scripts/test.sh +./scripts/guard.sh +``` + +Expected: all three scripts exit `0`. + +- [ ] **Step 3: Update the plan checklist to reflect completion** + +Update this file so completed steps are checked as work lands: + +```markdown +- [x] **Step 1: Run the focused unit and artifact tests** +- [x] **Step 2: Run the repository validation scripts** +``` + +- [ ] **Step 4: Commit** + +```bash +git add docs/superpowers/plans/2026-04-14-browser-cli-install-skills-implementation-plan.md +git commit -m "docs: mark install-skills plan execution complete" +``` + +## Self-Review + +Spec coverage check: + +- packaged asset source under `src/browser_cli/packaged_skills/`: covered by Task 1 +- install only the three public skills: covered by Task 2 +- `--target` override with `~/.agents/skills` default: covered by Tasks 2 and 3 +- fail-fast behavior for missing packaged assets: covered by Task 2 tests and implementation +- wheel contains packaged skills: covered by Task 4 +- CI and release validate built artifacts: covered by Task 4 + +Placeholder scan: + +- no `TODO`, `TBD`, or deferred implementation markers remain +- every task names exact files, commands, and 
code snippets + +Type and naming consistency: + +- `PackagedSkill`, `discover_packaged_skills`, `get_skills_target_path`, and `install_skills_from_paths` are introduced in Task 2 and referenced consistently afterward +- the packaged asset path is consistently `src/browser_cli/packaged_skills/` diff --git a/docs/superpowers/specs/2026-04-13-browser-cli-task-automation-design.md b/docs/superpowers/specs/2026-04-13-browser-cli-task-automation-design.md index 1947495..ef7b5ee 100644 --- a/docs/superpowers/specs/2026-04-13-browser-cli-task-automation-design.md +++ b/docs/superpowers/specs/2026-04-13-browser-cli-task-automation-design.md @@ -344,8 +344,8 @@ This means: Because first release does not require `task init`, consistency must come from task contract documentation, validation, and skill behavior. -The `browser-cli-explore-delivery` skill should be upgraded to a stronger task -generation contract. +The Browser CLI delivery skill stack should enforce a stronger task generation +contract. It should include: diff --git a/docs/superpowers/specs/2026-04-14-browser-cli-delivery-skills-design.md b/docs/superpowers/specs/2026-04-14-browser-cli-delivery-skills-design.md new file mode 100644 index 0000000..c5fbcb4 --- /dev/null +++ b/docs/superpowers/specs/2026-04-14-browser-cli-delivery-skills-design.md @@ -0,0 +1,490 @@ +# Browser CLI Delivery Skills Design + +Date: 2026-04-14 +Status: Drafted for review +Repo: `browser-cli` + +## Summary + +This spec defines a new three-skill system for browser-task delivery in the +`browser-cli` repository: + +1. `browser-cli-delivery` +2. `browser-cli-explore` +3. `browser-cli-converge` + +The new design replaces the idea of one large delivery skill with a layered +model: + +- one user-facing orchestrator skill +- one exploration skill that captures durable feedback into `task.meta.json` +- one convergence skill that turns validated paths into `task.py` + +The default completion state is a stable `task.py` plus `task.meta.json`. 
+`automation.toml` generation and `browser-cli automation publish` remain +optional, user-driven branches rather than mandatory output. + +## Problem Statement + +The current repository previously relied on a single delivery-oriented skill. + +That skill points agents toward the right deliverables, but it still behaves +mostly like a linear checklist. It does not yet make the core feedback loop the +center of the workflow: + +`explore -> try -> learn -> record reusable knowledge -> converge -> validate` + +This gap matters because the current Browser CLI artifact model already expects +that reusable knowledge will survive beyond chat history: + +- `task.py` holds executable logic +- `task.meta.json` holds structured environment assumptions, success-path + knowledge, recovery hints, failures, and reusable site knowledge +- `automation.toml` wraps the task only when the user wants packaging or + publication + +Without a stronger skill contract: + +- agents can jump from exploration straight to code +- durable lessons stay in chat instead of metadata +- `task.py` risks absorbing exploration-only trial logic +- `automation.toml` or publish can be attempted before the task is stable + +## Repository Constraints + +This design must align with the repository's current implementation, not an +imagined future API. 
+ +### Current Runtime And CLI Truths + +- `src/browser_cli/task_runtime/flow.py` defines the high-level `Flow` surface + for task execution +- `src/browser_cli/commands/task.py` defines the public task surfaces: + `browser-cli task template`, `validate`, and `run` +- `src/browser_cli/commands/automation.py` defines the public automation + surfaces including `publish` +- `tasks/_templates/task.meta.json` defines the full metadata structure the + repository expects agents to work with +- `tasks/douyin_video_download/task.meta.json`, + `tasks/interactive_reveal_capture/task.meta.json`, and + `tasks/lazy_scroll_capture/task.meta.json` show the intended level of durable + knowledge capture + +### Important Observations + +- `validate_task_metadata()` currently enforces required top-level sections and + a valid `task` section, but not the full richness of the metadata template +- the skill should therefore treat the repository template and examples as the + practical contract for good metadata, not only the minimum validator +- the skill must keep `task.py` as the only source of execution logic +- the skill must keep `automation.toml` and publish optional because the user + may decline either one + +## Goals + +- Create a delivery workflow that is high-autonomy by default. +- Preserve `browser-cli` as the main browser execution backend. +- Make feedback capture into `task.meta.json` a first-class requirement. +- Keep `task.py` focused on validated execution paths only. +- Allow optional exploration and debugging work without weakening delivery + discipline. +- Keep `automation.toml` generation and publish user-driven rather than + mandatory. +- Make stage boundaries explicit so agents know when to explore, when to + converge, and when to go back. + +## Non-Goals + +- This spec does not redesign the Browser CLI runtime itself. +- This spec does not add a public `browser-cli explore` command. +- This spec does not make publication mandatory for every task. 
+- This spec does not require every task to start from repository templates, as + long as the final result matches the task contract. +- This spec does not turn `task.meta.json` into a transcript or raw log store. + +## Options Considered + +### 1. One larger replacement skill + +Advantages: + +- simple to discover +- fewer files + +Disadvantages: + +- mixes orchestration, exploration, and convergence rules together +- harder to maintain strict stage boundaries +- encourages large, vague instructions instead of explicit phase ownership + +Rejected. + +### 2. One delivery skill with optional embedded exploration guidance + +Advantages: + +- smaller surface area +- somewhat easier migration from the existing skill + +Disadvantages: + +- exploration remains secondary instead of first-class +- convergence rules are easier to bypass +- metadata feedback capture stays too weak + +Rejected. + +### 3. One orchestrator skill plus two focused child skills + +Advantages: + +- separates concerns cleanly +- matches the user's desired feedback loop +- lets the top-level skill manage stage transitions and rollback rules +- gives exploration and convergence each a clear artifact responsibility + +Disadvantages: + +- introduces more than one skill document +- requires cross-skill conventions + +Chosen direction. + +## Chosen Direction + +The repository should add a three-layer skill system with short names: + +1. `browser-cli-delivery` +2. `browser-cli-explore` +3. `browser-cli-converge` + +### Role Split + +- `browser-cli-delivery` is the user-facing orchestrator +- `browser-cli-explore` is responsible for real browser exploration and + feedback capture into `task.meta.json` +- `browser-cli-converge` is responsible for implementing the validated path in + `task.py` + +`automation.toml` creation and `browser-cli automation publish` stay inside the +orchestrator as optional end-stage branches. They are not part of the default +definition of done. 
+ +## Skill Topology + +### `browser-cli-delivery` + +This is the only skill users should normally invoke directly for browser-task +delivery work. + +Its responsibilities are: + +- decide whether the task is ready for exploration +- decide when to invoke `browser-cli-explore` +- decide when exploration evidence is sufficient to invoke + `browser-cli-converge` +- decide when validation failure should send the process back to exploration +- ask the user whether to generate `automation.toml` +- ask the user whether to publish + +### `browser-cli-explore` + +This skill owns: + +- preflight context gathering relevant to the target site and environment +- choosing the exploration mode +- trying candidate browser paths with `browser-cli` +- capturing only durable findings +- updating `task.meta.json` with stable knowledge and reusable failure lessons + +### `browser-cli-converge` + +This skill owns: + +- implementing the validated path in `task.py` +- keeping code aligned with metadata +- encoding waits, assertions, and artifacts explicitly +- validating the task with `browser-cli task validate` +- running `browser-cli task run` when the task needs runtime proof + +## Artifact Responsibilities + +The new skill system must preserve a strict artifact split. + +### `task.py` + +`task.py` is the single source of execution logic. + +It should contain: + +- the validated success path +- explicit waits and assertions +- helper functions needed for deterministic replay +- artifact writing logic that belongs to task execution + +It should not contain: + +- raw exploration branches +- speculative fallback paths that were never validated +- chat-derived guesses standing in for evidence + +### `task.meta.json` + +`task.meta.json` is the durable sidecar for reusable knowledge gathered during +exploration and refined during convergence. 
+ +The skill system should treat these sections as the core feedback sink: + +- `environment` +- `success_path` +- `recovery_hints` +- `failures` +- `knowledge` + +The metadata should capture: + +- environment assumptions +- stable execution steps +- key semantic refs or anchor patterns +- known wait points +- alternate paths when validated +- reusable failure lessons +- site-specific behavior such as lazy load, pagination, anti-bot, or + browser-state requirements + +The metadata should not capture: + +- raw logs +- unfiltered transcripts +- every exploratory dead end +- verbose artifact inventories with no future decision value + +### `automation.toml` + +`automation.toml` is optional. + +It should be created only when the user wants packaging or publication. +It must not become a second implementation layer for browser logic. + +## Default Done Criteria + +The default done state for the new workflow is: + +- a stable `task.py` +- a meaningful `task.meta.json` +- validation through `browser-cli task validate` +- runtime proof through `browser-cli task run` when needed by the task shape + +The process may stop there. + +Additional completion states are allowed only when the user asks for them: + +- generation of `automation.toml` +- publication via `browser-cli automation publish` + +## Stage Model + +The orchestrator should manage a strict state machine rather than a loose +checklist. + +### 1. Preflight + +The orchestrator must verify: + +- `browser-cli` is usable +- the Python environment that will run the task is understood +- browser/profile assumptions are known +- site constraints such as login, cookies, locale, or writable artifacts are + known + +If the live daemon and documented CLI appear out of sync, one +`browser-cli reload` is allowed before declaring a capability gap. + +If critical prerequisites are missing, the process stops with a fix plan. It +does not enter exploration blindly. + +### 2. 
Explore + +The orchestrator invokes `browser-cli-explore` to determine the task mode and +test the smallest viable path. + +Expected exploration modes include: + +- ref-driven +- content-first +- lazy-scroll +- login-state-first +- browser-state or network-assisted + +The exploration goal is not "browse around until success". It is to validate +which path is repeatable and what must be recorded for replay. + +### 3. Feedback Capture + +After each meaningful exploration round, durable findings should be distilled +into `task.meta.json`. + +This is a rolling process, not a single final documentation step. + +The rules are: + +- validated success behavior belongs in `success_path` +- repeatable waits, alternate routes, and stale-ref handling belong in + `recovery_hints` +- reusable failures belong in `failures` +- site behavior patterns belong in `knowledge` + +### 4. Converge + +The orchestrator invokes `browser-cli-converge` only when: + +- the success path is sufficiently clear +- the key assertions are known +- the fragile points and recovery logic are understood well enough to encode + +Convergence should turn evidence into deterministic task code. + +### 5. Validate + +Validation always starts with: + +- `browser-cli task validate <task-dir>` + +If the task depends on runtime behavior or real inputs, the process should also +use: + +- `browser-cli task run ...` + +When validation fails, the orchestrator must decide whether the failure is: + +- an implementation bug inside the converged path +- or an evidence gap that requires returning to exploration + +If the metadata or explored evidence is insufficient, the process must go back +to exploration instead of stacking guesses in code. + +### 6. Optional Automation + +Only after the default done state is reached should the orchestrator ask the +user whether to: + +- generate `automation.toml` +- publish through `browser-cli automation publish` + +Both remain optional. 
+ +## Skill Contracts + +### Contract For `browser-cli-delivery` + +Required behavior: + +- act as the main user-facing skill +- target `task.py` plus `task.meta.json` as the default endpoint +- invoke `browser-cli-explore` and `browser-cli-converge` when appropriate +- manage rollback from validation back to exploration +- keep `automation.toml` and publish behind explicit user choice + +Prohibited behavior: + +- treating one successful exploration as sufficient without checking stability +- skipping metadata capture and writing only `task.py` +- publishing by default + +### Contract For `browser-cli-explore` + +Required behavior: + +- use `browser-cli` as the primary browser execution path +- choose and validate the exploration mode +- gather only observations that change the next decision +- write durable learnings into `task.meta.json` +- stop once there is enough validated evidence to implement the task + +Prohibited behavior: + +- turning exploration code directly into the final `task.py` +- recording raw logs or chat transcripts in metadata +- promoting one-off page behavior into stable knowledge + +### Contract For `browser-cli-converge` + +Required behavior: + +- keep `task.py` as the only execution-logic truth source +- route browser interactions through `browser_cli.task_runtime.Flow` +- align task code with `task.meta.json` +- use repository task commands for validation +- keep temporary exploration-only logic out of the final task + +Prohibited behavior: + +- bypassing the Browser CLI task runtime as the main path +- allowing metadata and code to describe different workflows +- encoding unsupported guesses as waits, selectors, or alternate flows + +## Content Structure For The New Skill Files + +Each new `SKILL.md` should use the same compact structure: + +1. `Overview` +2. `When to Use` +3. `Hard Rules` +4. `Phase Order` +5. `Done Criteria` +6. `Common Mistakes` + +The content should stay concrete and repository-aware rather than generic. 
+ +## Suggested File Layout + +```text +skills/ + browser-cli-delivery/ + SKILL.md + browser-cli-explore/ + SKILL.md + browser-cli-converge/ + SKILL.md +``` + +Legacy single-skill guidance may be reused as source material, but it should +not remain the primary design shape if it prevents the new layered model. + +## Migration Guidance + +This design allows the old single-skill layout to be retired after references +have been updated. + +A safe migration path is: + +1. add the three new skills +2. port the reusable repository-specific guidance into the new skills +3. update references or documentation that point to the legacy layout +4. remove the legacy directory once the repository no longer depends on it + +## Open Questions + +These questions do not block the design, but they should be resolved during +implementation: + +- whether `browser-cli-delivery` should reference the old skill during the + transition period +- whether the new skills need shared reference files beyond their `SKILL.md` +- whether repository docs should point users at the new top-level skill once it + exists + +## Acceptance Criteria + +The design is satisfied when: + +- the repository contains the three new skills with clear role boundaries +- the top-level skill defaults to `task.py` plus `task.meta.json` +- the exploration skill explicitly treats `task.meta.json` as the durable + feedback sink +- the convergence skill explicitly treats `task.py` as the only execution + logic artifact +- optional automation generation and publish remain gated by explicit user + choice +- the resulting guidance matches the current Browser CLI runtime and CLI + surfaces diff --git a/docs/superpowers/specs/2026-04-14-install-skills-design.md b/docs/superpowers/specs/2026-04-14-install-skills-design.md new file mode 100644 index 0000000..e410590 --- /dev/null +++ b/docs/superpowers/specs/2026-04-14-install-skills-design.md @@ -0,0 +1,368 @@ +# Browser CLI Install Skills Design + +Date: 2026-04-14 +Status: 
Drafted for review +Repo: `browser-cli` + +## Summary + +This design repairs `browser-cli install-skills` so it works for installed +users and exposes a narrow, intentional public contract. + +The command should: + +- install exactly three Browser CLI skills +- load them only from packaged assets shipped with the installed distribution +- default to `~/.agents/skills` +- allow `--target <path>` for explicit overrides +- fail fast if any required packaged skill is missing + +The three public skills are: + +1. `browser-cli-delivery` +2. `browser-cli-explore` +3. `browser-cli-converge` + +This design does not expand `install-skills` into a generic skill manager. It +only makes the current public command deterministic and release-safe. + +## Problem Statement + +`browser-cli install-skills` is currently exposed as a public command, but its +implementation depends on repository-top-level `skills/` content being present +at runtime. + +That assumption is not stable for installed users: + +- the current wheel does not ship the required skill assets +- the command tries to discover `skills/` via installation-path heuristics +- installed users can receive a runtime failure even though the command is + advertised as package-backed behavior + +This creates two release-quality problems: + +1. the public command is broken in the installed package +2. the installation surface is broader and more implicit than intended + +At the same time, the repository now has a more intentional three-layer Browser +CLI skill stack: + +- `browser-cli-delivery` as the main entrypoint +- `browser-cli-explore` for evidence gathering and metadata capture +- `browser-cli-converge` for validated path convergence into `task.py` + +The command should install that explicit stack, not whatever happens to live +under the repository `skills/` directory. + +## Goals + +- Make `browser-cli install-skills` work from an installed package. +- Restrict the public installation surface to the three Browser CLI skills.
+- Stop relying on git-root or pip-location heuristics to find installable + assets. +- Add an explicit `--target` override while preserving the current default + target. +- Fail clearly when packaged assets are incomplete rather than silently + degrading. +- Add release-oriented verification so wheel regressions are caught before + publish. + +## Non-Goals + +- This design does not package or install `browser-cli-extension`. +- This design does not install every repository skill. +- This design does not introduce remote fetching or repo-clone fallback. +- This design does not change the Browser CLI daemon, task runtime, or + automation contracts. +- This design does not turn `install-skills` into a JSON-first machine API. + +## Chosen Direction + +Browser CLI should treat installable skills as packaged application assets, not +as incidental repository files. + +The implementation should move the release-backed skill source to a package-owned +location under `src/browser_cli/`, then access those files through +`importlib.resources`. + +The command should install only a hard-coded whitelist of public Browser CLI +skills: + +- `browser-cli-delivery` +- `browser-cli-explore` +- `browser-cli-converge` + +The command should no longer scan the repository `skills/` directory or infer a +git checkout. If packaged assets are missing, the command should fail because +that is a release defect. + +## Options Considered + +### 1. Explicit packaged whitelist + +Ship the three public skills as package-owned assets and install only those +directories. + +Advantages: + +- stable for wheel installs +- narrow public contract +- no dependency on repository layout at runtime +- easiest behavior to test as a release artifact + +Disadvantages: + +- requires explicit packaging configuration +- requires a small amount of asset-copy plumbing + +Chosen direction. + +### 2. 
Filter repository or packaged `skills/` at runtime + +Continue discovering a `skills/` directory, then filter for Browser CLI skill +names. + +Advantages: + +- smaller code change + +Disadvantages: + +- still depends on an implicit directory contract +- still couples runtime behavior to packaging accidents +- easier to widen public surface unintentionally + +Rejected. + +### 3. Require repository installs for `install-skills` + +Document that the command is only supported from a git checkout. + +Advantages: + +- avoids packaging work + +Disadvantages: + +- contradicts the command description +- weakens installed-user UX unnecessarily +- leaves a public command broken in the release artifact + +Rejected. + +## Public Contract + +### Command Shape + +`browser-cli install-skills` remains a top-level command. + +Arguments: + +- `--dry-run` +- `--target <path>` + +Default target: + +- `~/.agents/skills` + +### Installed Skill Set + +The command installs exactly these three skills: + +1. `browser-cli-delivery` +2. `browser-cli-explore` +3. `browser-cli-converge` + +No other skill directories are installed by this command, even if additional +skills exist in the repository. + +### Source of Truth + +The source of truth for the command is the packaged asset set included in the +installed distribution. + +Runtime fallback to: + +- git repository discovery +- repository root scanning +- network download + +is explicitly out of scope. + +### Update Behavior + +If a target skill directory already exists: + +- `--dry-run` reports `would update` +- a real run replaces the existing directory contents + +The command does not perform partial merges inside a skill directory. + +### Failure Semantics + +The command should fail immediately when: + +- any required packaged skill is missing +- packaged skill contents cannot be read +- the target directory cannot be created or written + +Failure should stop installation rather than returning a partial-success result.
+ +## Packaging Design + +### Asset Location + +The installable skill assets should live under this package-owned path: + +```text +src/browser_cli/packaged_skills/ + browser-cli-delivery/ + SKILL.md + browser-cli-explore/ + SKILL.md + browser-cli-converge/ + SKILL.md +``` + +These files must be present inside the wheel and readable through +`importlib.resources`. + +### Repository Editing Model + +The packaged path becomes the release-backed source for `install-skills`. + +Repository-top-level `skills/` may still exist for other workflows, but it is +not the runtime source for this command and must not silently widen the install +surface. + +This design intentionally prefers one release-backed truth over dual-source +runtime discovery. + +## Command Design + +The implementation should separate discovery, validation, and copy behavior. + +### Discovery Layer + +A helper should enumerate the packaged whitelist and verify that every required +skill exists before any target mutation begins. + +The helper should return a structured list of installable assets keyed by skill +name. + +### Installation Layer + +For each whitelisted skill: + +- resolve the target path under the chosen target directory +- report `would install` or `would update` during dry-run +- otherwise replace the target directory with the packaged contents + +Replacement should be directory-level replacement, not per-file merge logic. + +### CLI Layer + +The CLI handler should: + +- resolve `--target` or the default path +- call packaged-skill discovery +- run dry-run or real copy +- render the existing plain-text summary format + +The command may keep text output because it is a user-facing helper rather than +part of the daemon JSON contract. + +## Error Handling + +The command should distinguish packaging defects from user-environment failures. 
+ +### Packaging Defects + +Examples: + +- one of the three public skills is absent from packaged assets +- a packaged skill lacks `SKILL.md` +- resource extraction fails unexpectedly + +These should produce a direct error that identifies the missing or unreadable +skill by name. + +### User Environment Failures + +Examples: + +- target path parent cannot be created +- an existing target directory cannot be removed +- copy to target fails due to permissions + +These should produce a direct error that includes the target path and underlying +filesystem cause. + +## Testing And Validation + +This change needs release-oriented validation, not only repo-local unit tests. + +### Unit Tests + +Add or update tests to cover: + +- only the three whitelisted skills are considered installable +- `--target` overrides the default path +- dry-run reports `would install` and `would update` +- missing packaged assets fail fast +- existing directories are replaced on real install + +### Build-Artifact Tests + +Add a test or guard that validates the built wheel contains the three packaged +skill directories and their `SKILL.md` files. + +### Installed-Smoke Validation + +Add a release-oriented smoke step that: + +1. builds the distribution artifacts +2. installs the wheel into a clean environment +3. runs `browser-cli install-skills --dry-run --target <path>` +4. verifies the command succeeds + +This should run before publish, because this class of regression is invisible in +repository-local development environments. + +## Files Expected To Change + +Primary implementation areas: + +- `src/browser_cli/commands/install_skills.py` +- `src/browser_cli/cli/main.py` +- `pyproject.toml` +- packaging metadata needed to include packaged skill assets in the wheel +- tests covering install-skills behavior and release artifacts + +New package-owned asset paths are expected under `src/browser_cli/`.
+ +## Risks + +- dual-maintenance risk if repository-top-level `skills/` and packaged assets + drift +- future accidental widening of the install surface if tests only verify command + success and not the exact installed set +- release regressions if artifact checks are omitted from CI or release flow + +The design addresses these risks by using: + +- an explicit skill whitelist +- package-owned runtime assets +- wheel-level validation + +## Acceptance Criteria + +This design is complete when all of the following are true: + +- `browser-cli install-skills` installs only the three Browser CLI skills +- the command works from an installed wheel without a git checkout +- `--target` overrides the default install directory +- missing packaged assets produce a hard failure +- the wheel contains the packaged skill assets +- CI or release validation exercises the built artifact, not just source-tree + execution diff --git a/pyproject.toml b/pyproject.toml index 1f93101..241fa67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,10 @@ default-groups = ["dev"] [tool.setuptools] package-dir = {"" = "src"} +include-package-data = true + +[tool.setuptools.package-data] +"browser_cli.packaged_skills" = ["*/SKILL.md"] [tool.setuptools.packages.find] where = ["src"] diff --git a/scripts/generate_packaged_skill_docs.py b/scripts/generate_packaged_skill_docs.py new file mode 100644 index 0000000..c8207f2 --- /dev/null +++ b/scripts/generate_packaged_skill_docs.py @@ -0,0 +1,46 @@ +"""Sync packaged Browser CLI skill docs from the canonical repo skills.""" + +from __future__ import annotations + +from pathlib import Path + +SKILL_NAMES = ( + "browser-cli-delivery", + "browser-cli-explore", + "browser-cli-converge", +) + + +def repo_root() -> Path: + return Path(__file__).resolve().parents[1] + + +def canonical_skill_doc_path(root: Path, skill_name: str) -> Path: + return root / "skills" / skill_name / "SKILL.md" + + +def packaged_skill_doc_path(root: Path, skill_name: str) 
-> Path: + return root / "src" / "browser_cli" / "packaged_skills" / skill_name / "SKILL.md" + + +def expected_packaged_skill_docs(root: Path) -> dict[str, str]: + return { + skill_name: canonical_skill_doc_path(root, skill_name).read_text(encoding="utf-8") + for skill_name in SKILL_NAMES + } + + +def sync_packaged_skill_docs(root: Path) -> None: + for skill_name, content in expected_packaged_skill_docs(root).items(): + destination = packaged_skill_doc_path(root, skill_name) + destination.parent.mkdir(parents=True, exist_ok=True) + destination.write_text(content, encoding="utf-8") + + +def main() -> int: + sync_packaged_skill_docs(repo_root()) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/guards/architecture.py b/scripts/guards/architecture.py index b35bb5e..77b5deb 100644 --- a/scripts/guards/architecture.py +++ b/scripts/guards/architecture.py @@ -45,6 +45,7 @@ "drivers": {"browser", "errors", "extension", "profiles", "refs"}, "extension": {"constants", "errors", "extension"}, "outputs": set(), + "packaged_skills": set(), "profiles": {"errors"}, "refs": {"refs"}, "tabs": {"constants", "errors"}, diff --git a/scripts/guards/docs_sync.py b/scripts/guards/docs_sync.py index 6291138..4ce5efe 100644 --- a/scripts/guards/docs_sync.py +++ b/scripts/guards/docs_sync.py @@ -19,6 +19,7 @@ "`scripts/test.sh`", "`scripts/guard.sh`", "`scripts/check.sh`", + "`browser-cli install-skills` installs the packaged Browser CLI skills into `~/.agents/skills` by default and `--target` overrides the destination root.", ] REQUIRED_README_PHRASES = [ diff --git a/skills/browser-cli-converge/SKILL.md b/skills/browser-cli-converge/SKILL.md new file mode 100644 index 0000000..259babd --- /dev/null +++ b/skills/browser-cli-converge/SKILL.md @@ -0,0 +1,62 @@ +--- +name: browser-cli-converge +description: Turn validated Browser CLI exploration into stable task.py execution logic and task validation. 
+--- + +# Browser CLI Converge + +## Overview + +Use this skill after exploration has already validated the success path. +Its job is to encode that evidence into stable `task.py` logic and keep the +implementation aligned with `task.meta.json`. + +## When to Use + +Use this skill when: + +- the success path is already validated +- the task now needs executable Browser CLI task code +- waits, assertions, and artifacts are known well enough to encode + +Do not use this skill when: + +- the site still has unresolved evidence gaps +- the task mode is still unclear +- validation failures show the metadata is incomplete + +## Hard Rules + +- task.py is the single source of execution logic +- browser interactions must go through `browser_cli.task_runtime.Flow` +- task code must stay aligned with task.meta.json +- keep exploration-only retries, branches, and debug logic out of the final task +- validate with `browser-cli task validate` +- use `browser-cli task run` when runtime proof is needed +- if validation exposes an evidence gap, go back to `browser-cli-explore` + +## Phase Order + +1. Read the validated `task.meta.json` +2. Encode the stable success path in `task.py` +3. Add explicit waits, assertions, and artifact writes +4. Verify metadata-code alignment +5. Run `browser-cli task validate` +6. Run `browser-cli task run` if the task shape requires live proof +7. 
If evidence is missing, return to exploration instead of guessing + +## Done Criteria + +This skill is complete when: + +- `task.py` replays the validated path +- waits and assertions are explicit +- the code and metadata describe the same workflow +- task validation passes + +## Common Mistakes + +- bypassing the task runtime with direct Playwright +- encoding guesses instead of validated waits or refs +- letting metadata and code drift apart +- patching around a missing exploration lesson instead of going back diff --git a/skills/browser-cli-delivery/SKILL.md b/skills/browser-cli-delivery/SKILL.md new file mode 100644 index 0000000..25d02f6 --- /dev/null +++ b/skills/browser-cli-delivery/SKILL.md @@ -0,0 +1,61 @@ +--- +name: browser-cli-delivery +description: Orchestrate Browser CLI exploration, convergence, validation, and optional automation packaging for reusable web tasks. +--- + +# Browser CLI Delivery + +## Overview + +Use this as the main entrypoint when the user wants a reusable Browser CLI web +task rather than one-off browsing. The default endpoint is stable +`task.py + task.meta.json`. `automation.toml` generation and publish are +optional user-driven branches. + +## When to Use + +Use this skill when: + +- the user wants a reusable browser task +- the work may require exploration, iteration, and validation +- the final deliverable should match Browser CLI task artifacts + +Do not use this skill when: + +- one-off browsing is enough +- the task is not Browser CLI based +- the work is already scoped to only one lower-level skill + +## Hard Rules + +- this is the main user-facing skill +- call `browser-cli-explore` when evidence is missing +- call `browser-cli-converge` when the success path is validated +- default completion is `task.py + task.meta.json` +- `automation.toml` and publish are optional and require user choice +- If validation fails because evidence is missing, go back to explore +- do not publish by default + +## Phase Order + +1. 
Preflight: confirm Browser CLI, Python environment, login/profile, and site assumptions +2. Explore: call `browser-cli-explore` to validate the task mode and capture feedback +3. Converge: call `browser-cli-converge` to encode the stable path in `task.py` +4. Validate: run task validation and decide whether to fix code or return to explore +5. Optional automation: ask whether to create `automation.toml` +6. Optional publish: ask whether to run Browser CLI automation publish + +## Done Criteria + +This skill is complete when: + +- `task.py + task.meta.json` are stable +- validation passed +- optional automation work is either completed or intentionally skipped by the user + +## Common Mistakes + +- skipping metadata capture +- converging before the success path is real +- generating automation packaging too early +- treating one successful page run as enough evidence diff --git a/skills/browser-cli-explore-delivery/SKILL.md b/skills/browser-cli-explore-delivery/SKILL.md deleted file mode 100644 index 8989e05..0000000 --- a/skills/browser-cli-explore-delivery/SKILL.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -name: browser-cli-explore-delivery -description: Use when website work must end as reusable browser-cli task artifacts rather than one-off chat notes, especially when success depends on validating runtime assumptions and choosing the right exploration mode. ---- - -# Browser CLI Explore Delivery - -## Overview - -Use `browser-cli` as the browser backend and converge successful work into -`task.py` plus `task.meta.json`. Keep automation publication separate from task -logic. 
- -## When to Use - -Use this skill when: - -- the browser path should become a reusable task -- the site depends on real browser execution, cookies, login context, or page state -- the output should end as `task.py` plus `task.meta.json` - -Do not use this skill when: - -- one-off browsing is enough -- the task is pure API/data work with no browser dependency -- `browser-cli` cannot be installed or used - -## Phase Order - -Always follow this order: - -1. Preflight -2. Choose task mode -3. Explore with `browser-cli` -4. Converge to `task.py` -5. Distill `task.meta.json` -6. Templates -7. Optional publish gate -8. Publish `automation.toml` or `browser-cli automation publish` - -Never skip from exploration straight to publication. - -## Quick Decisions - -- Need dependency, runtime, profile, or artifact checks first: read - [`references/preflight-and-runtime.md`](references/preflight-and-runtime.md) -- Need to decide whether the task is content-first, browser-state-first, login-state-first, - or scroll-first: read [`references/task-modes.md`](references/task-modes.md) -- Need to decide which inputs users should actually see: read - [`references/task-input-design.md`](references/task-input-design.md) - -## 1. Preflight - -- confirm `browser-cli`, `browser_cli`, Python, browser, and task-specific Python deps -- confirm the task will run in the same Python environment you just validated -- confirm profile assumptions such as login state, cookies, locale, and writable artifacts -- if the advertised CLI surface and the live daemon disagree, do one `browser-cli reload` before declaring a capability gap -- if anything is missing, produce a short install/fix plan and ask before changing the environment - -Do not silently install `browser-cli`, browser dependencies, or Python packages. - -## 2. 
Browser CLI Rules - -- `browser-cli` is the only browser execution backend -- do not switch to direct Playwright as the main path -- use the smallest reliable signal for the current task mode -- stop once the successful path is deterministic -- stop once you hit a real capability gap; do not hide it behind retries -- a single runtime reset is allowed when a command is documented by the current CLI but rejected by the live daemon or backend - -## 3. Explore - -- capture only observations that change the next decision -- verify each critical step locally before assuming success -- refresh snapshots only when semantics changed -- keep exploration token usage low with targeted checks - -## 4. Converge to `task.py` - -- `task.py` is the single source of execution logic -- route browser actions through `browser_cli.task_runtime` -- helper functions, bounded retries, loops, and assertions are allowed -- direct Playwright and raw daemon handling are not the primary path -- keep exploration-only waits and retry knobs as internal defaults unless users benefit from controlling them -- there is no required `browser-cli task init`; the agent may create the task directory directly where the project expects it, including `~/.browser-cli/tasks//` - -## 5. Distill `task.meta.json` - -- keep: `task`, `environment`, `success_path`, `recovery_hints`, `failures`, `knowledge` -- record reusable environment assumptions, recovery patterns, and mode-specific lessons -- preserve failures that teach something reusable -- do not dump raw logs or chat transcripts - -## 6. Templates Are Mandatory - -When producing or modifying task deliverables, start from the repository -templates and preserve their contract: - -- `tasks/_templates/task.py` -- `tasks/_templates/task.meta.json` -- `tasks/_templates/automation.toml` - -The agent must not invent an ad hoc task structure or omit required metadata -sections. `browser-cli task validate` must succeed before publication. - -## 7. 
Publish Gate - -Move to automation publication only when both are true: - -- the task is already stable -- the user approved publication - -If not, stop after `task.py` and `task.meta.json`. - -## 8. Publish `automation.toml` - -`automation.toml` wraps the task or published snapshot. It does not -re-implement browser logic. - -Use these surfaces: - -- `browser-cli task validate ` -- `browser-cli task run --set key=value` -- `browser-cli automation publish ` -- `browser-cli automation import ` -- `browser-cli automation export --output ` - -Prefer `browser-cli automation publish` when the user wants a durable published -snapshot. It creates a new immutable snapshot version and auto-imports it into -the automation service. - -## Done Criteria - -Stop only when one of these states is true: - -- preflight failed and the user declined installation -- a stable `task.py` plus `task.meta.json` has been produced -- the user approved publication and a working automation snapshot has also been produced - -## Common Mistakes - -- validating `browser-cli` in one Python environment and executing the task in another -- choosing the wrong task mode and exploring the page with the wrong signal first -- exposing exploration-only knobs as user-facing inputs -- treating a stale daemon/runtime mismatch as a permanent missing feature before trying one `browser-cli reload` -- retrying around a missing capability instead of stopping to confirm -- leaving the successful path only in chat instead of `task.py` -- writing a custom `task.meta.json` shape instead of following the required template diff --git a/skills/browser-cli-explore-delivery/agents/openai.yaml b/skills/browser-cli-explore-delivery/agents/openai.yaml deleted file mode 100644 index 7a36a6f..0000000 --- a/skills/browser-cli-explore-delivery/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Browser CLI Explore Delivery" - short_description: "Explore sites into Browser CLI tasks" diff --git 
a/skills/browser-cli-explore-delivery/references/preflight-and-runtime.md b/skills/browser-cli-explore-delivery/references/preflight-and-runtime.md deleted file mode 100644 index 456e9a9..0000000 --- a/skills/browser-cli-explore-delivery/references/preflight-and-runtime.md +++ /dev/null @@ -1,59 +0,0 @@ -# Preflight And Runtime - -Use this reference before exploration when task success may depend on the -execution environment as much as the browser steps. - -## What to Prove - -- `browser-cli` is callable -- `browser_cli` is importable -- the Python environment that will execute `task.py` is the same one you just validated -- Chrome and the `browser-cli` runtime are usable -- task-specific Python deps exist in that same environment -- the target profile assumptions are true: login state, cookies, locale, storage, output dir - -## Minimum Checks - -- `which browser-cli` -- `python -c 'import browser_cli; print(browser_cli.__file__)'` -- any extra task deps in the same Python, such as `requests` -- `browser-cli status` -- a writable artifacts directory - -If the task will later be run by `browser-cli task run`, the automation service, -or plain Python, validate that exact entry environment now. Do not validate with -one interpreter and execute with another. - -## Profile And Site Assumptions - -Check these before you explore too far: - -- does the site require login -- does the current browser profile already have the needed login or cookies -- does the site depend on geo, locale, or persistent storage -- does the task need the full browser cookie jar rather than just `document.cookie` - -If the task needs a login state that is missing, stop and confirm rather than -inventing a fake path. 
- -## Early-Stop Signals - -Stop and confirm quickly when any of these are true: - -- the task needs response bodies, CDP data, or another driver feature the runtime does not expose -- the task depends on a missing login state or profile assumption -- the validated Python environment is not the one that will run the task -- the browser runtime is healthy enough to open a page but not healthy enough to provide the needed signal - -Do not hide these gaps behind repeated retries. - -## Douyin Lesson - -For signed Douyin detail requests, the stable path required: - -- real browser navigation to mint signed request URLs -- the full browser cookie jar from `browser-cli cookies` -- replay with matching `Referer`, `User-Agent`, and csrf headers - -Replaying the detail URL with only `document.cookie` returned HTTP `200` with -an empty body. That was a runtime assumption failure, not a retry problem. diff --git a/skills/browser-cli-explore-delivery/references/task-input-design.md b/skills/browser-cli-explore-delivery/references/task-input-design.md deleted file mode 100644 index 3c9435c..0000000 --- a/skills/browser-cli-explore-delivery/references/task-input-design.md +++ /dev/null @@ -1,52 +0,0 @@ -# Task Input Design - -Design inputs for the user who will rerun the task, not for the agent who -explored it once. - -## Default Rule - -Expose only the inputs users actually care about. Keep exploration knobs as -internal defaults unless they are likely to matter during normal reruns. - -## Usually User-Facing - -- target URL, query, or identifier -- output path or destination -- output filename or overwrite behavior -- filters, scope, date range, or count limits -- explicit profile selection only if users really need to choose it - -## Usually Internal - -- wait loops -- retry counts -- polling intervals -- exploration timeouts -- transient recovery toggles - -Tune these during exploration, then encode stable defaults in `task.py`. 
- -## Good Defaults - -If the user did not specify a value but a sensible default improves usability, -choose one and document it in metadata. - -Good examples: - -- default download path in the task artifacts directory -- filename derived from a stable content identifier - -Bad examples: - -- exposing `wait_rounds`, `wait_seconds`, or similar knobs by default just because the agent used them during exploration - -## Metadata Expectations - -Record these in `task.meta.json`: - -- which inputs are user-facing -- which defaults were chosen for rerun stability -- which internal knobs remain hidden and why - -The goal is a task that feels obvious to run later, not a transcript of the -exploration process. diff --git a/skills/browser-cli-explore-delivery/references/task-modes.md b/skills/browser-cli-explore-delivery/references/task-modes.md deleted file mode 100644 index 55eea99..0000000 --- a/skills/browser-cli-explore-delivery/references/task-modes.md +++ /dev/null @@ -1,87 +0,0 @@ -# Task Modes - -Choose the primary task mode before exploring. Start with one mode and switch -only when evidence demands it. 
- -## Content-First - -Use when: - -- the goal is extracting rendered text, tables, cards, or form results -- the page meaning is visible in DOM semantics - -Prefer: - -- `snapshot` -- semantic refs -- `click`, `fill`, `wait`, `html` - -Avoid: - -- overusing `eval` when refs and rendered HTML already prove the path - -## Browser-State-First - -Use when: - -- success depends on signed URLs, browser-generated tokens, cookies, storage, or performance state -- the page is easier to reason about through browser state than visible DOM -- anti-bot behavior makes direct replay fragile - -Prefer: - -- `open` -- `eval` -- `performance` entries -- cookies and storage capture -- Python replay with the validated browser context - -Avoid: - -- assuming `network` capture already gives you response bodies -- defaulting to semantic refs when the real signal lives in browser state - -This was the right mode for the Douyin download task. - -## Login-State-First - -Use when: - -- the task depends on an existing logged-in profile -- the main risk is session validity, not page interaction mechanics - -Prefer: - -- verify login state immediately -- record profile assumptions in metadata -- stop early if the needed session is absent - -Avoid: - -- exploring deep flows before proving the profile is usable - -## Scroll-First - -Use when: - -- content appears only after incremental loading -- the main difficulty is stabilization rather than interaction - -Prefer: - -- bounded scroll loops -- explicit stability checks -- artifacts that record the stabilization history - -Avoid: - -- open-ended scrolling with no stop rule - -## Response-Body Gate - -If the task requires direct access to response bodies, decide that up front. - -- If `browser-cli` already exposes the response body you need, use it. -- If it only exposes request metadata, either find a stable browser-state-first fallback or stop and confirm the runtime gap. 
- -Do not discover this after a long exploration loop if you can prove it early. diff --git a/skills/browser-cli-explore/SKILL.md b/skills/browser-cli-explore/SKILL.md new file mode 100644 index 0000000..127daf3 --- /dev/null +++ b/skills/browser-cli-explore/SKILL.md @@ -0,0 +1,77 @@ +--- +name: browser-cli-explore +description: Explore real websites with Browser CLI, validate task mode, and distill durable feedback into task metadata. +--- + +# Browser CLI Explore + +## Overview + +Use `browser-cli` to explore a site, test candidate paths, and distill only the +durable findings needed to build a reusable task. The primary output of this +skill is structured knowledge in `task.meta.json`, not final task code. + +## When to Use + +Use this skill when: + +- a web task still needs exploration or validation +- the page depends on real browser state, cookies, login, or rendering +- the next useful artifact is better task metadata, not yet final `task.py` + +Do not use this skill when: + +- the success path is already validated end to end +- the work is only task-code refactoring with no evidence gap +- the task is pure API work with no Browser CLI dependency + +## Hard Rules + +- browser-cli is the primary browser execution path +- choose the task mode before broad exploration +- capture only observations that change the next decision +- update `task.meta.json` as a rolling feedback sink +- treat these metadata sections as required destinations for durable knowledge: + `environment`, `success_path`, `recovery_hints`, `failures`, `knowledge` +- stop once the evidence is strong enough for deterministic implementation +- Do not record raw logs, chat transcripts, or exploratory dead ends in metadata +- Do not turn one lucky run into stable knowledge without a verification step + +## Phase Order + +1. Confirm the site-specific preflight assumptions: + login state, cookies, locale, browser profile, writable artifacts, Python env +2. 
Choose the task mode: + `ref-driven`, `content-first`, `lazy-scroll`, `login-state-first`, or + `browser-state/network-assisted` +3. Explore with the smallest reliable Browser CLI signal +4. Capture durable findings into `task.meta.json` +5. Stop when the success path, waits, refs, and failure lessons are clear enough + for `task.py` + +## Metadata Capture Rules + +- `environment`: site, entry URL, login requirements, profile assumptions, + browser assumptions +- `success_path`: validated steps, key refs, assertions, artifacts +- `recovery_hints`: retryable steps, alternate paths, stale-ref strategy, wait + points, anti-bot recovery +- `failures`: repeatable failure modes and the lesson each one teaches +- `knowledge`: stable selectors/roles, semantic-ref notes, pagination, + lazy-load, anti-bot, and output interpretation rules + +## Done Criteria + +This skill is complete when: + +- the task mode is known +- the stable path is understood +- the fragile points are documented +- `task.meta.json` contains enough evidence for `browser-cli-converge` + +## Common Mistakes + +- exploring with direct Playwright instead of Browser CLI +- jumping straight from browsing to `task.py` +- keeping the useful lessons only in chat +- recording logs instead of reusable metadata diff --git a/src/browser_cli/cli/main.py b/src/browser_cli/cli/main.py index 8c6a1db..967ac35 100644 --- a/src/browser_cli/cli/main.py +++ b/src/browser_cli/cli/main.py @@ -231,14 +231,18 @@ def build_parser() -> argparse.ArgumentParser: skills_parser = subparsers.add_parser( "install-skills", - help="Install packaged skills to ~/.agents/skills.", - description="Copy bundled skills from the package to the user's skills directory.", + help="Install packaged skills for Browser CLI to a skills directory.", + description="Copy packaged skills for Browser CLI from the installed package to the target skills directory.", ) skills_parser.add_argument( "--dry-run", action="store_true", help="Show what would be 
installed without making changes.", ) + skills_parser.add_argument( + "--target", + help="Optional target directory. Defaults to ~/.agents/skills.", + ) skills_parser.set_defaults(handler=run_install_skills_command) for spec in get_action_specs(): diff --git a/src/browser_cli/commands/install_skills.py b/src/browser_cli/commands/install_skills.py index cc3c4ba..dceee15 100644 --- a/src/browser_cli/commands/install_skills.py +++ b/src/browser_cli/commands/install_skills.py @@ -1,147 +1,138 @@ -"""Command to install bundled skills to the user's skills directory.""" +"""Install packaged Browser CLI skills into a target skills directory.""" from __future__ import annotations import argparse +import os import shutil -import subprocess -import sys +import uuid +from dataclasses import dataclass +from importlib import resources +from importlib.abc import Traversable from pathlib import Path -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from collections.abc import Sequence +from browser_cli.errors import InvalidInputError, OperationFailedError +PUBLIC_SKILL_NAMES = ( + "browser-cli-converge", + "browser-cli-delivery", + "browser-cli-explore", +) -def _get_pip_show_location() -> Path | None: - """Get package installation root from pip show.""" - try: - result = subprocess.run( - [sys.executable, "-m", "pip", "show", "browser-cli"], - capture_output=True, - text=True, - check=True, - ) - for line in result.stdout.splitlines(): - if line.startswith("Location:"): - location = line.split(":", 1)[1].strip() - return Path(location) - except (subprocess.CalledProcessError, FileNotFoundError): - pass - return None - - -def _find_git_root() -> Path | None: - """Find git repository root (for development mode).""" - import browser_cli - - package_path = Path(browser_cli.__file__).parent - current = package_path - while current.parent != current: - if (current / ".git").exists(): - return current - current = current.parent - return None - - -def get_skills_source_path() -> 
Path | None: - """Get the path to bundled skills. - - Tries pip installation location first, then git root for development. - """ - # Try pip installation location - pip_location = _get_pip_show_location() - if pip_location: - skills_path = pip_location / "skills" - if skills_path.exists() and skills_path.is_dir(): - return skills_path - - # Try git root (development mode) - git_root = _find_git_root() - if git_root: - skills_path = git_root / "skills" - if skills_path.exists() and skills_path.is_dir(): - return skills_path - - return None - - -def get_skills_target_path() -> Path: - """Get the target path for skills installation.""" - return Path.home() / ".agents" / "skills" +@dataclass(frozen=True, slots=True) +class PackagedSkill: + name: str + source: Traversable | Path -def install_skills( - source: Path, - target: Path, - dry_run: bool = False, -) -> Sequence[tuple[str, str]]: - """Install skills from source to target directory. - Args: - source: Path to bundled skills directory. - target: Path to user's skills directory. - dry_run: If True, only report what would be done. +def _packaged_skills_root() -> Traversable: + return resources.files("browser_cli.packaged_skills") - Returns: - List of (skill_name, status) tuples. 
- """ - results: list[tuple[str, str]] = [] - if not dry_run: - target.mkdir(parents=True, exist_ok=True) - - for skill_dir in sorted(source.iterdir()): - if not skill_dir.is_dir(): - continue - - skill_target = target / skill_dir.name - - if skill_target.exists(): - if dry_run: - results.append((skill_dir.name, "would update")) - else: - shutil.rmtree(skill_target) - shutil.copytree(skill_dir, skill_target) - results.append((skill_dir.name, "updated")) - else: - if dry_run: - results.append((skill_dir.name, "would install")) - else: - shutil.copytree(skill_dir, skill_target) - results.append((skill_dir.name, "installed")) - - return results +def discover_packaged_skills() -> list[PackagedSkill]: + root = _packaged_skills_root() + discovered: list[PackagedSkill] = [] + for name in PUBLIC_SKILL_NAMES: + skill_root = root.joinpath(name) + if not skill_root.is_dir(): + raise InvalidInputError(f"Packaged skill is missing from this build: {name}") + skill_doc = skill_root.joinpath("SKILL.md") + if not skill_doc.is_file(): + raise InvalidInputError( + f"Packaged skill is incomplete in this build: {name} is missing SKILL.md" + ) + discovered.append(PackagedSkill(name=name, source=skill_root)) + return discovered -def run_install_skills_command(args: argparse.Namespace) -> str | None: - """Run the install-skills command. +def get_skills_target_path(target: str | None) -> Path: + if target: + return Path(target).expanduser().resolve() + return Path.home() / ".agents" / "skills" - Args: - args: Parsed CLI arguments. - Returns: - Output message to print, or None on failure. 
- """ - source = get_skills_source_path() - if source is None: - sys.stderr.write("Error: bundled skills not found in package\n") - return None +def install_skills_from_paths( + skills: list[PackagedSkill], + target: Path, + *, + dry_run: bool = False, +) -> list[tuple[str, str]]: + results: list[tuple[str, str]] = [] + if not dry_run: + try: + target.mkdir(parents=True, exist_ok=True) + except OSError as exc: + raise OperationFailedError( + f"Could not create skills target directory {target}: {exc}" + ) from exc + for skill in skills: + destination = target / skill.name + exists = destination.exists() + status = ( + "would update" + if dry_run and exists + else "would install" + if dry_run + else "updated" + if exists + else "installed" + ) + if not dry_run: + _install_one_skill(skill, destination) + results.append((skill.name, status)) + return results - target = get_skills_target_path() - results = install_skills(source, target, dry_run=args.dry_run) +def _install_one_skill(skill: PackagedSkill, destination: Path) -> None: + source = skill.source if isinstance(skill.source, Traversable) else Path(skill.source) + staged = destination.with_name(f"{destination.name}.tmp-{uuid.uuid4().hex}") + try: + _copy_skill_tree(source, staged) + backup = _swap_staged_directory(staged, destination) + if backup is not None: + shutil.rmtree(backup, ignore_errors=True) + except OSError as exc: + if staged.exists(): + shutil.rmtree(staged, ignore_errors=True) + raise OperationFailedError( + f"Could not install skill {skill.name} to {destination}: {exc}" + ) from exc + + +def _copy_skill_tree(source: Traversable | Path, destination: Path) -> None: + destination.mkdir(parents=True, exist_ok=False) + for child in source.iterdir(): + child_destination = destination / child.name + if child.is_dir(): + _copy_skill_tree(child, child_destination) + continue + with child.open("rb") as handle, child_destination.open("wb") as output: + shutil.copyfileobj(handle, output) - if not results: - 
return "No skills to install.\n" +def _swap_staged_directory(staged: Path, destination: Path) -> Path | None: + backup: Path | None = None + if destination.exists(): + backup = destination.with_name(f"{destination.name}.bak-{uuid.uuid4().hex}") + os.replace(destination, backup) + try: + os.replace(staged, destination) + except OSError: + if backup is not None and backup.exists() and not destination.exists(): + os.replace(backup, destination) + raise + return backup + + +def run_install_skills_command(args: argparse.Namespace) -> str: + skills = discover_packaged_skills() + target = get_skills_target_path(getattr(args, "target", None)) + results = install_skills_from_paths(skills, target, dry_run=bool(args.dry_run)) mode = "(dry-run) " if args.dry_run else "" lines = [f"{mode}Installing skills to {target}:", ""] - for skill_name, status in results: lines.append(f" {skill_name}: {status}") - lines.append("") lines.append(f"Total: {len(results)} skill(s)") - return "\n".join(lines) + "\n" diff --git a/src/browser_cli/packaged_skills/__init__.py b/src/browser_cli/packaged_skills/__init__.py new file mode 100644 index 0000000..46759a0 --- /dev/null +++ b/src/browser_cli/packaged_skills/__init__.py @@ -0,0 +1 @@ +"""Packaged Browser CLI skills shipped with the installed distribution.""" diff --git a/src/browser_cli/packaged_skills/browser-cli-converge/SKILL.md b/src/browser_cli/packaged_skills/browser-cli-converge/SKILL.md new file mode 100644 index 0000000..259babd --- /dev/null +++ b/src/browser_cli/packaged_skills/browser-cli-converge/SKILL.md @@ -0,0 +1,62 @@ +--- +name: browser-cli-converge +description: Turn validated Browser CLI exploration into stable task.py execution logic and task validation. +--- + +# Browser CLI Converge + +## Overview + +Use this skill after exploration has already validated the success path. +Its job is to encode that evidence into stable `task.py` logic and keep the +implementation aligned with `task.meta.json`. 
+ +## When to Use + +Use this skill when: + +- the success path is already validated +- the task now needs executable Browser CLI task code +- waits, assertions, and artifacts are known well enough to encode + +Do not use this skill when: + +- the site still has unresolved evidence gaps +- the task mode is still unclear +- validation failures show the metadata is incomplete + +## Hard Rules + +- task.py is the single source of execution logic +- browser interactions must go through `browser_cli.task_runtime.Flow` +- task code must stay aligned with task.meta.json +- keep exploration-only retries, branches, and debug logic out of the final task +- validate with `browser-cli task validate` +- use `browser-cli task run` when runtime proof is needed +- if validation exposes an evidence gap, go back to `browser-cli-explore` + +## Phase Order + +1. Read the validated `task.meta.json` +2. Encode the stable success path in `task.py` +3. Add explicit waits, assertions, and artifact writes +4. Verify metadata-code alignment +5. Run `browser-cli task validate` +6. Run `browser-cli task run` if the task shape requires live proof +7. 
If evidence is missing, return to exploration instead of guessing + +## Done Criteria + +This skill is complete when: + +- `task.py` replays the validated path +- waits and assertions are explicit +- the code and metadata describe the same workflow +- task validation passes + +## Common Mistakes + +- bypassing the task runtime with direct Playwright +- encoding guesses instead of validated waits or refs +- letting metadata and code drift apart +- patching around a missing exploration lesson instead of going back diff --git a/src/browser_cli/packaged_skills/browser-cli-delivery/SKILL.md b/src/browser_cli/packaged_skills/browser-cli-delivery/SKILL.md new file mode 100644 index 0000000..25d02f6 --- /dev/null +++ b/src/browser_cli/packaged_skills/browser-cli-delivery/SKILL.md @@ -0,0 +1,61 @@ +--- +name: browser-cli-delivery +description: Orchestrate Browser CLI exploration, convergence, validation, and optional automation packaging for reusable web tasks. +--- + +# Browser CLI Delivery + +## Overview + +Use this as the main entrypoint when the user wants a reusable Browser CLI web +task rather than one-off browsing. The default endpoint is stable +`task.py + task.meta.json`. `automation.toml` generation and publish are +optional user-driven branches. 
+ +## When to Use + +Use this skill when: + +- the user wants a reusable browser task +- the work may require exploration, iteration, and validation +- the final deliverable should match Browser CLI task artifacts + +Do not use this skill when: + +- one-off browsing is enough +- the task is not Browser CLI based +- the work is already scoped to only one lower-level skill + +## Hard Rules + +- this is the main user-facing skill +- call `browser-cli-explore` when evidence is missing +- call `browser-cli-converge` when the success path is validated +- default completion is `task.py + task.meta.json` +- `automation.toml` and publish are optional and require user choice +- If validation fails because evidence is missing, go back to explore +- do not publish by default + +## Phase Order + +1. Preflight: confirm Browser CLI, Python environment, login/profile, and site assumptions +2. Explore: call `browser-cli-explore` to validate the task mode and capture feedback +3. Converge: call `browser-cli-converge` to encode the stable path in `task.py` +4. Validate: run task validation and decide whether to fix code or return to explore +5. Optional automation: ask whether to create `automation.toml` +6. 
Optional publish: ask whether to run Browser CLI automation publish + +## Done Criteria + +This skill is complete when: + +- `task.py + task.meta.json` are stable +- validation passed +- optional automation work is either completed or intentionally skipped by the user + +## Common Mistakes + +- skipping metadata capture +- converging before the success path is real +- generating automation packaging too early +- treating one successful page run as enough evidence diff --git a/src/browser_cli/packaged_skills/browser-cli-explore/SKILL.md b/src/browser_cli/packaged_skills/browser-cli-explore/SKILL.md new file mode 100644 index 0000000..127daf3 --- /dev/null +++ b/src/browser_cli/packaged_skills/browser-cli-explore/SKILL.md @@ -0,0 +1,77 @@ +--- +name: browser-cli-explore +description: Explore real websites with Browser CLI, validate task mode, and distill durable feedback into task metadata. +--- + +# Browser CLI Explore + +## Overview + +Use `browser-cli` to explore a site, test candidate paths, and distill only the +durable findings needed to build a reusable task. The primary output of this +skill is structured knowledge in `task.meta.json`, not final task code. 
+ +## When to Use + +Use this skill when: + +- a web task still needs exploration or validation +- the page depends on real browser state, cookies, login, or rendering +- the next useful artifact is better task metadata, not yet final `task.py` + +Do not use this skill when: + +- the success path is already validated end to end +- the work is only task-code refactoring with no evidence gap +- the task is pure API work with no Browser CLI dependency + +## Hard Rules + +- browser-cli is the primary browser execution path +- choose the task mode before broad exploration +- capture only observations that change the next decision +- update `task.meta.json` as a rolling feedback sink +- treat these metadata sections as required destinations for durable knowledge: + `environment`, `success_path`, `recovery_hints`, `failures`, `knowledge` +- stop once the evidence is strong enough for deterministic implementation +- Do not record raw logs, chat transcripts, or exploratory dead ends in metadata +- Do not turn one lucky run into stable knowledge without a verification step + +## Phase Order + +1. Confirm the site-specific preflight assumptions: + login state, cookies, locale, browser profile, writable artifacts, Python env +2. Choose the task mode: + `ref-driven`, `content-first`, `lazy-scroll`, `login-state-first`, or + `browser-state/network-assisted` +3. Explore with the smallest reliable Browser CLI signal +4. Capture durable findings into `task.meta.json` +5. 
Stop when the success path, waits, refs, and failure lessons are clear enough + for `task.py` + +## Metadata Capture Rules + +- `environment`: site, entry URL, login requirements, profile assumptions, + browser assumptions +- `success_path`: validated steps, key refs, assertions, artifacts +- `recovery_hints`: retryable steps, alternate paths, stale-ref strategy, wait + points, anti-bot recovery +- `failures`: repeatable failure modes and the lesson each one teaches +- `knowledge`: stable selectors/roles, semantic-ref notes, pagination, + lazy-load, anti-bot, and output interpretation rules + +## Done Criteria + +This skill is complete when: + +- the task mode is known +- the stable path is understood +- the fragile points are documented +- `task.meta.json` contains enough evidence for `browser-cli-converge` + +## Common Mistakes + +- exploring with direct Playwright instead of Browser CLI +- jumping straight from browsing to `task.py` +- keeping the useful lessons only in chat +- recording logs instead of reusable metadata diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index df1ba77..d3afa99 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -164,3 +164,12 @@ def test_reload_command_renders_summary(capsys) -> None: captured = capsys.readouterr() assert exit_code == 0 assert captured.out == "Reload: complete\n" + + +def test_install_skills_help_mentions_target(capsys) -> None: + exit_code = main(["install-skills", "--help"]) + captured = capsys.readouterr() + assert exit_code == 0 + assert "--dry-run" in captured.out + assert "--target" in captured.out + assert "packaged skills" in captured.out.lower() diff --git a/tests/unit/test_install_skills_command.py b/tests/unit/test_install_skills_command.py new file mode 100644 index 0000000..42463b5 --- /dev/null +++ b/tests/unit/test_install_skills_command.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +from argparse import Namespace +from pathlib import Path + +import pytest + 
+from browser_cli.commands import install_skills as install_skills_module +from browser_cli.errors import InvalidInputError, OperationFailedError + + +def _write_skill(root: Path, name: str, *, body: str | None = None) -> Path: + skill_dir = root / name + skill_dir.mkdir(parents=True) + (skill_dir / "SKILL.md").write_text(body or f"# {name}\n", encoding="utf-8") + return skill_dir + + +def test_get_skills_target_path_defaults_to_agents_skills( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setattr(install_skills_module.Path, "home", lambda: tmp_path) + assert install_skills_module.get_skills_target_path(None) == tmp_path / ".agents" / "skills" + + +def test_get_skills_target_path_honors_explicit_target(tmp_path: Path) -> None: + target = tmp_path / "custom-skills" + assert install_skills_module.get_skills_target_path(str(target)) == target.resolve() + + +def test_discover_packaged_skills_returns_three_public_skills( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + source_root = tmp_path / "source" + for name in install_skills_module.PUBLIC_SKILL_NAMES: + _write_skill(source_root, name) + monkeypatch.setattr(install_skills_module, "_packaged_skills_root", lambda: source_root) + + discovered = install_skills_module.discover_packaged_skills() + + assert [item.name for item in discovered] == list(install_skills_module.PUBLIC_SKILL_NAMES) + + +def test_discover_packaged_skills_fails_when_a_required_skill_is_missing( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + source_root = tmp_path / "source" + _write_skill(source_root, "browser-cli-delivery") + _write_skill(source_root, "browser-cli-explore") + monkeypatch.setattr(install_skills_module, "_packaged_skills_root", lambda: source_root) + + with pytest.raises(InvalidInputError, match="browser-cli-converge"): + install_skills_module.discover_packaged_skills() + + +def test_discover_packaged_skills_fails_when_skill_doc_is_missing( + monkeypatch: 
pytest.MonkeyPatch, tmp_path: Path +) -> None: + source_root = tmp_path / "source" + incomplete_skill = source_root / "browser-cli-converge" + incomplete_skill.mkdir(parents=True) + _write_skill(source_root, "browser-cli-delivery") + _write_skill(source_root, "browser-cli-explore") + monkeypatch.setattr(install_skills_module, "_packaged_skills_root", lambda: source_root) + + with pytest.raises( + InvalidInputError, match=r"browser-cli-converge.*SKILL\.md|SKILL\.md.*browser-cli-converge" + ): + install_skills_module.discover_packaged_skills() + + +def test_install_skills_from_paths_reports_install_and_update(tmp_path: Path) -> None: + source_root = tmp_path / "source" + target_root = tmp_path / "target" + skills = [ + install_skills_module.PackagedSkill( + name="browser-cli-converge", + source=_write_skill(source_root, "browser-cli-converge"), + ), + install_skills_module.PackagedSkill( + name="browser-cli-delivery", + source=_write_skill(source_root, "browser-cli-delivery"), + ), + install_skills_module.PackagedSkill( + name="browser-cli-explore", + source=_write_skill(source_root, "browser-cli-explore"), + ), + ] + (target_root / "browser-cli-delivery").mkdir(parents=True) + + results = install_skills_module.install_skills_from_paths(skills, target_root, dry_run=True) + + assert results == [ + ("browser-cli-converge", "would install"), + ("browser-cli-delivery", "would update"), + ("browser-cli-explore", "would install"), + ] + + +def test_install_skills_from_paths_replaces_existing_skill_directory(tmp_path: Path) -> None: + source_root = tmp_path / "source" + target_root = tmp_path / "target" + source = _write_skill(source_root, "browser-cli-delivery", body="# new\n") + target = target_root / "browser-cli-delivery" + target.mkdir(parents=True) + (target / "SKILL.md").write_text("# old\n", encoding="utf-8") + (target / "stale.txt").write_text("stale\n", encoding="utf-8") + + results = install_skills_module.install_skills_from_paths( + 
[install_skills_module.PackagedSkill(name="browser-cli-delivery", source=source)], + target_root, + dry_run=False, + ) + + assert results == [("browser-cli-delivery", "updated")] + assert (target / "SKILL.md").read_text(encoding="utf-8") == "# new\n" + assert not (target / "stale.txt").exists() + + +def test_install_skills_from_paths_preserves_existing_skill_when_copy_fails( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + source_root = tmp_path / "source" + target_root = tmp_path / "target" + source = _write_skill(source_root, "browser-cli-delivery", body="# new\n") + target = target_root / "browser-cli-delivery" + target.mkdir(parents=True) + (target / "SKILL.md").write_text("# old\n", encoding="utf-8") + (target / "stale.txt").write_text("stale\n", encoding="utf-8") + + def broken_copy(_source: install_skills_module.Traversable | Path, destination: Path) -> None: + destination.mkdir(parents=True, exist_ok=False) + (destination / "SKILL.md").write_text("# partial\n", encoding="utf-8") + raise OSError("copy failed") + + monkeypatch.setattr(install_skills_module, "_copy_skill_tree", broken_copy) + + with pytest.raises(OperationFailedError, match="copy failed"): + install_skills_module.install_skills_from_paths( + [install_skills_module.PackagedSkill(name="browser-cli-delivery", source=source)], + target_root, + dry_run=False, + ) + + assert (target / "SKILL.md").read_text(encoding="utf-8") == "# old\n" + assert (target / "stale.txt").read_text(encoding="utf-8") == "stale\n" + assert not any( + path.name.startswith("browser-cli-delivery.tmp-") for path in target_root.iterdir() + ) + + +def test_run_install_skills_command_uses_explicit_target( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + source_root = tmp_path / "source" + packaged = [ + install_skills_module.PackagedSkill( + name=name, + source=_write_skill(source_root, name), + ) + for name in install_skills_module.PUBLIC_SKILL_NAMES + ] + monkeypatch.setattr(install_skills_module, 
"discover_packaged_skills", lambda: packaged) + + output = install_skills_module.run_install_skills_command( + Namespace(dry_run=True, target=str(tmp_path / "custom")) + ) + + assert "Installing skills to" in output + assert str((tmp_path / "custom").resolve()) in output + assert "Total: 3 skill(s)" in output diff --git a/tests/unit/test_release_artifacts.py b/tests/unit/test_release_artifacts.py new file mode 100644 index 0000000..46380cb --- /dev/null +++ b/tests/unit/test_release_artifacts.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import zipfile +from pathlib import Path + +import pytest + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[2] + + +def test_built_wheel_contains_packaged_browser_cli_skills() -> None: + wheels = list((_repo_root() / "dist").glob("*.whl")) + if not wheels: + pytest.skip("Build a wheel before running this test: uv build --wheel") + wheel_path = max(wheels, key=lambda path: path.stat().st_mtime) + with zipfile.ZipFile(wheel_path) as archive: + names = set(archive.namelist()) + assert "browser_cli/packaged_skills/browser-cli-delivery/SKILL.md" in names + assert "browser_cli/packaged_skills/browser-cli-explore/SKILL.md" in names + assert "browser_cli/packaged_skills/browser-cli-converge/SKILL.md" in names diff --git a/tests/unit/test_repo_metadata.py b/tests/unit/test_repo_metadata.py index 11af88e..88d2396 100644 --- a/tests/unit/test_repo_metadata.py +++ b/tests/unit/test_repo_metadata.py @@ -35,3 +35,11 @@ def test_repo_pins_python_version_for_uv() -> None: def test_repo_tracks_uv_lockfile() -> None: assert (_repo_root() / "uv.lock").exists() + + +def test_repo_includes_packaged_browser_cli_skills_in_wheel_config() -> None: + data = _load_pyproject() + + package_data = data["tool"]["setuptools"].get("package-data", {}) + assert "browser_cli.packaged_skills" in package_data + assert package_data["browser_cli.packaged_skills"] == ["*/SKILL.md"] diff --git a/tests/unit/test_repo_skill_docs.py 
b/tests/unit/test_repo_skill_docs.py new file mode 100644 index 0000000..6cae4f3 --- /dev/null +++ b/tests/unit/test_repo_skill_docs.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from pathlib import Path + +from scripts.generate_packaged_skill_docs import expected_packaged_skill_docs +from scripts.guards.architecture import ALLOWED_DEPENDENCIES + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[2] + + +def _read(path: str) -> str: + return (_repo_root() / path).read_text(encoding="utf-8") + + +def test_browser_cli_skill_topology_exists() -> None: + skills_dir = _repo_root() / "skills" + actual = { + path.name + for path in skills_dir.iterdir() + if path.is_dir() and path.name.startswith("browser-cli-") + } + + assert actual == { + "browser-cli-delivery", + "browser-cli-explore", + "browser-cli-converge", + } + + +def test_agents_points_to_browser_cli_delivery_skill() -> None: + agents_text = _read("AGENTS.md") + + assert "skills/browser-cli-delivery/SKILL.md" in agents_text + assert ALLOWED_DEPENDENCIES["packaged_skills"] == set() + + +def test_browser_cli_explore_skill_records_feedback_into_task_metadata() -> None: + skill_text = _read("skills/browser-cli-explore/SKILL.md") + + assert "task.meta.json" in skill_text + assert "browser-cli is the primary browser execution path" in skill_text + assert "environment" in skill_text + assert "success_path" in skill_text + assert "recovery_hints" in skill_text + assert "failures" in skill_text + assert "knowledge" in skill_text + assert "Do not record raw logs" in skill_text + + +def test_browser_cli_converge_skill_centers_task_py_and_flow_validation() -> None: + skill_text = _read("skills/browser-cli-converge/SKILL.md") + + assert "task.py is the single source of execution logic" in skill_text + assert "browser_cli.task_runtime.Flow" in skill_text + assert "browser-cli task validate" in skill_text + assert "browser-cli task run" in skill_text + assert "must stay aligned with 
task.meta.json" in skill_text + + +def test_browser_cli_delivery_skill_orchestrates_explore_converge_and_optional_automation() -> None: + skill_text = _read("skills/browser-cli-delivery/SKILL.md") + + assert "browser-cli-explore" in skill_text + assert "browser-cli-converge" in skill_text + assert "task.py + task.meta.json" in skill_text + assert "automation.toml" in skill_text + assert "publish" in skill_text + assert "If validation fails because evidence is missing, go back to explore" in skill_text + + +def test_sync_packaged_skill_docs() -> None: + root = _repo_root() + + for skill_name, expected in expected_packaged_skill_docs(root).items(): + packaged_path = root / "src" / "browser_cli" / "packaged_skills" / skill_name / "SKILL.md" + assert packaged_path.read_text(encoding="utf-8") == expected