diff --git a/AGENTS.md b/AGENTS.md index 7f68db3..ec0bb80 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,7 +25,7 @@ uv run assembly --help # run the CLI from the locked environment Dev tooling is a PEP 735 `[dependency-groups]` group with `default-groups = ["dev"]`, not a `[project]` extra — `uv sync --extra dev` errors. -`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity: function max B, module avg A, project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `codespell` (spell-check code/comments/docs via `uvx`; config in `[tool.codespell]`) → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → unused snapshot/fixture gate (`scripts/unused_fixtures_gate.py`: orphaned `.ambr`/API fixtures, since xdist disables syrupy's own unused detection) → docs consistency gate (`scripts/docs_consistency_gate.py`: REFERENCE.md/README.md env vars, exit codes, and `assembly …` command refs stay in sync with the code) → docstring coverage gate (`scripts/docstring_coverage_gate.py`: public-API docstring ratchet, an `interrogate` stand-in that handles PEP 695 generics) → `brew audit --strict` (the shipped `Formula/assembly.rb`; self-skips without Homebrew) → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely 
unassertable line with `# pragma: no mutate`) → a "no new escape hatches" gate (`# type: ignore` / `# noqa` / `pragma: no cover` / `Any` / `cast(` / test skip/xfail/sleep, all **count-gated against the merge-base** so moving an existing hatch in a refactor doesn't false-positive but a net-new one fails) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.` **CodeQL is intentionally NOT in this gate** — it's the slowest check (~minutes) and is enforced separately by the `codeql.yml` workflow (which also covers CI; `check.sh` self-skipped it on the hosted runner anyway), so dropping it keeps the local gate fast with no loss of CI coverage. `scripts/codeql_gate.py` still exists to reproduce a code-scanning alert locally (`uv run python scripts/codeql_gate.py`). +`scripts/check.sh` is the authoritative gate; keep this list in sync with it. 
It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity: function max B, module avg A, project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `codespell` (spell-check code/comments/docs via `uvx`; config in `[tool.codespell]`) → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → unused snapshot/fixture gate (`scripts/unused_fixtures_gate.py`: orphaned `.ambr`/API fixtures, since xdist disables syrupy's own unused detection) → docs consistency gate (`scripts/docs_consistency_gate.py`: REFERENCE.md/README.md env vars, exit codes, and `assembly …` command refs stay in sync with the code) → docstring coverage gate (`scripts/docstring_coverage_gate.py`: public-API docstring ratchet, an `interrogate` stand-in that handles PEP 695 generics) → `brew audit --strict` (the shipped `Formula/assembly.rb`; self-skips without Homebrew) → `pytest` (90% branch coverage) → Textual TUI coverage (≥90% on the `textual`-importing modules — a per-surface floor so a fragile TUI module can't rot under the project-wide average; the module set is derived from the `textual` import and reuses the pytest `.coverage`, no re-run) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" gate (`# type: ignore` / `# noqa` / `pragma: no cover` / `Any` / `cast(` / test 
skip/xfail/sleep, all **count-gated against the merge-base** so moving an existing hatch in a refactor doesn't false-positive but a net-new one fails) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.` **CodeQL is intentionally NOT in this gate** — it's the slowest check (~minutes) and is enforced separately by the `codeql.yml` workflow (which also covers CI; `check.sh` self-skipped it on the hosted runner anyway), so dropping it keeps the local gate fast with no loss of CI coverage. `scripts/codeql_gate.py` still exists to reproduce a code-scanning alert locally (`uv run python scripts/codeql_gate.py`). **Commits are gated.** On success `check.sh` records a working-tree signature (`scripts/gate_marker.py record` → `.git/aai-gate-pass`), and a PreToolUse hook (`.claude/hooks/require-gate-before-commit.sh`) blocks `git commit` unless that signature still matches — so run the full gate to completion *before* committing (a single-file `pytest` does not satisfy it), and re-run it after any further edit. Iterate with the fast targeted commands above, gate once at the end. For a deliberate work-in-progress commit, prefix `AAI_ALLOW_COMMIT=1 git commit …`. diff --git a/aai_cli/code_agent/modals.py b/aai_cli/code_agent/modals.py index 4e21a97..041cd1c 100644 --- a/aai_cli/code_agent/modals.py +++ b/aai_cli/code_agent/modals.py @@ -65,8 +65,10 @@ class ApprovalScreen(ModalScreen[str]): DEFAULT_CSS = """ ApprovalScreen { align: center bottom; background: transparent; } + /* width: 100% (not 1fr) so the box honors its 1-col side margins — a docked 1fr container + ignores horizontal margin and overflows the screen, clipping the right border off-edge. 
*/ ApprovalScreen #approvalbox { - dock: bottom; width: 1fr; height: auto; + dock: bottom; width: 100%; height: auto; border: round #f59e0b; background: #000000; padding: 0 1; margin: 0 1 1 1; } ApprovalScreen #approvalbox Label { height: auto; } @@ -163,7 +165,7 @@ class AskScreen(ModalScreen[str]): DEFAULT_CSS = """ AskScreen { align: center bottom; background: transparent; } AskScreen #askbox { - dock: bottom; width: 1fr; height: auto; + dock: bottom; width: 100%; height: auto; border: round #3a3f55; background: #000000; padding: 0 1; margin: 0 1 1 1; } """ diff --git a/aai_cli/code_agent/tui.py b/aai_cli/code_agent/tui.py index 099c64c..2624f4f 100644 --- a/aai_cli/code_agent/tui.py +++ b/aai_cli/code_agent/tui.py @@ -75,7 +75,9 @@ class CodeAgentApp(_VoiceLegs): /* The transcript is a scroll container of mounted message widgets (not a RichLog), so the reply streams in place and tool output can expand/collapse. */ #log {{ height: 1fr; border: none; background: #000000; padding: 1 2; }} - #promptbar {{ dock: bottom; height: 3; background: #000000; border: round #3a3f55; margin: 1 1; }} + /* width: 100% (not the 1fr default) so the bordered box fits inside its 1-col side margins; + a docked 1fr container ignores horizontal margin and overflows, clipping the right border. */ + #promptbar {{ dock: bottom; height: 3; width: 100%; background: #000000; border: round #3a3f55; margin: 1 1; }} #promptmark {{ width: 3; color: {banner.BRAND_HEX}; content-align: center middle; }} #prompt {{ border: none; background: #000000; padding: 0; }} /* Shown in place of the prompt while voice capture is on (Ctrl-V brings the prompt back). */ diff --git a/pyproject.toml b/pyproject.toml index 6412592..dc36d77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,6 +117,12 @@ dev = [ # failure instead of a wedged session (not in addopts — opt-in per run). 
"pytest-timeout>=2.3.1", "time-machine>=3.1.0", + # Visual-regression snapshots for the Textual TUIs (`assembly code` / `live`): the + # `snap_compare` fixture renders an app to SVG and diffs it against a committed golden, + # catching CSS/layout/docking regressions the behavioral pilot tests can't see. Stores + # SVGs under tests/__snapshots__/<test_module>/ (regenerate with --snapshot-update like the + # .ambr goldens). See tests/AGENTS.md "Textual visual snapshots". + "pytest-textual-snapshot>=1.0.0", "hypothesis>=6.155.1", "ruff>=0.15.15", "mypy>=2.1.0", diff --git a/scripts/check.sh b/scripts/check.sh index 2bb1b6e..6ebab81 100755 --- a/scripts/check.sh +++ b/scripts/check.sh @@ -238,6 +238,21 @@ echo "==> pytest (with branch-coverage gate)" # splitting it across workers is safe. uv run pytest -q --strict-config --strict-markers -n auto -m "not e2e and not install" --cov=aai_cli --cov-branch --cov-context=test --cov-report=term-missing --cov-report=xml --cov-fail-under=90 +echo "==> Textual TUI coverage (>=90% on the textual-importing modules)" +# The project-wide 90% gate above is an average, so a TUI module can rot while the rest +# of the suite carries it. The Textual TUIs (`assembly code` / `live`) are the most +# layout-fragile, regression-prone surface in the repo (see tests/AGENTS.md), so hold +# them to their own >=90% floor. The module set is *derived* — every aai_cli file that +# imports `textual` — so a new TUI module is picked up automatically with no list to +# hand-maintain. Reuses the .coverage data the pytest step just wrote (no re-run), and +# counts branches because that data was collected with --cov-branch. 
+tui_modules="$(git grep -lP '^\s*(from|import) textual' -- ':(glob)aai_cli/**/*.py' | paste -sd, -)" # :(glob) lets ** match zero directories too, so modules directly under aai_cli/ are scanned +if [[ -z "$tui_modules" ]]; then + echo " no textual-importing modules found (the derive pattern is stale?)" + exit 1 +fi +uv run coverage report --include="$tui_modules" --fail-under=90 + echo "==> diff-cover (patch coverage: every changed line must be tested)" # The 90% gate above is project-wide, so new code can ride on the existing suite and # stay untested. diff-cover requires 100% coverage of the lines changed versus the diff --git a/tests/AGENTS.md b/tests/AGENTS.md index 6595b9f..5c29fdd 100644 --- a/tests/AGENTS.md +++ b/tests/AGENTS.md @@ -20,6 +20,25 @@ CLI output is pinned by **syrupy snapshot tests** (`tests/__snapshots__/*.ambr`) The `--help` goldens are split per command group (`tests/test_snapshots_help_<group>.py`) so concurrent branches touching different commands regenerate *different* `.ambr` files. The partition (`HELP_GROUPS` in `tests/_snapshot_surface.py`) is **derived from each command module's `SPEC.panel`** (see `aai_cli/command_registry.py`), so a new command lands in the right group automatically; `tests/test_snapshots_help_groups.py` guards that the derived partition matches the live Typer tree. The root `assembly --help` screen — which every new command changes — has its own golden (`tests/test_snapshots_help_root.py`), so that churn stays confined to one trivially-regenerable `.ambr` file. +## Textual visual snapshots (the `code` / `live` TUIs) + +The two Textual apps — `CodeAgentApp` (`assembly code`) and `LiveAgentApp` (`assembly live`) — are **the most layout-fragile surface in the repo**: a one-line CSS edit (a dock, a width, a margin, a transparent background) silently shifts the whole painted frame, and the pilot tests (`test_code_tui.py` / `test_live_tui.py`) only ever assert one widget, region, or flag at a time — they can't see "the modal's right border is now clipped off-screen". 
So they're backed by **visual-regression snapshots** (`tests/test_tui_snapshots.py`, on top of the `pytest-textual-snapshot` `snap_compare` fixture): each test renders an app (or a pushed modal) to an SVG and diffs it against a committed golden under `tests/__snapshots__/test_tui_snapshots/*.raw`. (This is how the `width: 1fr` → `width: 100%` overflow bug in `#promptbar`/`#approvalbox`/`#askbox` was found — a docked `1fr` container ignores horizontal margin and overflows, and the pilot region asserts never checked the right edge.) + +The two layers are complementary, so add to whichever fits: a **behavioral** assertion (a key press changes state, a modal returns a value, a region stays docked) goes in the pilot tests; a **visual** change (chrome, colors, spacing, a new transcript widget) earns a `snap_compare` golden. When a visual fix lands, pin the precise invariant in a pilot test too (e.g. `box.region.right <= 100`) so a mutant is killed deterministically, not only by the SVG diff. + +Regenerate after an intentional UI change with `uv run pytest tests/test_tui_snapshots.py --snapshot-update` and **eyeball every changed SVG before committing** — a blessed-but-wrong baseline is worse than no snapshot. (No SVG viewer in a headless session? Reconstruct the text by grouping each `<text>` element's content by its `y` coordinate; that's enough to read the frame and spot a clipped border.) + +A Textual app renders non-deterministically unless four things are frozen — all handled by `tests/_tui_snapshot.py` (read its module docstring before adding a test): + +- **`banner.version()`** in the splash is the hatch-vcs git-tag string (`v0.1.devN+g<sha>`), different on every commit — `pin_banner_version` freezes it. +- **The voice bar's meter** advances on a 0.3s `set_interval`; the frame at screenshot time depends on wall-clock scheduling — `freeze_animation` pins it to one frame and stops the timer (and the spinner's). 
+- **`LiveAgentApp` starts the blocking cascade on a worker thread on mount**, which `exit()`s the app before the screenshot — `build_live_app` returns a subclass whose `_start` is a no-op, and the test drives the transcript methods directly. +- **The code status line** renders the cwd, git branch, and `~`-abbreviated home (all machine/platform-specific) — `stable_workdir` pins `Path.home` and builds a fixed `~/demo` cwd with a fake `.git/HEAD`. + +The `.raw` SVGs live in a `tests/__snapshots__/test_tui_snapshots/` **subdirectory**, so `scripts/unused_fixtures_gate.py` (which globs only top-level `*.ambr`) doesn't police them — delete a renamed test's stale `.raw` by hand. + +On top of the project-wide 90% gate, `check.sh` enforces a **per-surface ≥90% coverage floor on the Textual modules** (every `aai_cli` file that imports `textual` — derived, not hand-listed — reusing the pytest `.coverage`), so a fragile TUI module can't rot while the rest of the suite carries the average. Keep these modules well-covered by the pilot tests; a new TUI module is held to the floor automatically. + ## Hermeticity (enforced three ways) The suite is hermetic by construction (`tests/conftest.py` + `pyproject.toml` `[tool.pytest.ini_options]`): **pytest-randomly** shuffles order, an autouse `pin_timezone` fixture pins `TZ` to a fixed non-UTC zone (UTC-normalized rendering must be unaffected; use **time-machine** to freeze `now`), and **pytest-socket** (`--disable-socket`) blocks real network so an unmocked SDK/HTTP call fails loudly instead of hitting the API. A test that only binds a loopback server opts back in with the tight `@pytest.mark.allow_hosts(["127.0.0.1"])` (still blocks external hosts). The `e2e`/`install` marker suites legitimately reach the real network in-process (PyPI reachability probes, real-API runs), so a `pytest_collection_modifyitems` hook in `conftest.py` auto-grants them full sockets — adding a network marker is all that's needed, no per-test `enable_socket`. 
diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal.raw b/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal.raw new file mode 100644 index 0000000..9355a4d --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal.raw @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +⚠ This command deletes files recursively/forcibly. +Run tool execute?  
rm -rf build/ +y approve   a auto-approve   n reject   e expand +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal_benign.raw b/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal_benign.raw new file mode 100644 index 0000000..673aebf --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal_benign.raw @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +Run tool execute?  
ls -la +y approve   a auto-approve   n reject   e expand +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal_expanded.raw b/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal_expanded.raw new file mode 100644 index 0000000..f15782d --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_approval_modal_expanded.raw @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +Run tool write_file?  
file_path=app.py +content=PORT = 8080 +DEBUG = 1 +y approve   a auto-approve   n reject   e expand +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_ask_modal.raw b/tests/__snapshots__/test_tui_snapshots/test_code_ask_modal.raw new file mode 100644 index 0000000..02bbd4f --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_ask_modal.raw @@ -0,0 +1,183 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +The agent asks: Which port should the dev server use? 
+▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔ +Type your answer and press Enter… +▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_error.raw b/tests/__snapshots__/test_tui_snapshots/test_code_error.raw new file mode 100644 index 0000000..bb805a7 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_error.raw @@ -0,0 +1,183 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. 
+ +» deploy to prod +✗ gateway unreachable: connection refused + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_splash.raw b/tests/__snapshots__/test_tui_snapshots/test_code_splash.raw new file mode 100644 index 0000000..770b28c --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_splash.raw @@ -0,0 +1,181 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. 
+ + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_status_auto_approve.raw b/tests/__snapshots__/test_tui_snapshots/test_code_status_auto_approve.raw new file mode 100644 index 0000000..71d048f --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_status_auto_approve.raw @@ -0,0 +1,181 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. 
+ + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + auto ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_streaming_reply.raw b/tests/__snapshots__/test_tui_snapshots/test_code_streaming_reply.raw new file mode 100644 index 0000000..95ca020 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_streaming_reply.raw @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. + +» explain the plan +Here's the plan. First **scaffold** the project, then wire up the tests. 
+ + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_tool_output_collapsed.raw b/tests/__snapshots__/test_tui_snapshots/test_code_tool_output_collapsed.raw new file mode 100644 index 0000000..705dbe0 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_tool_output_collapsed.raw @@ -0,0 +1,184 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. + +» run the tests +→ execute(pytest -q) +  execute: tests/test_module_0.py .... [ 0%] +tests/test_module_1.py .... [ 10%] +tests/test_module_2.py .... [ 20%] +tests/test_module_3.py .... 
[ 30%] … (+4 more lines) (Ctrl+O to expand) + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_tool_output_expanded.raw b/tests/__snapshots__/test_tui_snapshots/test_code_tool_output_expanded.raw new file mode 100644 index 0000000..8e9a713 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_tool_output_expanded.raw @@ -0,0 +1,184 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. + +» run the tests +→ execute(pytest -q) +  execute: tests/test_module_0.py .... [ 0%] +tests/test_module_1.py .... [ 10%] +tests/test_module_2.py .... [ 20%] +tests/test_module_3.py .... [ 30%] +tests/test_module_4.py .... [ 40%] +tests/test_module_5.py .... [ 50%] +tests/test_module_6.py .... [ 60%] +tests/test_module_7.py .... 
[ 70%] (Ctrl+O to collapse) + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_transcript.raw b/tests/__snapshots__/test_tui_snapshots/test_code_transcript.raw new file mode 100644 index 0000000..152213d --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_transcript.raw @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. 
+ +» add a /health endpoint +Adding a health check:                                                                           + + 1 New route                                                                                     + 2 A test                                                                                        +→ write_file(app.py) +  write_file: wrote 8 lines to app.py + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_voice_listening.raw b/tests/__snapshots__/test_tui_snapshots/test_code_voice_listening.raw new file mode 100644 index 0000000..310de20 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_voice_listening.raw @@ -0,0 +1,179 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. 
+ + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▁▃▅ Listening — speak your request   (Ctrl-V to type) +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main● voice on + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_code_working_spinner.raw b/tests/__snapshots__/test_tui_snapshots/test_code_working_spinner.raw new file mode 100644 index 0000000..d48171b --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_code_working_spinner.raw @@ -0,0 +1,182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Code + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Thread: default + +Ready to code! What would you like to build? +Tip: approve tools as they run, or pass --auto to skip the prompts. 
+ +» build a web scraper + + + + + + + + + +✶ Working… (7s) + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +>Ask the agent to build something… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ + manual ~/demo↗ main + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_live_conversation.raw b/tests/__snapshots__/test_tui_snapshots/test_live_conversation.raw new file mode 100644 index 0000000..ac1e4dd --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_live_conversation.raw @@ -0,0 +1,179 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Live + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Listening… start talking when you're ready. +Use headphones — the mic stays open while the agent speaks. + +» what's the weather like in Boston? + +It's sunny and about sixty degrees right now.  
+ + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▅▇▆ Speaking… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ +Esc/Ctrl-C to interrupt · Ctrl-Q to quit + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_live_error.raw b/tests/__snapshots__/test_tui_snapshots/test_live_error.raw new file mode 100644 index 0000000..5dca5df --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_live_error.raw @@ -0,0 +1,178 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Live + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Listening… start talking when you're ready. +Use headphones — the mic stays open while the agent speaks. 
+✗ Streaming STT connection lost + + + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▁▃▅ Listening — speak your request +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ +Esc/Ctrl-C to interrupt · Ctrl-Q to quit + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_live_interrupted.raw b/tests/__snapshots__/test_tui_snapshots/test_live_interrupted.raw new file mode 100644 index 0000000..0506984 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_live_interrupted.raw @@ -0,0 +1,179 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Live + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Listening… start talking when you're ready. +Use headphones — the mic stays open while the agent speaks. 
+ +» tell me a long story + +Once upon a time, in a faraway land,                                                             +(interrupted) + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▆▇▅ Listening — speak your request +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ +Esc/Ctrl-C to interrupt · Ctrl-Q to quit + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_live_splash_listening.raw b/tests/__snapshots__/test_tui_snapshots/test_live_splash_listening.raw new file mode 100644 index 0000000..b415f85 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_live_splash_listening.raw @@ -0,0 +1,177 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Live + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Listening… start talking when you're ready. +Use headphones — the mic stays open while the agent speaks. 
+ + + + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▁▃▅ Listening — speak your request +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ +Esc/Ctrl-C to interrupt · Ctrl-Q to quit + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_live_thinking.raw b/tests/__snapshots__/test_tui_snapshots/test_live_thinking.raw new file mode 100644 index 0000000..422fa12 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_live_thinking.raw @@ -0,0 +1,179 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Live + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Listening… start talking when you're ready. +Use headphones — the mic stays open while the agent speaks. + +» what's the weather like in Boston? 
+ + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▃▅▇ Thinking… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ +Esc/Ctrl-C to interrupt · Ctrl-Q to quit + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_live_tool_call_note.raw b/tests/__snapshots__/test_tui_snapshots/test_live_tool_call_note.raw new file mode 100644 index 0000000..42b7690 --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_live_tool_call_note.raw @@ -0,0 +1,180 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Live + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Listening… start talking when you're ready. +Use headphones — the mic stays open while the agent speaks. + +» what's the weather like in Boston? 
+Searching the web… + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▃▅▇ Thinking… +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ +Esc/Ctrl-C to interrupt · Ctrl-Q to quit + + + diff --git a/tests/__snapshots__/test_tui_snapshots/test_live_user_partial.raw b/tests/__snapshots__/test_tui_snapshots/test_live_user_partial.raw new file mode 100644 index 0000000..85682df --- /dev/null +++ b/tests/__snapshots__/test_tui_snapshots/test_live_user_partial.raw @@ -0,0 +1,178 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AssemblyAI Live + + + + + + + + + + + █████╗  ███████╗ ███████╗ ███████╗ ███╗   ███╗ ██████╗  ██╗      ██╗   ██╗ +██╔══██╗ ██╔════╝ ██╔════╝ ██╔════╝ ████╗ ████║ ██╔══██╗ ██║      ╚██╗ ██╔╝ +███████║ ███████╗ ███████╗ █████╗   ██╔████╔██║ ██████╔╝ ██║       ╚████╔╝  +██╔══██║ ╚════██║ ╚════██║ ██╔══╝   ██║╚██╔╝██║ ██╔══██╗ ██║        ╚██╔╝   +██║  ██║ ███████║ ███████║ ███████╗ ██║ ╚═╝ ██║ ██████╔╝ ███████╗    ██║    +╚═╝  ╚═╝ ╚══════╝ ╚══════╝ ╚══════╝ ╚═╝     ╚═╝ ╚═════╝  ╚══════╝    ╚═╝    +v9.9.9 + +Listening… start talking when you're ready. +Use headphones — the mic stays open while the agent speaks. 
+ +» what's the weather like in + + + + + + + + + + + + + +╭────────────────────────────────────────────────────────────────────────────────────────────────╮ +▃▅▇ Listening — speak your request +╰────────────────────────────────────────────────────────────────────────────────────────────────╯ +Esc/Ctrl-C to interrupt · Ctrl-Q to quit + + + diff --git a/tests/_tui_snapshot.py b/tests/_tui_snapshot.py new file mode 100644 index 0000000..227c3dc --- /dev/null +++ b/tests/_tui_snapshot.py @@ -0,0 +1,155 @@ +"""Helpers for the Textual TUI visual-snapshot suite (``test_tui_snapshots.py``). + +``pytest-textual-snapshot``'s ``snap_compare`` fixture renders a Textual ``App`` to an +SVG and diffs it against a committed golden, catching the CSS / layout / docking +regressions the behavioral pilot tests (``test_code_tui.py`` / ``test_live_tui.py``) +can't see — those assert on one widget at a time, never the whole painted frame. + +Four things make our two apps (:class:`~aai_cli.code_agent.tui.CodeAgentApp` and +:class:`~aai_cli.agent_cascade.tui.LiveAgentApp`) non-deterministic under a raw render, +so the goldens would churn or flake without neutralising them here: + +* **The splash prints ``banner.version()``**, which hatch-vcs derives from the git tag + (``v0.1.devN+g``) — a different string on every commit. ``pin_banner_version`` + freezes it. +* **The voice bar animates its meter on a 0.3s ``set_interval``.** How many times it has + ticked by screenshot time depends on wall-clock scheduling, so the frame would differ + run-to-run. :func:`freeze_animation` pins the meter to one frame and stops the timer. +* **``LiveAgentApp`` kicks the blocking cascade on a worker thread on mount**; if that + worker returns it exits the app before the screenshot. :func:`build_live_app` returns a + subclass whose ``_start`` is a no-op, so a snapshot drives the transcript directly with + no thread. 
+* **The code TUI status line renders the cwd, its git branch, and a ``~``-abbreviated + home** — all environment- and platform-specific. :func:`stable_workdir` builds a fixed + cwd (with a fake ``.git/HEAD``) and pins ``Path.home`` so the line is identical on every + machine the suite runs on. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from textual.app import App + +from aai_cli.agent_cascade.tui import LiveAgentApp +from aai_cli.code_agent.tui import CodeAgentApp + +if TYPE_CHECKING: + import pytest + +# A render width/height wide enough for the ASSEMBLY wordmark splash (~75 cells) plus a +# margin, shared by both apps so every golden is captured at the same terminal size. +TERMINAL_SIZE = (100, 30) +# A stable stand-in for banner.version() in the splash (the real string changes per commit). +_PINNED_VERSION = "v9.9.9" + + +class FakeAgent: + """A no-op agent satisfying the ``CompiledAgent`` shape; a snapshot never runs a turn. + + ``invoke`` returns an empty state — it exists only so the type checks and the app can be + constructed, and is covered by ``test_fake_agent_returns_empty_state`` rather than by any + render (which deliberately never sends a turn). + """ + + def invoke(self, *args: object, **kwargs: object) -> dict[str, object]: + return {} + + +class FakeVoice: + """A no-op ``_VoiceIO``; voice-mode snapshots never reach the capture/readback legs. + + The capture leg is stubbed in :class:`_SnapshotCodeApp`, so these are unreached by any + render and are covered by ``test_fake_voice_is_inert`` instead. + """ + + def listen(self) -> str | None: + return None + + def speak(self, text: str) -> None: + pass + + def cancel(self) -> None: + pass + + +class _SnapshotLiveApp(LiveAgentApp): + """``LiveAgentApp`` whose cascade worker never starts, so the app stays up for a render. 
+ + The real ``_start`` runs the blocking conversation on a thread; in a snapshot we drive the + transcript methods directly (see :func:`tests.test_tui_snapshots`), so starting the worker + would only race the screenshot and exit the app the moment the no-op conversation returns. + """ + + def _start(self) -> None: + pass + + +class _SnapshotCodeApp(CodeAgentApp): + """``CodeAgentApp`` whose background voice-capture leg never starts. + + In voice mode ``on_mount`` spawns a daemon thread that blocks on ``voice.listen()`` and + marshals phase changes back onto the UI thread — which would race the screenshot and make + the bar frame non-deterministic. Stubbing ``_begin_listening`` keeps the app in the + synchronously-rendered listening state (voice bar shown, prompt hidden) with no thread. + """ + + def _begin_listening(self) -> None: + pass + + +def build_code_app(*, cwd: Path, auto_approve: bool = False) -> CodeAgentApp: + """A ``CodeAgentApp`` wired to a fake agent for a visual snapshot.""" + return CodeAgentApp(agent=FakeAgent(), cwd=cwd, auto_approve=auto_approve) + + +def build_code_voice_app(*, cwd: Path) -> _SnapshotCodeApp: + """A ``CodeAgentApp`` in voice mode (listening), with the mic-capture leg stubbed out.""" + return _SnapshotCodeApp(agent=FakeAgent(), cwd=cwd, voice=FakeVoice()) + + +def build_live_app() -> _SnapshotLiveApp: + """A ``LiveAgentApp`` whose cascade worker is stubbed out so a snapshot can drive it.""" + return _SnapshotLiveApp(run_conversation=lambda renderer: None, on_stop=lambda: None) + + +def freeze_animation(app: App[None]) -> None: + """Stop every TUI animation timer so the captured frame is byte-stable. + + The voice bar's meter advances on a 0.3s ``set_interval``; left running, the number of + ticks by screenshot time depends on wall-clock scheduling, so the frame would flake. 
Stop + that timer (and the code TUI's spinner timer) — ``run_before`` is the first thing the + screenshot harness runs, before any pause, so no tick fires before the stop, and the bar + then holds the frame from its last explicit render (a fixed count per test). Accepts the + broad ``App`` that ``Pilot.app`` exposes and narrows to our two apps. + """ + assert isinstance(app, (CodeAgentApp, LiveAgentApp)) + if app._voice_timer is not None: + app._voice_timer.stop() + if isinstance(app, CodeAgentApp) and app._spin_timer is not None: + app._spin_timer.stop() + + +def pin_banner_version(monkeypatch: pytest.MonkeyPatch) -> None: + """Freeze the splash version string (otherwise it changes on every commit).""" + monkeypatch.setattr("aai_cli.code_agent.banner.version", lambda: _PINNED_VERSION) + + +def stable_workdir( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, *, branch: str = "main" +) -> Path: + """A fixed cwd whose status line renders identically on every machine. + + Pins ``Path.home`` to ``tmp_path`` and returns a ``tmp_path/demo`` cwd, so + ``_abbrev_home`` collapses it to ``~/demo`` regardless of the real home directory, and + writes a fake ``.git/HEAD`` so ``_git_branch`` reports a deterministic ``branch`` rather + than whatever branch the suite happens to run on. 
+ """ + monkeypatch.setattr(Path, "home", lambda: tmp_path) + demo = tmp_path / "demo" + demo.mkdir() + git_dir = demo / ".git" + git_dir.mkdir() + (git_dir / "HEAD").write_text(f"ref: refs/heads/{branch}\n", encoding="utf-8") + return demo diff --git a/tests/test_code_tui.py b/tests/test_code_tui.py index aa34f4f..9961714 100644 --- a/tests/test_code_tui.py +++ b/tests/test_code_tui.py @@ -13,7 +13,7 @@ import pytest from langchain_core.messages import AIMessage, HumanMessage -from textual.containers import VerticalScroll +from textual.containers import Horizontal, VerticalScroll from textual.widgets import Input, Label, Static from aai_cli.code_agent.events import AssistantText, ErrorText, ToolCall, ToolResult @@ -55,6 +55,10 @@ async def go() -> None: assert len(log.children) >= 1 # the splash is mounted into the transcript assert "Ready to code" in str(log.children[0].render()) # splash intro shown assert app.focused is app.query_one("#prompt", Input) + # The bordered prompt bar must fit inside the screen so its right border isn't + # clipped off-edge — `width: 100%` honors the side margins where the docked + # default (`1fr`) would overflow to x=1..101 on a 100-wide screen. + assert app.query_one("#promptbar", Horizontal).region.right <= 100 _run(go()) @@ -265,6 +269,10 @@ async def go() -> None: assert box.region.height <= 8 # a handful of rows, not the full 30 assert box.region.bottom <= 30 # anchored within the bottom of the screen assert box.region.y >= 15 # sits in the lower half, transcript visible above + # The box must fit inside the screen so its rounded border isn't clipped off the + # right edge: a docked `width: 1fr` container ignores horizontal margin and + # overflows to x=1..101 on a 100-wide screen (the bug `width: 100%` fixes). 
+ assert box.region.right <= 100 _run(go()) diff --git a/tests/test_tui_snapshots.py b/tests/test_tui_snapshots.py new file mode 100644 index 0000000..c841e40 --- /dev/null +++ b/tests/test_tui_snapshots.py @@ -0,0 +1,357 @@ +"""Visual-regression snapshots for the `assembly code` and `assembly live` Textual TUIs. + +Each test renders an app (or a pushed modal) to an SVG via ``pytest-textual-snapshot``'s +``snap_compare`` fixture and diffs it against a committed golden under +``tests/__snapshots__/test_tui_snapshots/``. This pins the *painted frame* — the splash, the +prompt bar, the docked status line, the voice bar, the message widgets, and the compact +approval/ask modals — so a CSS, layout, or docking regression that the per-widget pilot tests +(``test_code_tui.py`` / ``test_live_tui.py``) can't see fails loudly here instead. + +Regenerate after an intentional UI change with ``uv run pytest tests/test_tui_snapshots.py +--snapshot-update`` and **eyeball every changed SVG** before committing — a snapshot only +guards against regressions if the baseline it captured was actually correct. The helpers in +``tests/_tui_snapshot.py`` freeze the four sources of non-determinism (version string, voice-bar +animation, the cascade worker, and the cwd/branch status line); see that module's docstring. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest +from textual.widgets import Static + +from aai_cli.agent_cascade.tui import LiveAgentApp +from aai_cli.code_agent.events import AssistantDelta, AssistantText, ErrorText, ToolCall, ToolResult +from aai_cli.code_agent.messages import UserMessage +from aai_cli.code_agent.modals import ApprovalScreen, AskScreen +from aai_cli.code_agent.tui import _SPIN_FRAMES, CodeAgentApp +from aai_cli.code_agent.tui_status import _spinner_text +from tests import _tui_snapshot as h + +if TYPE_CHECKING: + from textual.pilot import Pilot + + +# More than the 4-line preview budget, so summarize_result clips it and the ToolOutput +# row becomes expandable — the collapsed/expanded snapshots below pin both states. +_LONG_OUTPUT = "\n".join(f"tests/test_module_{i}.py .... [ {i * 10}%]" for i in range(8)) + + +@pytest.fixture(autouse=True) +def _pin_version(monkeypatch: pytest.MonkeyPatch) -> None: + h.pin_banner_version(monkeypatch) + + +def test_fake_agent_returns_empty_state() -> None: + """The snapshot double satisfies CompiledAgent.invoke with an inert empty turn.""" + assert h.FakeAgent().invoke("prompt") == {} + + +def test_fake_voice_is_inert() -> None: + """The voice double satisfies _VoiceIO without capturing or speaking anything.""" + voice = h.FakeVoice() + assert voice.listen() is None + voice.speak("hello") + voice.cancel() + + +# --- assembly code ----------------------------------------------------------- + + +def test_code_splash(snap_compare, tmp_path, monkeypatch) -> None: + """The idle startup frame: ASSEMBLY wordmark splash, prompt bar, and `manual` status line.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_status_auto_approve(snap_compare, tmp_path, 
monkeypatch) -> None: + """Auto-approve flips the bottom badge from `manual` to `auto` — a one-glyph status diff.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + + assert snap_compare( + h.build_code_app(cwd=cwd, auto_approve=True), + terminal_size=h.TERMINAL_SIZE, + run_before=run_before, + ) + + +def test_code_transcript(snap_compare, tmp_path, monkeypatch) -> None: + """A populated transcript: the user echo, a Markdown reply, a tool-call line, tool output.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, CodeAgentApp) + h.freeze_animation(app) + app._mount(UserMessage("add a /health endpoint")) + app._write_event(AssistantText("Adding a **health check**:\n\n1. New route\n2. A test")) + app._write_event(ToolCall(name="write_file", args={"file_path": "app.py"})) + app._write_event(ToolResult(name="write_file", content="wrote 8 lines to app.py")) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_approval_modal(snap_compare, tmp_path, monkeypatch) -> None: + """The compact, bottom-docked approval prompt for a risky command (warning + y/a/n hint).""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + pilot.app.push_screen(ApprovalScreen("execute", {"command": "rm -rf build/"})) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_ask_modal(snap_compare, tmp_path, monkeypatch) -> None: + """The bottom-docked ask prompt: the agent's question above a text input.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + pilot.app.push_screen(AskScreen("Which port 
should the dev server use?")) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_approval_modal_expanded(snap_compare, tmp_path, monkeypatch) -> None: + """`e` expands the approval prompt from the identifying arg to the full args. + + Collapsed, a write_file call shows only the filename; expanded, it reveals the file + content that was elided — a taller box, pinned so the reveal can't regress. + """ + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + pilot.app.push_screen( + ApprovalScreen( + "write_file", {"file_path": "app.py", "content": "PORT = 8080\nDEBUG = 1"} + ) + ) + + assert snap_compare( + h.build_code_app(cwd=cwd), press=["e"], terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_tool_output_collapsed(snap_compare, tmp_path, monkeypatch) -> None: + """Long tool output clips to a preview with a `(Ctrl+O to expand)` hint.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, CodeAgentApp) + h.freeze_animation(app) + app._mount(UserMessage("run the tests")) + app._write_event(ToolCall(name="execute", args={"command": "pytest -q"})) + app._write_event(ToolResult(name="execute", content=_LONG_OUTPUT)) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_tool_output_expanded(snap_compare, tmp_path, monkeypatch) -> None: + """Ctrl+O expands the clipped tool output to the full content with a collapse hint.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, CodeAgentApp) + h.freeze_animation(app) + app._mount(UserMessage("run the tests")) + app._write_event(ToolCall(name="execute", args={"command": "pytest -q"})) + 
app._write_event(ToolResult(name="execute", content=_LONG_OUTPUT)) + await pilot.pause() # let the ToolOutput mount before toggling it + app.action_toggle_output() # Ctrl+O + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_working_spinner(snap_compare, tmp_path, monkeypatch) -> None: + """The working indicator: a spinner glyph + elapsed seconds, docked just above the prompt.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, CodeAgentApp) + h.freeze_animation(app) + app._mount(UserMessage("build a web scraper")) + spinner = app.query_one("#spinner", Static) + spinner.display = True + # Render a fixed elapsed/frame through the real formatter — driving the live _tick + # would tie the readout to wall-clock timing and flake. + spinner.update(_spinner_text(7, _SPIN_FRAMES[0])) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_streaming_reply(snap_compare, tmp_path, monkeypatch) -> None: + """A reply mid-stream is plain text (literal markdown) before finalize swaps it to Markdown.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, CodeAgentApp) + h.freeze_animation(app) + app._mount(UserMessage("explain the plan")) + app._write_event(AssistantDelta("Here's the plan. 
First **scaffold** the project, ")) + app._write_event(AssistantDelta("then wire up the tests.")) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_approval_modal_benign(snap_compare, tmp_path, monkeypatch) -> None: + """A benign command mounts no warning label — the no-warning variant of the approval prompt.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + pilot.app.push_screen(ApprovalScreen("execute", {"command": "ls -la"})) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_error(snap_compare, tmp_path, monkeypatch) -> None: + """A failed turn renders as a red ✗ error line instead of crashing the UI.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, CodeAgentApp) + h.freeze_animation(app) + app._mount(UserMessage("deploy to prod")) + app._write_event(ErrorText("gateway unreachable: connection refused")) + + assert snap_compare( + h.build_code_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +def test_code_voice_listening(snap_compare, tmp_path, monkeypatch) -> None: + """Voice mode swaps the prompt for the listening bar (with a Ctrl-V hint) and shows the + green `● voice on` status badge — the whole alternate-input chrome.""" + cwd = h.stable_workdir(tmp_path, monkeypatch) + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + + assert snap_compare( + h.build_code_voice_app(cwd=cwd), terminal_size=h.TERMINAL_SIZE, run_before=run_before + ) + + +# --- assembly live ----------------------------------------------------------- + + +def test_live_splash_listening(snap_compare) -> None: + """The hands-free startup frame: the wordmark splash above the blue `Listening…` 
voice bar.""" + + async def run_before(pilot: Pilot[None]) -> None: + h.freeze_animation(pilot.app) + + assert snap_compare(h.build_live_app(), terminal_size=h.TERMINAL_SIZE, run_before=run_before) + + +def test_live_conversation(snap_compare) -> None: + """A spoken turn mid-reply: the user transcript, the streamed reply, the green `Speaking…` bar.""" + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, LiveAgentApp) + h.freeze_animation(app) + app.show_user_final("what's the weather like in Boston?") + app.begin_reply() + app.show_agent_sentence("It's sunny and about sixty degrees right now.") + h.freeze_animation(app) # begin_reply switched the phase, which repainted the bar + + assert snap_compare(h.build_live_app(), terminal_size=h.TERMINAL_SIZE, run_before=run_before) + + +def test_live_thinking(snap_compare) -> None: + """After a finalized turn, the bar shows the amber `Thinking…` phase before the reply.""" + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, LiveAgentApp) + h.freeze_animation(app) + app.show_user_final("what's the weather like in Boston?") + h.freeze_animation(app) # show_user_final switched the phase to thinking + + assert snap_compare(h.build_live_app(), terminal_size=h.TERMINAL_SIZE, run_before=run_before) + + +def test_live_user_partial(snap_compare) -> None: + """An interim (still-being-spoken) user transcript grows in place while listening.""" + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, LiveAgentApp) + h.freeze_animation(app) + app.show_user_partial("what's the weather like in") + + assert snap_compare(h.build_live_app(), terminal_size=h.TERMINAL_SIZE, run_before=run_before) + + +def test_live_tool_call_note(snap_compare) -> None: + """A tool the agent uses mid-turn drops a dim progress note so the wait doesn't read as a hang.""" + + async def run_before(pilot: Pilot[None]) -> None: + app = 
pilot.app + assert isinstance(app, LiveAgentApp) + h.freeze_animation(app) + app.show_user_final("what's the weather like in Boston?") + app.show_tool_call("Searching the web") + + assert snap_compare(h.build_live_app(), terminal_size=h.TERMINAL_SIZE, run_before=run_before) + + +def test_live_interrupted(snap_compare) -> None: + """An interrupted reply is finalized and tagged `(interrupted)`, then returns to listening.""" + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, LiveAgentApp) + h.freeze_animation(app) + app.show_user_final("tell me a long story") + app.begin_reply() + app.show_agent_sentence("Once upon a time, in a faraway land,") + app.end_reply(interrupted=True) + + assert snap_compare(h.build_live_app(), terminal_size=h.TERMINAL_SIZE, run_before=run_before) + + +def test_live_error(snap_compare) -> None: + """A cascade failure surfaces as a red ✗ error line in the transcript.""" + + async def run_before(pilot: Pilot[None]) -> None: + app = pilot.app + assert isinstance(app, LiveAgentApp) + h.freeze_animation(app) + app._show_error("Streaming STT connection lost") + + assert snap_compare(h.build_live_app(), terminal_size=h.TERMINAL_SIZE, run_before=run_before) diff --git a/uv.lock b/uv.lock index 9d8a95b..ec69ea9 100644 --- a/uv.lock +++ b/uv.lock @@ -70,6 +70,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-randomly" }, { name = "pytest-socket" }, + { name = "pytest-textual-snapshot" }, { name = "pytest-timeout" }, { name = "pytest-xdist" }, { name = "python-dotenv" }, @@ -135,6 +136,7 @@ dev = [ { name = "pytest-mock", specifier = ">=3.14.0" }, { name = "pytest-randomly", specifier = ">=3.16.0" }, { name = "pytest-socket", specifier = ">=0.7.0" }, + { name = "pytest-textual-snapshot", specifier = ">=1.0.0" }, { name = "pytest-timeout", specifier = ">=2.3.1" }, { name = "pytest-xdist", specifier = ">=3.6.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, @@ -2787,6 +2789,22 @@ wheels = [ { url 
= "https://files.pythonhosted.org/packages/3f/e8/4a8568580bae3dcd678599ed8e86a82d505a44df71c1ced4246c1aa14b4b/pytest_socket-0.8.0-py3-none-any.whl", hash = "sha256:81821ba59f07d7600fe2b551d8714f40b068bd46e8b6704c48664e9d60cdacb8", size = 8414, upload-time = "2026-05-21T16:50:21.022Z" }, ] +[[package]] +name = "pytest-textual-snapshot" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "pytest" }, + { name = "rich" }, + { name = "syrupy" }, + { name = "textual" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/75/2ef17ae52fa5bc848ff2d1d7bc317a702cbd6d7ad733ca991b9f899dbbae/pytest_textual_snapshot-1.0.0.tar.gz", hash = "sha256:065217055ed833b8a16f2320a0613f39a0154e8d9fee63535f29f32c6414b9d7", size = 11071, upload-time = "2024-07-22T15:17:44.629Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/2e/4bf16ed78b382b3d7c1e545475ec8cf04346870be662815540faf8f16e8c/pytest_textual_snapshot-1.0.0-py3-none-any.whl", hash = "sha256:dd3a421491a6b1987ee7b4336d7f65299524924d2b0a297e69733b73b01570e1", size = 11171, upload-time = "2024-07-22T15:17:43.167Z" }, +] + [[package]] name = "pytest-timeout" version = "2.4.0"