From 29117a6ad4f3442c3b18a2ea8942d94ec8766e3e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:22:05 +0000
Subject: [PATCH 1/7] test: kill surviving mutants in
 config/output/microphone/transcribe_render

Whole-package mutation sweep (scripts/mutation_gate.py's engine, run over every
line rather than just the diff) surfaced lines whose covering tests passed even
when the line was broken. Fortify them:

- config.py: assert exit_code == 2 (usage error) for the invalid-profile,
  invalid-TOML, unexpected-shape and empty --api-key errors, and add a _dump
  test whose config dir's parents don't exist yet (pins mkdir parents=True).
- output.py: assert is_agentic() returns True when stdout is not a TTY.
- microphone.py: pin the `rate > 0` boundary (keep a 1 Hz reading) and the
  blocksize max(1, ...) floor for a tiny sample rate.
- transcribe_render.py: mark the chapter start/end getattr fallbacks
  `# pragma: no mutate` -- equivalent mutants (_fmt_ms(0) == _fmt_ms(1)).

Tests only (plus one pragma); no behavior change.
---
 aai_cli/transcribe_render.py |  4 +++-
 tests/test_config.py         | 23 ++++++++++++++++++++---
 tests/test_microphone.py     | 28 ++++++++++++++++++++++++++++
 tests/test_output.py         | 10 ++++++++++
 4 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/aai_cli/transcribe_render.py b/aai_cli/transcribe_render.py
index fe13286d..e787b166 100644
--- a/aai_cli/transcribe_render.py
+++ b/aai_cli/transcribe_render.py
@@ -74,7 +74,9 @@ def _render_chapters(transcript: object, console: Console) -> None:
         return
     console.print("\n[bold]Chapters:[/bold]")
     for ch in chapters:
-        span = f"{_fmt_ms(jsonshape.as_int(getattr(ch, 'start', 0)))}-{_fmt_ms(jsonshape.as_int(getattr(ch, 'end', 0)))}"
+        # The `, 0` getattr fallbacks are equivalent mutants: they apply only to a
+        # chapter missing start/end, and _fmt_ms(0) == _fmt_ms(1) == "00:00" regardless.
+        span = f"{_fmt_ms(jsonshape.as_int(getattr(ch, 'start', 0)))}-{_fmt_ms(jsonshape.as_int(getattr(ch, 'end', 0)))}"  # pragma: no mutate
         console.print(f"  {span}  {getattr(ch, 'headline', '')}")
 
 
diff --git a/tests/test_config.py b/tests/test_config.py
index d57ee6cf..1bcea9c4 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -174,14 +174,18 @@ def test_empty_api_key_flag_rejected():
 
     from aai_cli.errors import CLIError
 
-    with pytest.raises(CLIError):
+    with pytest.raises(CLIError) as exc:
         config.resolve_api_key(api_key_flag="")
+    assert exc.value.error_type == "invalid_key"
+    assert exc.value.exit_code == 2  # usage error, not the generic 1
 
 
 def test_invalid_profile_name_has_suggestion():
     with pytest.raises(CLIError) as exc:
         config.set_api_key("bad name!", "sk_x")
     assert exc.value.message.startswith("Invalid profile name")
+    assert exc.value.error_type == "invalid_profile"
+    assert exc.value.exit_code == 2  # usage error, not the generic 1
     assert exc.value.suggestion == "Use only letters, digits, '-' or '_'."
 
 
@@ -189,8 +193,10 @@ def test_malformed_config_raises_clean_error(tmp_config):
     from aai_cli.errors import CLIError
 
     (tmp_config / "config.toml").write_text("this is not = = valid toml ===\n")
-    with pytest.raises(CLIError):
+    with pytest.raises(CLIError) as exc:
         config.get_active_profile()
+    assert exc.value.error_type == "invalid_config"
+    assert exc.value.exit_code == 2  # usage error, not the generic 1
 
 
 def test_unexpected_config_shape_raises_clean_error(tmp_config):
@@ -200,7 +206,7 @@ def test_unexpected_config_shape_raises_clean_error(tmp_config):
     with pytest.raises(CLIError) as exc:
         config.get_active_profile()
     assert exc.value.error_type == "invalid_config"
-    assert exc.value.exit_code == 2
+    assert exc.value.exit_code == 2  # usage error, not the generic 1
 
 
 def test_unexpected_config_shape_error_is_compact(tmp_config):
@@ -245,6 +251,17 @@ def test_validation_summary_labels_rootlevel_problems():
     assert config._validation_summary(exc.value).startswith("top level: ")
 
 
+def test_dump_creates_missing_parent_directories(monkeypatch, tmp_path):
+    # The config dir's parents may not exist yet (first run on a fresh machine);
+    # _dump must create the whole chain (mkdir parents=True), not just the leaf.
+    nested = tmp_path / "deeply" / "nested" / "config"
+    monkeypatch.setattr("aai_cli.config.config_dir", lambda: nested)
+    config.set_api_key("default", "sk_abc")
+    assert nested.is_dir()
+    assert (nested / "config.toml").exists()
+    assert config.get_api_key("default") == "sk_abc"
+
+
 def test_config_roundtrips_after_special_value(tmp_path, monkeypatch):
     # profile names are validated; this checks tomli_w writes valid TOML for normal data
     config.set_api_key("staging", "sk_x")
diff --git a/tests/test_microphone.py b/tests/test_microphone.py
index 7e2f141c..9109747c 100644
--- a/tests/test_microphone.py
+++ b/tests/test_microphone.py
@@ -220,6 +220,16 @@ def test_device_default_rate_falls_back_on_non_numeric_rate(monkeypatch) -> None
     assert _device_default_rate(None) == _FALLBACK_RATE
 
 
+def test_device_default_rate_keeps_smallest_positive_rate(monkeypatch) -> None:
+    # A reported rate of exactly 1 is positive and must be kept as-is; only a
+    # non-positive (<= 0) rate falls back. Pins the `rate > 0` boundary so it can't
+    # drift to `rate > 1` and silently discard a legitimate 1 Hz reading.
+    fake_sd: Any = types.ModuleType("sounddevice")
+    fake_sd.query_devices = lambda device, kind: {"default_samplerate": 1.0}
+    monkeypatch.setitem(sys.modules, "sounddevice", fake_sd)
+    assert _device_default_rate(None) == 1
+
+
 def test_sounddevice_mic_yields_bytes_then_stops_and_closes():
     stream = _FakeRawStream()
     mic = _SoundDeviceMic(stream, blocksize=1024)
@@ -251,6 +261,24 @@ def raw_input_stream(**kwargs):
     assert next(iter(stream)) == b"\x01\x02"
 
 
+def test_default_mic_stream_floors_blocksize_at_one(monkeypatch) -> None:
+    # A pathologically small sample rate makes `sample_rate // 10` round to 0; the
+    # max(1, ...) floor must still open with one frame per read, never 0 (which would
+    # make sounddevice read nothing). Pins that floor at 1.
+    created: dict[str, Any] = {}
+
+    def raw_input_stream(**kwargs):
+        created.update(kwargs)
+        return _FakeRawStream(**kwargs)
+
+    fake_sd: Any = types.ModuleType("sounddevice")
+    fake_sd.RawInputStream = raw_input_stream
+    monkeypatch.setitem(sys.modules, "sounddevice", fake_sd)
+
+    _default_mic_stream(sample_rate=5, device=None)  # 5 // 10 == 0
+    assert created["blocksize"] == 1
+
+
 def test_default_mic_stream_missing_sounddevice_raises_mic_missing(monkeypatch):
     monkeypatch.setitem(sys.modules, "sounddevice", None)  # import -> ImportError
     with pytest.raises(CLIError) as exc:
diff --git a/tests/test_output.py b/tests/test_output.py
index de70994d..3aa4da71 100644
--- a/tests/test_output.py
+++ b/tests/test_output.py
@@ -37,6 +37,16 @@ def test_is_agentic_false_for_plain_interactive_tty(monkeypatch):
     assert output.is_agentic() is False
 
 
+def test_is_agentic_true_when_stdout_not_a_tty(monkeypatch):
+    # Piped/redirected stdout means no interactive human, so the spinner is
+    # suppressed even with no agent env var set -- guards the `not a tty -> True`
+    # early return (without it, this path would fall through to the env-var check).
+    monkeypatch.setattr(output, "_stdout_is_tty", lambda: False)
+    for var in output._AGENT_ENV_VARS:
+        monkeypatch.delenv(var, raising=False)
+    assert output.is_agentic() is True
+
+
 def test_mask_secret_preserves_only_short_edges():
     assert output.mask_secret("sk_1234567890") == "sk_…7890"
     assert output.mask_secret("12345678") == "123…5678"

From 291879e412cd108a9c91a2dcdcdf0e3530248b5c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:32:03 +0000
Subject: [PATCH 2/7] test: kill surviving mutants in
 init/llm/transcripts/setup commands

Continue the whole-package mutation sweep, fortifying the command layer:

- init.py: pin _pick_template's stdin/stdout `or` (either stream piped -> usage
  error) and that error's exit_code; the missing-questionary exit_code; the
  agents-host derivation replacing only the FIRST "streaming" (replace count=1);
  the [:300] install-error truncation; and the DIRECTORY/--here conflict
  exit_code. Mark the don't-launch `False` on the install-failure branch
  `# pragma: no mutate` -- run_init exits on the failed step before consulting it.
- llm.py: assert `-o json` forces JSON output without the global --json flag.
- transcripts.py: assert an errored transcript surfaces its own error message,
  not the generic fallback.
- setup.py: assert subprocess capture_output, the 300s/120s skill timeouts, the
  install-hint command slice, the remove failure-detail fallback, _copy_tree's
  mkdir parents/exist_ok, and best-effort rmtree(ignore_errors). FakeRun now
  records per-call kwargs so timeouts/flags are assertable.

Tests only (plus one pragma); no behavior change.
---
 aai_cli/commands/init.py    |  5 ++-
 tests/setup_helpers.py      |  2 ++
 tests/test_init_command.py  | 71 +++++++++++++++++++++++++++++++++++++
 tests/test_llm_command.py   | 12 +++++++
 tests/test_setup.py         | 59 ++++++++++++++++++++++++++++++
 tests/test_setup_install.py | 16 +++++++--
 tests/test_transcripts.py   |  3 ++
 7 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/aai_cli/commands/init.py b/aai_cli/commands/init.py
index 1d58b285..48c4142a 100644
--- a/aai_cli/commands/init.py
+++ b/aai_cli/commands/init.py
@@ -105,7 +105,10 @@ def _install_step(
             "status": "failed",
             "detail": (setup.stderr or setup.stdout).strip()[:300],
         }
-        return [row], False
+        # The False (don't-launch) is an equivalent mutant: run_init raises Exit(1) on
+        # any failed step before it ever consults will_launch, so the value is unused
+        # on this branch.
+        return [row], False  # pragma: no mutate
     return [
         {
             "name": "install",
diff --git a/tests/setup_helpers.py b/tests/setup_helpers.py
index 0f282ac2..5c79441d 100644
--- a/tests/setup_helpers.py
+++ b/tests/setup_helpers.py
@@ -32,12 +32,14 @@ class FakeRun:
 
     def __init__(self, returncodes=None, *, creates_skill=True, removes_skill=True):
         self.calls = []
+        self.invocations = []  # (cmd, kwargs) per call, so tests can assert timeout etc.
         self.returncodes = returncodes or {}
         self.creates_skill = creates_skill
         self.removes_skill = removes_skill
 
     def __call__(self, cmd, *args, **kwargs):
         self.calls.append(cmd)
+        self.invocations.append((list(cmd), dict(kwargs)))
         rc = 0
         best = -1
         for prefix, code in self.returncodes.items():
diff --git a/tests/test_init_command.py b/tests/test_init_command.py
index bf2ac71c..277ecde0 100644
--- a/tests/test_init_command.py
+++ b/tests/test_init_command.py
@@ -28,6 +28,15 @@ def test_init_scaffold_only_creates_project(tmp_path, monkeypatch):
     assert (tmp_path / "myapp" / ".env").exists()
 
 
+def test_init_rejects_dir_and_here_together(tmp_path, monkeypatch):
+    # DIRECTORY and --here are mutually exclusive; passing both is a usage error
+    # exiting 1 (pins that exit_code on the conflict).
+    monkeypatch.chdir(tmp_path)
+    result = runner.invoke(app, ["init", TEMPLATE, "somedir", "--here", "--no-install"])
+    assert result.exit_code == 1
+    assert "not both" in result.output
+
+
 def test_init_writes_key_from_env(tmp_path, monkeypatch):
     monkeypatch.chdir(tmp_path)
     monkeypatch.setenv("ASSEMBLYAI_API_KEY", "sk-from-env")
@@ -264,6 +273,34 @@ def test_pick_template_missing_questionary_errors(monkeypatch):
     with pytest.raises(CLIError) as exc:
         init_cmd._pick_template()
     assert exc.value.error_type == "missing_dependency"
+    assert exc.value.exit_code == 1
+
+
+@pytest.mark.parametrize(("stdin_tty", "stdout_tty"), [(True, False), (False, True)])
+def test_pick_template_errors_when_either_stream_not_a_tty(monkeypatch, stdin_tty, stdout_tty):
+    # The picker needs BOTH stdin and stdout interactive; if either is piped it must
+    # bail with a usage error (pins the `or`, which an `and` would weaken to "both
+    # piped"). questionary is made unimportable so a mutated fall-through surfaces as
+    # a *different* error (missing_dependency) rather than the usage error asserted.
+    monkeypatch.setattr("sys.stdin", _Tty() if stdin_tty else io.StringIO())
+    monkeypatch.setattr("sys.stdout", _Tty() if stdout_tty else io.StringIO())
+    monkeypatch.setitem(sys.modules, "questionary", None)
+    with pytest.raises(CLIError) as exc:
+        init_cmd._pick_template()
+    assert exc.value.error_type == "usage_error"
+    assert exc.value.exit_code == 1
+
+
+def test_active_env_vars_agents_host_replaces_only_first_streaming(monkeypatch):
+    # The agents host is derived by swapping the FIRST "streaming" token for "agents"
+    # (replace count=1); a host containing it twice must keep the later occurrence.
+    fake_env = types.SimpleNamespace(
+        api_base="https://api.x",
+        llm_gateway_base="https://llm.x",
+        streaming_host="streaming.streaming.example.com",
+    )
+    monkeypatch.setattr(init_cmd.environments, "active", lambda: fake_env)
+    assert init_cmd._active_env_vars()["ASSEMBLYAI_AGENTS_HOST"] == "agents.streaming.example.com"
 
 
 def test_init_install_failure_reports_and_exits(tmp_path, monkeypatch):
@@ -284,6 +321,40 @@ def test_init_install_failure_reports_and_exits(tmp_path, monkeypatch):
     assert "pip exploded" in result.output
 
 
+def test_init_install_failure_does_not_launch_even_with_key(tmp_path, monkeypatch):
+    # A failed install must never start the server -- even when a key is present
+    # (which would otherwise satisfy the launch guard). run_init exits on the failed
+    # step before consulting will_launch, so this guards that exit, not the `False`.
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setenv("ASSEMBLYAI_API_KEY", "sk-real-key")
+    monkeypatch.setattr(
+        "aai_cli.init.runner.run_setup",
+        lambda *a, **k: subprocess.CompletedProcess([], 1, "", "pip exploded"),
+    )
+    launched = {"v": False}
+    monkeypatch.setattr(
+        "aai_cli.init.runner.launch_and_open",
+        lambda *a, **k: launched.__setitem__("v", True) or 0,
+    )
+    result = runner.invoke(app, ["init", TEMPLATE, "app", "--json"])
+    assert result.exit_code == 1
+    assert launched["v"] is False
+
+
+def test_init_install_failure_detail_is_truncated(tmp_path, monkeypatch):
+    # A pathologically long install error is capped at 300 chars in the report detail
+    # so it can't flood the terminal; pins the [:300] slice.
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr(
+        "aai_cli.init.runner.run_setup",
+        lambda *a, **k: subprocess.CompletedProcess([], 1, "", "x" * 500),
+    )
+    result = runner.invoke(app, ["init", TEMPLATE, "app", "--json"])
+    assert result.exit_code == 1
+    assert "x" * 300 in result.output
+    assert "x" * 301 not in result.output
+
+
 def test_init_launches_when_key_present(tmp_path, monkeypatch):
     # Key present + install succeeds -> the server is launched and the browser opens.
     monkeypatch.chdir(tmp_path)
diff --git a/tests/test_llm_command.py b/tests/test_llm_command.py
index 9d4b1567..6cde0a3b 100644
--- a/tests/test_llm_command.py
+++ b/tests/test_llm_command.py
@@ -250,6 +250,18 @@ def test_llm_json_flag_emits_json(monkeypatch):
     assert json.loads(result.output)["output"] == "hello"
 
 
+def test_llm_output_json_field_forces_json_without_flag(monkeypatch):
+    # `-o json` selects machine output even without the global --json flag, at an
+    # interactive terminal (where json_mode is otherwise off). Pins the
+    # `output_field == "json"` half of the json_mode disjunction.
+    _auth()
+    monkeypatch.setattr("aai_cli.output._stdout_is_tty", lambda: True)
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload("hi42"))
+    result = runner.invoke(app, ["llm", "hi", "-o", "json"])
+    assert result.exit_code == 0
+    assert json.loads(result.output)["output"] == "hi42"
+
+
 def test_llm_output_invalid_field_exits_2(monkeypatch):
     _auth()
     monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
diff --git a/tests/test_setup.py b/tests/test_setup.py
index 898e41af..bb275bcb 100644
--- a/tests/test_setup.py
+++ b/tests/test_setup.py
@@ -1,3 +1,4 @@
+import json
 import subprocess
 
 import pytest
@@ -53,6 +54,12 @@ def test_remove_skill_failure_reports_failed(monkeypatch):
     result = runner.invoke(app, ["setup", "remove"])
     assert result.exit_code == 1
     assert _statuses(result)["skill"] == "failed"
+    # The failure detail surfaces the subprocess's stderr ("boom"), preferring it over
+    # the generic "still present" fallback (pins `_proc_detail(proc) or ...`).
+    skill_detail = next(
+        s["detail"] for s in json.loads(result.output)["steps"] if s["name"] == "skill"
+    )
+    assert "boom" in skill_detail
 
 
 def test_remove_skill_skipped_when_npx_missing(monkeypatch):
@@ -94,6 +101,9 @@ def test_remove_unwinds_all(monkeypatch, tmp_path):
     assert ["npx", "-y", "skills", "remove", "assemblyai", "--global"] in fake.calls
     assert not _skill_path().exists()
     assert not _cli_skill_path().exists()
+    # The skill-remove subprocess uses the explicit 120s timeout backstop.
+    remove_calls = [kw for cmd, kw in fake.invocations if cmd[:1] == ["npx"] and "remove" in cmd]
+    assert remove_calls and remove_calls[0]["timeout"] == 120
 
 
 def test_remove_when_absent_is_not_an_error(monkeypatch):
@@ -177,6 +187,35 @@ def test_copy_tree_skips_pycache_and_pyc(tmp_path):
     assert not (dest / "__pycache__").exists()
 
 
+def test_copy_tree_creates_missing_parent_dirs(tmp_path):
+    # The destination's parents may not exist yet (~/.claude/skills on a fresh
+    # machine); _copy_tree must create the whole chain (mkdir parents=True).
+    from aai_cli.commands import setup
+
+    src = tmp_path / "src"
+    src.mkdir()
+    (src / "SKILL.md").write_text("# skill")
+
+    dest = tmp_path / "a" / "b" / "c" / "dest"  # none of a/b/c exist yet
+    setup._copy_tree(src, dest)
+    assert (dest / "SKILL.md").read_text() == "# skill"
+
+
+def test_copy_tree_into_existing_dir_is_tolerated(tmp_path):
+    # _copy_tree may run with the destination already present (a forced reinstall over
+    # an existing skill dir); the mkdir must tolerate it (exist_ok=True), not raise.
+    from aai_cli.commands import setup
+
+    src = tmp_path / "src"
+    src.mkdir()
+    (src / "SKILL.md").write_text("# skill")
+
+    dest = tmp_path / "dest"
+    dest.mkdir()  # already exists before the copy
+    setup._copy_tree(src, dest)
+    assert (dest / "SKILL.md").read_text() == "# skill"
+
+
 # --- help --------------------------------------------------------------------
 
 
@@ -241,3 +280,23 @@ def test_remove_cli_skill_fails_when_rmtree_noops(monkeypatch):
     step = setup._remove_cli_skill()
     assert step["status"] == "failed"
     assert "still present" in step["detail"]
+
+
+def test_remove_cli_skill_tolerates_rmtree_error(monkeypatch):
+    # Removal is best-effort (ignore_errors=True): a deletion failure must surface as a
+    # clean "failed" step (skill still present), never an uncaught OSError. Without
+    # ignore_errors, rmtree would raise instead of returning.
+    from aai_cli.commands import setup
+
+    dest = _cli_skill_path()
+    dest.mkdir(parents=True)
+    (dest / "SKILL.md").write_text("# x")
+
+    def rmtree(path, ignore_errors=False, **kwargs):
+        if not ignore_errors:
+            raise OSError("permission denied")  # what a non-ignoring rmtree would do
+
+    monkeypatch.setattr(setup.shutil, "rmtree", rmtree)
+    step = setup._remove_cli_skill()
+    assert step["status"] == "failed"
+    assert "still present" in step["detail"]
diff --git a/tests/test_setup_install.py b/tests/test_setup_install.py
index d43d47e1..92e2b791 100644
--- a/tests/test_setup_install.py
+++ b/tests/test_setup_install.py
@@ -1,3 +1,4 @@
+import json
 import subprocess
 
 import pytest
@@ -79,6 +80,12 @@ def test_install_skill_failed_when_npx_succeeds_but_nothing_installed(monkeypatc
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code == 1  # skill step failed
     assert _statuses(result)["skill"] == "failed"
+    # The detail quotes the install command starting at `add` (_SKILL_ADD[3:]), so the
+    # user sees exactly what to retry -- pins that slice start.
+    skill_detail = next(
+        s["detail"] for s in json.loads(result.output)["steps"] if s["name"] == "skill"
+    )
+    assert "'add AssemblyAI/assemblyai-skill --global --yes'" in skill_detail
 
     # And status agrees: still not installed.
     status_result = runner.invoke(app, ["setup", "status"])
@@ -92,16 +99,21 @@ def test_install_detaches_stdin_and_sets_timeout(monkeypatch):
     seen = []
 
     def record(cmd, *args, **kwargs):
-        seen.append(kwargs)
+        seen.append((list(cmd), kwargs))
         return subprocess.CompletedProcess(args=cmd, returncode=1, stdout="", stderr="")
 
     monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", record)
     result = runner.invoke(app, ["setup", "install"])
     assert result.exit_code in (0, 1)
     assert seen, "expected subprocess.run to be called"
-    for kwargs in seen:
+    for _cmd, kwargs in seen:
         assert kwargs.get("stdin") is subprocess.DEVNULL
         assert kwargs.get("timeout")
+        assert kwargs.get("capture_output") is True  # stdout/stderr must be captured
+
+    # The skill download gets the longer 300s timeout (vs the 120s default elsewhere).
+    add_calls = [kw for cmd, kw in seen if cmd[:1] == ["npx"] and "add" in cmd]
+    assert add_calls and add_calls[0]["timeout"] == 300
 
 
 def test_install_scope_passthrough(monkeypatch):
diff --git a/tests/test_transcripts.py b/tests/test_transcripts.py
index 802aa5ad..13922916 100644
--- a/tests/test_transcripts.py
+++ b/tests/test_transcripts.py
@@ -139,6 +139,9 @@ def test_get_errored_transcript_exits_nonzero(mocker):
     )
     result = runner.invoke(app, ["transcripts", "get", "t_err"])
     assert result.exit_code == 1
+    # The transcript's own error message is surfaced, not the generic fallback
+    # (pins `getattr(transcript, "error", None) or "Transcript failed."`).
+    assert "decode failed" in result.output
 
 
 def test_list_table_colors_status(monkeypatch, mocker):

From 1b5d0644b728755d405cd4d58343525285a05456 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:37:26 +0000
Subject: [PATCH 3/7] test: kill surviving mutants in init runner/scaffold

- runner.py: pin find_free_port's bind-to-port-0 (OS-assigned ephemeral port),
  the port-range error message bounds and its exit_code, the 0.2s poll interval
  in wait_for_port, run_setup's success sentinel returncode, and the
  capture_output/check/text kwargs passed to subprocess.run.
- scaffold.py: assert the unknown-template / template-missing exit_codes, that a
  nested target's parent dirs are created (target.mkdir parents=True), and that a
  re-scaffold over an existing tree is tolerated (exist_ok). The two _copy_tree
  mkdir parents=True flags are marked `# pragma: no mutate` -- equivalent mutants,
  since the copy walk always creates a node's parent before the node.

Tests only (plus two pragmas); no behavior change.
---
 aai_cli/init/scaffold.py    |  7 +++++--
 tests/test_init_runner.py   | 28 +++++++++++++++++++++++++---
 tests/test_init_scaffold.py | 23 ++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/aai_cli/init/scaffold.py b/aai_cli/init/scaffold.py
index 1b8c0637..b39d1a80 100644
--- a/aai_cli/init/scaffold.py
+++ b/aai_cli/init/scaffold.py
@@ -65,10 +65,13 @@ def _copy_tree(node: Traversable, dest: Path) -> None:
         name = _DOTFILE_RENAMES.get(child.name, child.name)
         out = dest / name
         if child.is_dir():
-            out.mkdir(parents=True, exist_ok=True)
+            # parents=True is an equivalent mutant here: the walk always creates a
+            # node's parent before descending, so `dest` (and `out.parent`) already
+            # exists. exist_ok is exercised by the idempotent re-scaffold test.
+            out.mkdir(parents=True, exist_ok=True)  # pragma: no mutate
             _copy_tree(child, out)
         else:
-            out.parent.mkdir(parents=True, exist_ok=True)
+            out.parent.mkdir(parents=True, exist_ok=True)  # pragma: no mutate
             out.write_bytes(child.read_bytes())
 
 
diff --git a/tests/test_init_runner.py b/tests/test_init_runner.py
index d9d5e06c..4823504f 100644
--- a/tests/test_init_runner.py
+++ b/tests/test_init_runner.py
@@ -67,7 +67,9 @@ def test_serve_command_uv_and_venv():
 @pytest.mark.allow_hosts(["127.0.0.1"])
 def test_find_free_port_returns_preferred_when_open():
     port = runner.find_free_port(0)  # 0 -> OS assigns a free port
-    assert isinstance(port, int) and port > 0
+    # A real OS-assigned ephemeral port, not a low/privileged one: pins the bind to
+    # port 0 (binding to 1 would yield 1, or fail outright as non-root).
+    assert isinstance(port, int) and port > 1024
 
 
 @pytest.mark.allow_hosts(["127.0.0.1"])
@@ -88,20 +90,24 @@ def test_find_free_port_raises_when_all_taken(monkeypatch):
     with pytest.raises(CLIError) as exc:
         runner.find_free_port(5000, tries=3)
     assert exc.value.error_type == "port_unavailable"
-    assert "5000" in str(exc.value)
+    assert exc.value.exit_code == 1
+    # The message names the exact inclusive range probed: preferred .. preferred+tries-1.
+    assert "5000-5002" in str(exc.value)
 
 
 def test_wait_for_port_returns_true_when_port_opens(monkeypatch):
     calls = {"n": 0}
+    slept = []
 
     def fake_open(port):
         calls["n"] += 1
         return calls["n"] >= 2  # closed on first poll, open on the second
 
     monkeypatch.setattr(runner, "_port_open", fake_open)
-    monkeypatch.setattr(runner.time, "sleep", lambda _s: None)
+    monkeypatch.setattr(runner.time, "sleep", slept.append)
     assert runner.wait_for_port(3000, timeout=5.0) is True
     assert calls["n"] >= 2
+    assert slept == [0.2]  # polls once at the 0.2s interval before the port opens
 
 
 def test_wait_for_port_returns_false_on_timeout(monkeypatch):
@@ -115,15 +121,31 @@ def test_wait_for_port_returns_false_on_timeout(monkeypatch):
 
 def test_run_setup_returns_last_success(monkeypatch):
     ran = []
+    seen = {}
 
     def fake_run(cmd, cwd, capture_output, check, text):
         ran.append(cmd)
+        seen.update(capture_output=capture_output, check=check, text=text)
         return subprocess.CompletedProcess(args=cmd, returncode=0, stdout="ok", stderr="")
 
     monkeypatch.setattr(runner.subprocess, "run", fake_run)
     result = runner.run_setup(Path("/proj"), use_uv=True)
     assert result.returncode == 0
     assert len(ran) == 2  # both env-setup commands ran
+    # Output is captured as text, and a failing command is returned (not raised):
+    # check must stay False so run_setup can report the failure itself.
+    assert seen == {"capture_output": True, "check": False, "text": True}
+
+
+def test_run_setup_with_no_commands_returns_success_sentinel(monkeypatch):
+    # With no env-setup commands the seeded CompletedProcess is returned unchanged, so
+    # an empty plan reads as success (returncode 0), never a spurious failure.
+    monkeypatch.setattr(runner, "env_setup_commands", lambda *a, **k: [])
+    monkeypatch.setattr(
+        runner.subprocess, "run", lambda *a, **k: pytest.fail("no command should run")
+    )
+    result = runner.run_setup(Path("/proj"), use_uv=True)
+    assert result.returncode == 0
 
 
 def test_run_setup_stops_at_first_failure(monkeypatch):
diff --git a/tests/test_init_scaffold.py b/tests/test_init_scaffold.py
index de30657d..2c6f25a7 100644
--- a/tests/test_init_scaffold.py
+++ b/tests/test_init_scaffold.py
@@ -96,8 +96,10 @@ def test_scaffold_writes_placeholder_when_no_key(tmp_path):
 
 
 def test_scaffold_unknown_template_raises(tmp_path):
-    with pytest.raises(CLIError):
+    with pytest.raises(CLIError) as exc:
         scaffold.scaffold("nope", tmp_path / "app", api_key=None)
+    assert exc.value.error_type == "unknown_template"
+    assert exc.value.exit_code == 1
 
 
 def test_scaffold_registered_but_missing_files_raises(tmp_path, monkeypatch):
@@ -106,6 +108,25 @@ def test_scaffold_registered_but_missing_files_raises(tmp_path, monkeypatch):
     with pytest.raises(CLIError) as exc:
         scaffold.scaffold("ghost-template", tmp_path / "app", api_key=None)
     assert exc.value.error_type == "template_missing"
+    assert exc.value.exit_code == 1
+
+
+def test_scaffold_creates_nested_target_parents(tmp_path):
+    # `aai init <tmpl> a/b/app` targets a path whose parents don't exist yet; scaffold
+    # must create the whole chain (target.mkdir parents=True).
+    target = tmp_path / "a" / "b" / "app"  # a/ and b/ do not exist
+    scaffold.scaffold("audio-transcription", target, api_key="k")
+    assert (target / "api" / "index.py").exists()
+
+
+def test_scaffold_is_idempotent_over_existing_tree(tmp_path):
+    # Re-scaffolding (e.g. `--force`) runs over an already-populated tree, so every
+    # mkdir along the copy walk must tolerate existing dirs (exist_ok=True).
+    target = tmp_path / "app"
+    scaffold.scaffold("audio-transcription", target, api_key="k")
+    scaffold.scaffold("audio-transcription", target, api_key="k2")  # dirs already exist
+    assert (target / "api" / "index.py").exists()
+    assert "ASSEMBLYAI_API_KEY=k2" in (target / ".env").read_text()
 
 
 def test_target_conflict_detects_nonempty_dir(tmp_path):

From a799a3fc94876f4dfdaaba080af3f3df402e5ae0 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 12:52:08 +0000
Subject: [PATCH 4/7] test: kill surviving mutants in auth loopback + agent
 audio/session

- auth/loopback.py: assert the callback server thread is a daemon and that the
  cleanup join uses the bounded 5s timeout (spying threading.Thread while the
  server really serves, so shutdown() doesn't block).
- agent/audio.py: pin the DuplexAudio blocksize max(1, rate//10) floor for a tiny
  device rate.
- agent/session.py: pin the bounded 10s wait on ready_event in the send loop, and
  that the capture thread is a daemon.

Tests only; no behavior change.
---
 tests/test_agent_audio.py       | 14 ++++++++++
 tests/test_agent_session.py     | 16 +++++++++++
 tests/test_agent_session_run.py | 47 +++++++++++++++++++++++++++++++++
 tests/test_auth_loopback.py     | 24 +++++++++++++++++
 4 files changed, 101 insertions(+)

diff --git a/tests/test_agent_audio.py b/tests/test_agent_audio.py
index 8f70adcf..bf56f4ed 100644
--- a/tests/test_agent_audio.py
+++ b/tests/test_agent_audio.py
@@ -45,6 +45,20 @@ def factory(*, rate, blocksize, callback, device):
     assert fake.stopped and fake.closed
 
 
+def test_duplex_floors_blocksize_at_one():
+    # A pathologically small device rate (//10 == 0) must still open with at least one
+    # frame per block; the max(1, ...) floor prevents a 0-frame block.
+    seen = {}
+
+    def factory(*, rate, blocksize, callback, device):
+        seen["blocksize"] = blocksize
+        return FakeStream()
+
+    d = DuplexAudio(device_rate=5, stream_factory=factory)  # 5 // 10 == 0
+    d.player.start()
+    assert seen["blocksize"] == 1
+
+
 def test_duplex_restart_after_close_reopens_stream():
     calls = {"n": 0}
 
diff --git a/tests/test_agent_session.py b/tests/test_agent_session.py
index bcb78ebe..e91dc617 100644
--- a/tests/test_agent_session.py
+++ b/tests/test_agent_session.py
@@ -266,3 +266,19 @@ def test_send_audio_loop_waits_for_ready_event_before_streaming():
     ws = _RecordingWS()
     _send_audio_loop(ws, s, [b"\x01\x02"])
     assert len(ws.sent) == 1  # frame forwarded once the gate is open
+
+
+def test_send_audio_loop_waits_on_ready_event_with_bounded_timeout():
+    # The wait on ready_event is bounded so a server that never sends `ready` can't
+    # wedge the send loop forever; pins the 10s timeout.
+    seen = {}
+
+    class _RecordingEvent:
+        def wait(self, timeout=None):
+            seen["timeout"] = timeout
+            return True
+
+    s = _session(exit_after_reply=True, ready_event=_RecordingEvent())
+    s.ready = True
+    _send_audio_loop(_RecordingWS(), s, [b"\x01\x02"])
+    assert seen["timeout"] == 10
diff --git a/tests/test_agent_session_run.py b/tests/test_agent_session_run.py
index 2889064a..7254223f 100644
--- a/tests/test_agent_session_run.py
+++ b/tests/test_agent_session_run.py
@@ -149,6 +149,53 @@ def close(self):
     assert exc.value.exit_code == 1  # the real mic failure reaches the user, not a hang
 
 
+def test_run_session_capture_thread_is_daemon(monkeypatch):
+    # The capture thread is a daemon so a stuck mic read can't keep the process alive
+    # after the session ends.
+    import threading as _threading
+
+    from aai_cli.agent import session as session_mod
+
+    daemons = []
+    real_cls = session_mod.threading.Thread
+
+    class SpyThread(real_cls):
+        def __init__(self, *a, **k):
+            daemons.append(k.get("daemon"))
+            super().__init__(*a, **k)
+
+    monkeypatch.setattr(session_mod.threading, "Thread", SpyThread)
+
+    class _BoomMic:
+        def __iter__(self):
+            raise CLIError("no microphone", error_type="mic_error", exit_code=1)
+
+    class _BlockingWS:
+        def __init__(self):
+            self._closed = _threading.Event()
+
+        def send(self, _msg):
+            pass
+
+        def __iter__(self):
+            self._closed.wait(timeout=2)
+            return iter(())
+
+        def close(self):
+            self._closed.set()
+
+    with pytest.raises(CLIError):
+        run_session(
+            "sk_live",
+            renderer=FakeRenderer(),
+            player=FakePlayer(),
+            mic=_BoomMic(),
+            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+            connect=lambda url, **kwargs: _BlockingWS(),
+        )
+    assert daemons == [True]  # the one capture thread, created as a daemon
+
+
 def test_run_session_does_not_close_player_that_failed_to_open():
     # If opening the speaker stream raises, the cleanup must NOT call close() on a
     # player that never started (pins the player_started=False initializer).
diff --git a/tests/test_auth_loopback.py b/tests/test_auth_loopback.py
index 6e85c347..3530265d 100644
--- a/tests/test_auth_loopback.py
+++ b/tests/test_auth_loopback.py
@@ -144,6 +144,30 @@ def test_capture_times_out_without_callback():
     assert result.token is None
 
 
+def test_capture_server_thread_is_daemon_and_joined_with_timeout(monkeypatch):
+    # The serve_forever thread must be a daemon (so it can't block process exit) and the
+    # cleanup join must be bounded (5s) so a wedged server can't hang shutdown. The
+    # server really serves (no callback arrives, so capture just times out fast); we
+    # only spy on the thread's daemon flag and join timeout.
+    created = {}
+    real_cls = loopback.threading.Thread
+
+    class SpyThread(real_cls):
+        def __init__(self, *a, **k):
+            created["daemon"] = k.get("daemon")
+            super().__init__(*a, **k)
+
+        def join(self, timeout=None):
+            created["join_timeout"] = timeout
+            return super().join(timeout)
+
+    monkeypatch.setattr(loopback.threading, "Thread", SpyThread)
+    result = loopback.capture_callback(timeout=0.1)  # no callback -> times out
+    assert result.error == "timeout"
+    assert created["daemon"] is True
+    assert created["join_timeout"] == 5
+
+
 def test_capture_raises_clean_error_when_port_unavailable(monkeypatch):
     # Occupy a port, then point the callback server at it: binding must fail with a
     # clean APIError, not a raw OSError traceback escaping run_login_flow.

From 81a1bc7e5a75f61be7c9fa36007576c2c5bfc163 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 13:08:15 +0000
Subject: [PATCH 5/7] test: kill surviving mutants in streaming
 macos/render/session

- macos.py: assert the missing-swiftc / compile-failure exit_codes, the swiftc
  subprocess capture_output/text/check kwargs, the cache-dir mkdir parents (nested
  path) and exist_ok (pre-existing dirs), the _cleanup_process terminate guard +
  2s wait backstops + stderr-pipe close, the `returncode >= 0` boundary, and the
  chunk-frames = sample_rate//10 helper arg. The module-cache mkdir parents=True
  is `# pragma: no mutate` (equivalent: cache_dir is created just above it).
- render.py: assert a turn event missing end_of_turn reads as a partial (False).
- session.py: assert the parallel source workers are daemons; the 0.1s join poll
  interval is `# pragma: no mutate` (a responsiveness/CPU tradeoff, not behavior).

Tests only (plus two pragmas); no behavior change.
---
 aai_cli/streaming/macos.py       |  4 ++-
 aai_cli/streaming/session.py     |  4 ++-
 tests/test_macos_audio_source.py | 59 ++++++++++++++++++++++++++++++++
 tests/test_stream_session.py     |  5 +++
 tests/test_streaming_render.py   | 10 ++++++
 5 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/aai_cli/streaming/macos.py b/aai_cli/streaming/macos.py
index 99e8bf7d..8ad183f5 100644
--- a/aai_cli/streaming/macos.py
+++ b/aai_cli/streaming/macos.py
@@ -84,7 +84,9 @@ def build_helper() -> Path:
 
     cache_dir.mkdir(parents=True, exist_ok=True)
     module_cache = cache_dir / "swift-module-cache"
-    module_cache.mkdir(parents=True, exist_ok=True)
+    # parents=True is an equivalent mutant here: cache_dir was just created above, so
+    # module_cache's parent always exists. exist_ok is covered by the rebuild test.
+    module_cache.mkdir(parents=True, exist_ok=True)  # pragma: no mutate
     source_path = cache_dir / f"{_HELPER_PREFIX}-{digest}.swift"
     source_path.write_bytes(source)
     tmp_helper = helper.with_suffix(".tmp")
diff --git a/aai_cli/streaming/session.py b/aai_cli/streaming/session.py
index 1ef8aa04..ff01d7ac 100644
--- a/aai_cli/streaming/session.py
+++ b/aai_cli/streaming/session.py
@@ -296,7 +296,9 @@ def worker(source_label: str, audio: Iterable[bytes], rate: int) -> None:
             thread.start()
         while any(thread.is_alive() for thread in threads):
             for thread in threads:
-                thread.join(timeout=0.1)
+                # Poll interval: a responsiveness/CPU tradeoff, not behavior -- the loop
+                # surfaces a worker error within ~0.1s. Exact value isn't assertable.
+                thread.join(timeout=0.1)  # pragma: no mutate
             if not errors.empty():
                 raise errors.get()
         if not errors.empty():
diff --git a/tests/test_macos_audio_source.py b/tests/test_macos_audio_source.py
index 1afadfef..5266d1bd 100644
--- a/tests/test_macos_audio_source.py
+++ b/tests/test_macos_audio_source.py
@@ -54,6 +54,7 @@ def test_build_helper_requires_swiftc(monkeypatch):
     with pytest.raises(CLIError) as exc:
         macos.build_helper()
     assert "xcode-select" in (exc.value.suggestion or "")
+    assert exc.value.exit_code == 2
 
 
 def test_build_helper_compiles_to_cache(monkeypatch, tmp_path):
@@ -65,6 +66,7 @@ def test_build_helper_compiles_to_cache(monkeypatch, tmp_path):
 
     def fake_run(cmd, **kwargs):
         seen["cmd"] = cmd
+        seen["kwargs"] = kwargs
         Path(cmd[-1]).write_bytes(b"binary")
         return types.SimpleNamespace(returncode=0, stderr="", stdout="")
 
@@ -73,6 +75,51 @@ def fake_run(cmd, **kwargs):
     assert helper.read_bytes() == b"binary"
     assert "-parse-as-library" in seen["cmd"]
     assert "ScreenCaptureKit" in seen["cmd"]
+    # stderr/stdout are captured as text, and a non-zero compile is inspected (not
+    # raised): check must stay False so build_helper surfaces its own error.
+    assert seen["kwargs"]["capture_output"] is True
+    assert seen["kwargs"]["text"] is True
+    assert seen["kwargs"]["check"] is False
+
+
+def test_build_helper_creates_missing_cache_parents(monkeypatch, tmp_path):
+    # The cache dir's parents may not exist yet; build_helper must create the whole
+    # chain (cache_dir.mkdir parents=True), not just the leaf.
+    nested = tmp_path / "missing1" / "missing2"  # parents do not exist
+    monkeypatch.setattr(macos.sys, "platform", "darwin")
+    monkeypatch.setattr(macos.shutil, "which", lambda _tool: "/usr/bin/swiftc")
+    monkeypatch.setattr(macos, "_resource_bytes", lambda: b"swift source")
+    monkeypatch.setattr(macos, "user_cache_path", lambda _app: nested)
+    monkeypatch.setattr(
+        macos.subprocess,
+        "run",
+        lambda cmd, **k: (
+            Path(cmd[-1]).write_bytes(b"bin"),
+            types.SimpleNamespace(returncode=0, stderr="", stdout=""),
+        )[1],
+    )
+    helper = macos.build_helper()
+    assert helper.read_bytes() == b"bin"
+
+
+def test_build_helper_tolerates_existing_cache_dirs(monkeypatch, tmp_path):
+    # A rebuild (new source digest) runs with the cache dir and module cache already
+    # present, so their mkdirs must tolerate existing dirs (exist_ok=True).
+    monkeypatch.setattr(macos.sys, "platform", "darwin")
+    monkeypatch.setattr(macos.shutil, "which", lambda _tool: "/usr/bin/swiftc")
+    monkeypatch.setattr(macos, "_resource_bytes", lambda: b"swift source")
+    monkeypatch.setattr(macos, "user_cache_path", lambda _app: tmp_path)
+    (tmp_path / "macos-system-audio" / "swift-module-cache").mkdir(parents=True)  # pre-exist
+    monkeypatch.setattr(
+        macos.subprocess,
+        "run",
+        lambda cmd, **k: (
+            Path(cmd[-1]).write_bytes(b"bin"),
+            types.SimpleNamespace(returncode=0, stderr="", stdout=""),
+        )[1],
+    )
+    helper = macos.build_helper()  # must not raise FileExistsError on the mkdirs
+    assert helper.read_bytes() == b"bin"
 
 
 def test_build_helper_reuses_cached_binary(monkeypatch, tmp_path):
@@ -106,6 +153,7 @@ def test_build_helper_compile_failure_surfaces_stderr(monkeypatch, tmp_path):
     with pytest.raises(CLIError) as exc:
         macos.build_helper()
     assert exc.value.error_type == "mac_system_audio_unavailable"
+    assert exc.value.exit_code == 2
     assert exc.value.suggestion == "compile broke"
 
 
@@ -140,9 +188,11 @@ class TimeoutProc(_FakeProc):
         def __init__(self):
             super().__init__(stdout=b"")
             self.waits = 0
+            self.wait_timeouts = []
 
         def wait(self, timeout=None):
             self.waits += 1
+            self.wait_timeouts.append(timeout)
             if self.waits == 1:
                 raise macos.subprocess.TimeoutExpired("helper", timeout or 0.0)
             return self.returncode
@@ -152,6 +202,8 @@ def wait(self, timeout=None):
     macos._cleanup_process(proc, proc.stdout, completed=True)
     assert proc.killed is True
     assert proc.waits == 2
+    assert proc.terminated is False  # completed=True -> the `and` guard skips terminate()
+    assert proc.wait_timeouts == [2.0, 2.0]  # both waits use the 2s backstop
 
 
 def test_raise_helper_exit_handles_clean_eof():
@@ -166,6 +218,7 @@ def test_returncode_detail_names_signals():
     assert macos._returncode_detail(-5) == "SIGTRAP (-5)"
     assert macos._returncode_detail(-99999) == "signal 99999 (-99999)"
     assert macos._returncode_detail(2) == "exit 2"
+    assert macos._returncode_detail(0) == "exit 0"  # 0 is a clean exit (pins `>= 0`)
     assert macos._returncode_detail(None) == "unknown exit"
 
 
@@ -201,6 +254,12 @@ def fake_popen(cmd):
     assert events == ["open"]
     assert "--system-only" in commands[0]
     assert procs[0].terminated is True
+    # On a non-completed teardown the helper's stderr pipe is closed too (pins the
+    # `proc.stderr is not None` guard against an `is None` flip that would leak it).
+    assert procs[0].stderr is not None and procs[0].stderr.closed is True
+    # chunk-frames is ~100 ms of frames at the target rate (sample_rate // 10).
+    cmd = commands[0]
+    assert cmd[cmd.index("--chunk-frames") + 1] == str(src.sample_rate // 10)
 
 
 def test_source_start_failure_is_cli_error(tmp_path):
diff --git a/tests/test_stream_session.py b/tests/test_stream_session.py
index 36743628..740e98f1 100644
--- a/tests/test_stream_session.py
+++ b/tests/test_stream_session.py
@@ -291,10 +291,13 @@ def __init__(self, *, target_rate=None, device=None, capture_rate=None, on_open=
         def __iter__(self):
             return iter([b"mic"])
 
+    daemons = []
+
     class ImmediateThread:
         def __init__(self, *, target, args, daemon):
             self._target = target
             self._args = args
+            daemons.append(daemon)
 
         def start(self):
             self._target(*self._args)
@@ -315,6 +318,8 @@ def fake_stream_audio(api_key, source, *, params, **_kwargs):
     result = runner.invoke(app, ["stream", "--system-audio", "--json"])
     assert result.exit_code == 1
     assert "failed" in result.output
+    # Both source workers run as daemons so a wedged stream can't block process exit.
+    assert daemons and all(d is True for d in daemons)
 
 
 def test_stream_system_audio_parallel_keyboard_interrupt_exits_cleanly(monkeypatch):
diff --git a/tests/test_streaming_render.py b/tests/test_streaming_render.py
index e4050f13..73a7b587 100644
--- a/tests/test_streaming_render.py
+++ b/tests/test_streaming_render.py
@@ -160,6 +160,16 @@ def test_json_mode_emits_ndjson_events():
     assert lines[1] == {"type": "turn", "transcript": "hi", "end_of_turn": True}
 
 
+def test_turn_defaults_end_of_turn_to_false_when_absent():
+    # An event missing end_of_turn must read as a partial (False), never a finalized
+    # turn; pins the getattr default against a flip to True.
+    out = io.StringIO()
+    r = StreamRenderer(json_mode=True, out=out)
+    r.turn(types.SimpleNamespace(transcript="hi"))  # no end_of_turn attribute
+    event = json.loads(out.getvalue().splitlines()[0])
+    assert event["end_of_turn"] is False
+
+
 def test_json_mode_emits_source_when_labeled():
     out = io.StringIO()
     r = StreamRenderer(json_mode=True, out=out)

From 39acac8f2d7440c89a8ccc6845848bcb7878608a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 13:25:26 +0000
Subject: [PATCH 6/7] test: assert subprocess check/text in setup; pragma
 config validation-summary

Two follow-ups found by a full re-sweep after the rebase:

- setup.py _run: also assert the subprocess.run text=True and check=False kwargs
  (capture_output was already pinned) so all three are mutation-covered.
- config.py _validation_summary: mark exc.errors(include_url/include_input=False)
  `# pragma: no mutate` -- equivalent mutants, since the summary reads only loc+msg
  and never the url/input fields those flags toggle.

Tests only (plus one pragma); no behavior change.
---
 aai_cli/config.py           | 5 ++++-
 tests/test_setup_install.py | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/aai_cli/config.py b/aai_cli/config.py
index 6d9d010f..8adfff8b 100644
--- a/aai_cli/config.py
+++ b/aai_cli/config.py
@@ -81,7 +81,10 @@ def _validation_summary(exc: ValidationError) -> str:
     in a one-line CLI error.
     """
     problems: list[str] = []
-    for err in exc.errors(include_url=False, include_input=False):
+    # include_url/include_input=False keep pydantic's url/input fields out of each
+    # error dict, but this summary only reads loc + msg, so flipping them is an
+    # equivalent mutant (the rendered string is identical either way).
+    for err in exc.errors(include_url=False, include_input=False):  # pragma: no mutate
         loc = ".".join(str(part) for part in err["loc"]) or "top level"
         problems.append(f"{loc}: {err['msg']}")
     return "; ".join(problems)
diff --git a/tests/test_setup_install.py b/tests/test_setup_install.py
index 92e2b791..6a1f0c21 100644
--- a/tests/test_setup_install.py
+++ b/tests/test_setup_install.py
@@ -110,6 +110,8 @@ def record(cmd, *args, **kwargs):
         assert kwargs.get("stdin") is subprocess.DEVNULL
         assert kwargs.get("timeout")
         assert kwargs.get("capture_output") is True  # stdout/stderr must be captured
+        assert kwargs.get("text") is True  # decoded to str, not bytes
+        assert kwargs.get("check") is False  # we inspect returncode, never raise
 
     # The skill download gets the longer 300s timeout (vs the 120s default elsewhere).
     add_calls = [kw for cmd, kw in seen if cmd[:1] == ["npx"] and "add" in cmd]

From b8cbdf108870f01f3297287ed9f42d8a2d603fe8 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 13:29:53 +0000
Subject: [PATCH 7/7] tooling: add whole-package mutation sweep + document it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Developer-experience follow-up. The mutation gate is diff-scoped, so auditing
existing code against its bar meant rebuilding a throwaway sweep script each time.
Promote it to scripts/mutation_sweep.py: it reuses the gate's own
collect/cover/survive engine over every line of the named files (or the whole
package), reports surviving mutants (exit 1) and an UNCOVERED bucket separately,
and is robust to the line-number shifts that make per-line checks brittle.

Document the workflow in AGENTS.md next to the diff-scoped gate, including the
reminder to pass `--timeout` to the coverage-refresh pytest run — the default
suite leaves per-test timeouts opt-in, so a deadlocked test otherwise wedges the
whole run instead of failing fast.
---
 AGENTS.md                 | 12 +++++
 scripts/mutation_sweep.py | 99 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 scripts/mutation_sweep.py

diff --git a/AGENTS.md b/AGENTS.md
index 4bc00dc5..54add9fb 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -40,6 +40,18 @@ uv run diff-cover coverage.xml --compare-branch=origin/main --fail-under=100
 uv run python scripts/mutation_gate.py origin/main                                       # mutation gate
 ```
 
+The gate is diff-scoped, so code predating it is never mutation-tested. To audit
+existing code (or a whole module) against the same bar, `scripts/mutation_sweep.py`
+reuses the gate's engine over *every* line of the files you name (or the whole
+package). Refresh coverage first, and pass `--timeout` to that pytest step — the
+default suite has no per-test timeout (it's opt-in; see `pyproject.toml`), so a
+deadlocked test would wedge the run instead of failing fast:
+
+```sh
+uv run pytest -q -n auto --timeout=60 --cov=aai_cli --cov-branch --cov-context=test --cov-report=
+uv run python scripts/mutation_sweep.py aai_cli/config.py   # or omit paths for the whole package
+```
+
 ### Test markers
 
 The default suite **excludes** three slow/credentialed marker sets — `pyproject.toml`'s `addopts` carries `-m "not e2e and not install and not install_script"`, so a bare `pytest` matches what `check.sh` gates. An explicit command-line `-m` overrides it for the opt-in runs:
diff --git a/scripts/mutation_sweep.py b/scripts/mutation_sweep.py
new file mode 100644
index 00000000..837a92ab
--- /dev/null
+++ b/scripts/mutation_sweep.py
@@ -0,0 +1,127 @@
+"""Whole-file mutation sweep — the diff-scoped gate's repo-wide companion.
+
+``scripts/mutation_gate.py`` only mutates lines changed versus a branch, so code
+that predates the gate is never held to its bar. This sweeps EVERY eligible line
+of the given files (or the whole package) and reports the mutants that survive —
+i.e. the suite still passes with the line deliberately broken — so you can add an
+assertion that kills each one (or mark a genuinely-equivalent line
+``# pragma: no mutate``). It reuses the gate's own mutation/kill engine, so a
+survivor here is a survivor there.
+
+Usage::
+
+    # 1. Refresh per-test coverage contexts the sweep reads from .coverage:
+    uv run pytest -q -n auto --timeout=60 --cov=aai_cli --cov-branch \
+        --cov-context=test --cov-report=
+    # 2. Sweep specific files (or omit paths to sweep the whole package):
+    uv run python scripts/mutation_sweep.py aai_cli/config.py
+    uv run python scripts/mutation_sweep.py
+
+Pass ``--timeout`` to the pytest step above: the default suite has no per-test
+timeout (it is opt-in; see pyproject), and a deadlocked test would otherwise wedge
+the whole run instead of failing fast.
+
+Exit status is 1 if any real survivor is found, 2 if a named path is not a file
+or no coverage data could be read, else 0. Lines whose mutants have no covering
+test are reported separately as ``uncovered`` (not failed): coverage attributes
+import-time evaluated defaults to no test, so that bucket needs a manual look
+rather than blind action.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+import coverage
+
+_HERE = Path(__file__).resolve().parent
+_PKG = _HERE.parent / "aai_cli"
+_TEMPLATES = _PKG / "init" / "templates"
+
+
+def _load_gate() -> ModuleType:
+    """Load ``scripts/mutation_gate.py`` by file path and return the module."""
+    # ModuleType attribute access is dynamic, so reusing the gate's private helpers
+    # (_collect/_covering_tests/_survives) below needs no type-checker escape hatch.
+    spec = importlib.util.spec_from_file_location("mutation_gate", _HERE / "mutation_gate.py")
+    if spec is None or spec.loader is None:
+        raise RuntimeError("could not load scripts/mutation_gate.py")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules["mutation_gate"] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+def _package_files() -> list[Path]:
+    """Return the package's ``*.py`` files, sorted, skipping ``init/templates``."""
+    return sorted(p for p in _PKG.rglob("*.py") if _TEMPLATES not in p.parents)
+
+
+def _sweep_file(
+    mg: ModuleType, path: Path, data: coverage.CoverageData
+) -> tuple[int, list[str], list[str]]:
+    """Collect mutants over every line of *path* and classify each one.
+
+    Returns ``(mutant_count, survivor_labels, uncovered_labels)``. A mutant with no
+    covering test is bucketed as uncovered and never run; otherwise it is a survivor
+    when the gate's ``_survives`` check reports it.
+    """
+    line_count = len(path.read_text(encoding="utf-8").splitlines())
+    tree, src, mutants = mg._collect(path, set(range(1, line_count + 1)))
+    survivors: list[str] = []
+    uncovered: list[str] = []
+    for mutant in mutants:
+        if not mg._covering_tests(data, path, mutant.linenos):
+            uncovered.append(mutant.label)
+        elif mg._survives(path, tree, src, mutant, data):
+            survivors.append(mutant.label)
+    return len(mutants), survivors, uncovered
+
+
+def main() -> int:
+    """Sweep the paths named on the command line (default: the whole package).
+
+    Returns the process exit status: 1 if any mutant survived, 2 if a named path is
+    not a file or no coverage data could be read, else 0.
+    """
+    mg = _load_gate()
+    args = [Path(a) for a in sys.argv[1:]] or _package_files()
+    missing = [str(p) for p in args if not p.is_file()]
+    if missing:
+        sys.stderr.write(f"not a file: {', '.join(missing)}\n")
+        return 2
+    data = coverage.CoverageData()
+    data.read()
+    if not data.measured_files():
+        # With no measured files there is nothing to match mutants against; fail fast
+        # rather than risk printing a misleading survivor-free report.
+        sys.stderr.write("no coverage data found; run the pytest --cov step first\n")
+        return 2
+    total = 0
+    uncovered_total = 0
+    all_survivors: list[str] = []
+    for path in args:
+        tested, survivors, uncovered = _sweep_file(mg, path, data)
+        total += tested
+        uncovered_total += len(uncovered)
+        all_survivors += survivors
+        sys.stdout.write(f"\n=== {path} : {tested} mutants ===\n")
+        for label in survivors:
+            sys.stdout.write(f"  SURVIVES  {label}\n")
+        for label in uncovered:
+            sys.stdout.write(f"  uncovered {label}\n")
+        if not survivors and not uncovered:
+            sys.stdout.write("  clean\n")
+        sys.stdout.flush()
+    sys.stdout.write(
+        f"\nTOTAL {total} mutant(s); {len(all_survivors)} surviving; "
+        f"{uncovered_total} uncovered\n"
+    )
+    return 1 if all_survivors else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())