From 29117a6ad4f3442c3b18a2ea8942d94ec8766e3e Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 05:22:05 +0000 Subject: [PATCH 1/7] test: kill surviving mutants in config/output/microphone/transcribe_render Whole-package mutation sweep (scripts/mutation_gate.py's engine, run over every line rather than just the diff) surfaced lines whose covering tests passed even when the line was broken. Fortify them: - config.py: assert the exit_code=2 (usage) on invalid profile / invalid TOML / invalid shape / empty --api-key errors, and add a _dump test whose config dir's parents don't exist yet (pins mkdir parents=True). - output.py: assert is_agentic() returns True when stdout is not a TTY. - microphone.py: pin the `rate > 0` boundary (keep a 1 Hz reading) and the blocksize max(1, ...) floor for a tiny sample rate. - transcribe_render.py: mark the chapter start/end getattr fallbacks `# pragma: no mutate` -- equivalent mutants (_fmt_ms(0) == _fmt_ms(1)). Tests only (plus one pragma); no behavior change. --- aai_cli/transcribe_render.py | 4 +++- tests/test_config.py | 23 ++++++++++++++++++++--- tests/test_microphone.py | 28 ++++++++++++++++++++++++++++ tests/test_output.py | 10 ++++++++++ 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/aai_cli/transcribe_render.py b/aai_cli/transcribe_render.py index fe13286d..e787b166 100644 --- a/aai_cli/transcribe_render.py +++ b/aai_cli/transcribe_render.py @@ -74,7 +74,9 @@ def _render_chapters(transcript: object, console: Console) -> None: return console.print("\n[bold]Chapters:[/bold]") for ch in chapters: - span = f"{_fmt_ms(jsonshape.as_int(getattr(ch, 'start', 0)))}-{_fmt_ms(jsonshape.as_int(getattr(ch, 'end', 0)))}" + # The `, 0` getattr fallbacks are equivalent mutants: they apply only to a + # chapter missing start/end, and _fmt_ms(0) == _fmt_ms(1) == "00:00" regardless. 
+ span = f"{_fmt_ms(jsonshape.as_int(getattr(ch, 'start', 0)))}-{_fmt_ms(jsonshape.as_int(getattr(ch, 'end', 0)))}" # pragma: no mutate console.print(f" {span} {getattr(ch, 'headline', '')}") diff --git a/tests/test_config.py b/tests/test_config.py index d57ee6cf..1bcea9c4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -174,14 +174,18 @@ def test_empty_api_key_flag_rejected(): from aai_cli.errors import CLIError - with pytest.raises(CLIError): + with pytest.raises(CLIError) as exc: config.resolve_api_key(api_key_flag="") + assert exc.value.error_type == "invalid_key" + assert exc.value.exit_code == 2 # usage error, not the generic 1 def test_invalid_profile_name_has_suggestion(): with pytest.raises(CLIError) as exc: config.set_api_key("bad name!", "sk_x") assert exc.value.message.startswith("Invalid profile name") + assert exc.value.error_type == "invalid_profile" + assert exc.value.exit_code == 2 # usage error, not the generic 1 assert exc.value.suggestion == "Use only letters, digits, '-' or '_'." 
@@ -189,8 +193,10 @@ def test_malformed_config_raises_clean_error(tmp_config): from aai_cli.errors import CLIError (tmp_config / "config.toml").write_text("this is not = = valid toml ===\n") - with pytest.raises(CLIError): + with pytest.raises(CLIError) as exc: config.get_active_profile() + assert exc.value.error_type == "invalid_config" + assert exc.value.exit_code == 2 # usage error, not the generic 1 def test_unexpected_config_shape_raises_clean_error(tmp_config): @@ -200,7 +206,7 @@ def test_unexpected_config_shape_raises_clean_error(tmp_config): with pytest.raises(CLIError) as exc: config.get_active_profile() assert exc.value.error_type == "invalid_config" - assert exc.value.exit_code == 2 + assert exc.value.exit_code == 2 # usage error, not the generic 1 def test_unexpected_config_shape_error_is_compact(tmp_config): @@ -245,6 +251,17 @@ def test_validation_summary_labels_rootlevel_problems(): assert config._validation_summary(exc.value).startswith("top level: ") +def test_dump_creates_missing_parent_directories(monkeypatch, tmp_path): + # The config dir's parents may not exist yet (first run on a fresh machine); + # _dump must create the whole chain (mkdir parents=True), not just the leaf. 
+ nested = tmp_path / "deeply" / "nested" / "config" + monkeypatch.setattr("aai_cli.config.config_dir", lambda: nested) + config.set_api_key("default", "sk_abc") + assert nested.is_dir() + assert (nested / "config.toml").exists() + assert config.get_api_key("default") == "sk_abc" + + def test_config_roundtrips_after_special_value(tmp_path, monkeypatch): # profile names are validated; this checks tomli_w writes valid TOML for normal data config.set_api_key("staging", "sk_x") diff --git a/tests/test_microphone.py b/tests/test_microphone.py index 7e2f141c..9109747c 100644 --- a/tests/test_microphone.py +++ b/tests/test_microphone.py @@ -220,6 +220,16 @@ def test_device_default_rate_falls_back_on_non_numeric_rate(monkeypatch) -> None assert _device_default_rate(None) == _FALLBACK_RATE +def test_device_default_rate_keeps_smallest_positive_rate(monkeypatch) -> None: + # A reported rate of exactly 1 is positive and must be kept as-is; only a + # non-positive (<= 0) rate falls back. Pins the `rate > 0` boundary so it can't + # drift to `rate > 1` and silently discard a legitimate 1 Hz reading. + fake_sd: Any = types.ModuleType("sounddevice") + fake_sd.query_devices = lambda device, kind: {"default_samplerate": 1.0} + monkeypatch.setitem(sys.modules, "sounddevice", fake_sd) + assert _device_default_rate(None) == 1 + + def test_sounddevice_mic_yields_bytes_then_stops_and_closes(): stream = _FakeRawStream() mic = _SoundDeviceMic(stream, blocksize=1024) @@ -251,6 +261,24 @@ def raw_input_stream(**kwargs): assert next(iter(stream)) == b"\x01\x02" +def test_default_mic_stream_floors_blocksize_at_one(monkeypatch) -> None: + # A pathologically small sample rate makes `sample_rate // 10` round to 0; the + # max(1, ...) floor must still open with one frame per read, never 0 (which would + # make sounddevice read nothing). Pins that floor at 1. 
+ created: dict[str, Any] = {} + + def raw_input_stream(**kwargs): + created.update(kwargs) + return _FakeRawStream(**kwargs) + + fake_sd: Any = types.ModuleType("sounddevice") + fake_sd.RawInputStream = raw_input_stream + monkeypatch.setitem(sys.modules, "sounddevice", fake_sd) + + _default_mic_stream(sample_rate=5, device=None) # 5 // 10 == 0 + assert created["blocksize"] == 1 + + def test_default_mic_stream_missing_sounddevice_raises_mic_missing(monkeypatch): monkeypatch.setitem(sys.modules, "sounddevice", None) # import -> ImportError with pytest.raises(CLIError) as exc: diff --git a/tests/test_output.py b/tests/test_output.py index de70994d..3aa4da71 100644 --- a/tests/test_output.py +++ b/tests/test_output.py @@ -37,6 +37,16 @@ def test_is_agentic_false_for_plain_interactive_tty(monkeypatch): assert output.is_agentic() is False +def test_is_agentic_true_when_stdout_not_a_tty(monkeypatch): + # Piped/redirected stdout means no interactive human, so the spinner is + # suppressed even with no agent env var set -- guards the `not a tty -> True` + # early return (without it, this path would fall through to the env-var check). 
+ monkeypatch.setattr(output, "_stdout_is_tty", lambda: False) + for var in output._AGENT_ENV_VARS: + monkeypatch.delenv(var, raising=False) + assert output.is_agentic() is True + + def test_mask_secret_preserves_only_short_edges(): assert output.mask_secret("sk_1234567890") == "sk_…7890" assert output.mask_secret("12345678") == "123…5678" From 291879e412cd108a9c91a2dcdcdf0e3530248b5c Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 05:32:03 +0000 Subject: [PATCH 2/7] test: kill surviving mutants in init/llm/transcripts/setup commands Continue the whole-package mutation sweep, fortifying the command layer: - init.py: pin _pick_template's stdin/stdout `or` (either stream piped -> usage error) and that error's exit_code; the missing-questionary exit_code; the agents-host derivation replacing only the FIRST "streaming" (replace count=1); the [:300] install-error truncation; and the DIRECTORY/--here conflict exit_code. Mark the don't-launch `False` on the install-failure branch `# pragma: no mutate` -- run_init exits on the failed step before consulting it. - llm.py: assert `-o json` forces JSON output without the global --json flag. - transcripts.py: assert an errored transcript surfaces its own error message, not the generic fallback. - setup.py: assert subprocess capture_output, the 300s/120s skill timeouts, the install-hint command slice, the remove failure-detail fallback, _copy_tree's mkdir parents/exist_ok, and best-effort rmtree(ignore_errors). FakeRun now records per-call kwargs so timeouts/flags are assertable. Tests only (plus one pragma); no behavior change. 
--- aai_cli/commands/init.py | 5 ++- tests/setup_helpers.py | 2 ++ tests/test_init_command.py | 71 +++++++++++++++++++++++++++++++++++++ tests/test_llm_command.py | 12 +++++++ tests/test_setup.py | 59 ++++++++++++++++++++++++++++++ tests/test_setup_install.py | 16 +++++++-- tests/test_transcripts.py | 3 ++ 7 files changed, 165 insertions(+), 3 deletions(-) diff --git a/aai_cli/commands/init.py b/aai_cli/commands/init.py index 1d58b285..48c4142a 100644 --- a/aai_cli/commands/init.py +++ b/aai_cli/commands/init.py @@ -105,7 +105,10 @@ def _install_step( "status": "failed", "detail": (setup.stderr or setup.stdout).strip()[:300], } - return [row], False + # The False (don't-launch) is an equivalent mutant: run_init raises Exit(1) on + # any failed step before it ever consults will_launch, so the value is unused + # on this branch. + return [row], False # pragma: no mutate return [ { "name": "install", diff --git a/tests/setup_helpers.py b/tests/setup_helpers.py index 0f282ac2..5c79441d 100644 --- a/tests/setup_helpers.py +++ b/tests/setup_helpers.py @@ -32,12 +32,14 @@ class FakeRun: def __init__(self, returncodes=None, *, creates_skill=True, removes_skill=True): self.calls = [] + self.invocations = [] # (cmd, kwargs) per call, so tests can assert timeout etc. 
self.returncodes = returncodes or {} self.creates_skill = creates_skill self.removes_skill = removes_skill def __call__(self, cmd, *args, **kwargs): self.calls.append(cmd) + self.invocations.append((list(cmd), dict(kwargs))) rc = 0 best = -1 for prefix, code in self.returncodes.items(): diff --git a/tests/test_init_command.py b/tests/test_init_command.py index bf2ac71c..277ecde0 100644 --- a/tests/test_init_command.py +++ b/tests/test_init_command.py @@ -28,6 +28,15 @@ def test_init_scaffold_only_creates_project(tmp_path, monkeypatch): assert (tmp_path / "myapp" / ".env").exists() +def test_init_rejects_dir_and_here_together(tmp_path, monkeypatch): + # DIRECTORY and --here are mutually exclusive; passing both is a usage error + # exiting 1 (pins that exit_code on the conflict). + monkeypatch.chdir(tmp_path) + result = runner.invoke(app, ["init", TEMPLATE, "somedir", "--here", "--no-install"]) + assert result.exit_code == 1 + assert "not both" in result.output + + def test_init_writes_key_from_env(tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) monkeypatch.setenv("ASSEMBLYAI_API_KEY", "sk-from-env") @@ -264,6 +273,34 @@ def test_pick_template_missing_questionary_errors(monkeypatch): with pytest.raises(CLIError) as exc: init_cmd._pick_template() assert exc.value.error_type == "missing_dependency" + assert exc.value.exit_code == 1 + + +@pytest.mark.parametrize(("stdin_tty", "stdout_tty"), [(True, False), (False, True)]) +def test_pick_template_errors_when_either_stream_not_a_tty(monkeypatch, stdin_tty, stdout_tty): + # The picker needs BOTH stdin and stdout interactive; if either is piped it must + # bail with a usage error (pins the `or`, which an `and` would weaken to "both + # piped"). questionary is stubbed out so a mutated fall-through is observable as a + # *different* error rather than the usage error this asserts. 
+ monkeypatch.setattr("sys.stdin", _Tty() if stdin_tty else io.StringIO()) + monkeypatch.setattr("sys.stdout", _Tty() if stdout_tty else io.StringIO()) + monkeypatch.setitem(sys.modules, "questionary", None) + with pytest.raises(CLIError) as exc: + init_cmd._pick_template() + assert exc.value.error_type == "usage_error" + assert exc.value.exit_code == 1 + + +def test_active_env_vars_agents_host_replaces_only_first_streaming(monkeypatch): + # The agents host is derived by swapping the FIRST "streaming" token for "agents" + # (replace count=1); a host containing it twice must keep the later occurrence. + fake_env = types.SimpleNamespace( + api_base="https://api.x", + llm_gateway_base="https://llm.x", + streaming_host="streaming.streaming.example.com", + ) + monkeypatch.setattr(init_cmd.environments, "active", lambda: fake_env) + assert init_cmd._active_env_vars()["ASSEMBLYAI_AGENTS_HOST"] == "agents.streaming.example.com" def test_init_install_failure_reports_and_exits(tmp_path, monkeypatch): @@ -284,6 +321,40 @@ def test_init_install_failure_reports_and_exits(tmp_path, monkeypatch): assert "pip exploded" in result.output +def test_init_install_failure_does_not_launch_even_with_key(tmp_path, monkeypatch): + # A failed install must flip will_launch off so the server never starts -- even + # when a key is present (which would otherwise satisfy the launch guard). Pins the + # literal `False` returned on the failure branch. 
+ monkeypatch.chdir(tmp_path) + monkeypatch.setenv("ASSEMBLYAI_API_KEY", "sk-real-key") + monkeypatch.setattr( + "aai_cli.init.runner.run_setup", + lambda *a, **k: subprocess.CompletedProcess([], 1, "", "pip exploded"), + ) + launched = {"v": False} + monkeypatch.setattr( + "aai_cli.init.runner.launch_and_open", + lambda *a, **k: launched.__setitem__("v", True) or 0, + ) + result = runner.invoke(app, ["init", TEMPLATE, "app", "--json"]) + assert result.exit_code == 1 + assert launched["v"] is False + + +def test_init_install_failure_detail_is_truncated(tmp_path, monkeypatch): + # A pathologically long install error is capped at 300 chars in the report detail + # so it can't flood the terminal; pins the [:300] slice. + monkeypatch.chdir(tmp_path) + monkeypatch.setattr( + "aai_cli.init.runner.run_setup", + lambda *a, **k: subprocess.CompletedProcess([], 1, "", "x" * 500), + ) + result = runner.invoke(app, ["init", TEMPLATE, "app", "--json"]) + assert result.exit_code == 1 + assert "x" * 300 in result.output + assert "x" * 301 not in result.output + + def test_init_launches_when_key_present(tmp_path, monkeypatch): # Key present + install succeeds -> the server is launched and the browser opens. monkeypatch.chdir(tmp_path) diff --git a/tests/test_llm_command.py b/tests/test_llm_command.py index 9d4b1567..6cde0a3b 100644 --- a/tests/test_llm_command.py +++ b/tests/test_llm_command.py @@ -250,6 +250,18 @@ def test_llm_json_flag_emits_json(monkeypatch): assert json.loads(result.output)["output"] == "hello" +def test_llm_output_json_field_forces_json_without_flag(monkeypatch): + # `-o json` selects machine output even without the global --json flag, at an + # interactive terminal (where json_mode is otherwise off). Pins the + # `output_field == "json"` half of the json_mode disjunction. 
+ _auth() + monkeypatch.setattr("aai_cli.output._stdout_is_tty", lambda: True) + monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload("hi42")) + result = runner.invoke(app, ["llm", "hi", "-o", "json"]) + assert result.exit_code == 0 + assert json.loads(result.output)["output"] == "hi42" + + def test_llm_output_invalid_field_exits_2(monkeypatch): _auth() monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload()) diff --git a/tests/test_setup.py b/tests/test_setup.py index 898e41af..bb275bcb 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -1,3 +1,4 @@ +import json import subprocess import pytest @@ -53,6 +54,12 @@ def test_remove_skill_failure_reports_failed(monkeypatch): result = runner.invoke(app, ["setup", "remove"]) assert result.exit_code == 1 assert _statuses(result)["skill"] == "failed" + # The failure detail surfaces the subprocess's stderr ("boom"), preferring it over + # the generic "still present" fallback (pins `_proc_detail(proc) or ...`). + skill_detail = next( + s["detail"] for s in json.loads(result.output)["steps"] if s["name"] == "skill" + ) + assert "boom" in skill_detail def test_remove_skill_skipped_when_npx_missing(monkeypatch): @@ -94,6 +101,9 @@ def test_remove_unwinds_all(monkeypatch, tmp_path): assert ["npx", "-y", "skills", "remove", "assemblyai", "--global"] in fake.calls assert not _skill_path().exists() assert not _cli_skill_path().exists() + # The skill-remove subprocess uses the explicit 120s timeout backstop. 
+ remove_calls = [kw for cmd, kw in fake.invocations if cmd[:1] == ["npx"] and "remove" in cmd] + assert remove_calls and remove_calls[0]["timeout"] == 120 def test_remove_when_absent_is_not_an_error(monkeypatch): @@ -177,6 +187,35 @@ def test_copy_tree_skips_pycache_and_pyc(tmp_path): assert not (dest / "__pycache__").exists() +def test_copy_tree_creates_missing_parent_dirs(tmp_path): + # The destination's parents may not exist yet (~/.claude/skills on a fresh + # machine); _copy_tree must create the whole chain (mkdir parents=True). + from aai_cli.commands import setup + + src = tmp_path / "src" + src.mkdir() + (src / "SKILL.md").write_text("# skill") + + dest = tmp_path / "a" / "b" / "c" / "dest" # none of a/b/c exist yet + setup._copy_tree(src, dest) + assert (dest / "SKILL.md").read_text() == "# skill" + + +def test_copy_tree_into_existing_dir_is_tolerated(tmp_path): + # _copy_tree may run with the destination already present (a forced reinstall over + # an existing skill dir); the mkdir must tolerate it (exist_ok=True), not raise. + from aai_cli.commands import setup + + src = tmp_path / "src" + src.mkdir() + (src / "SKILL.md").write_text("# skill") + + dest = tmp_path / "dest" + dest.mkdir() # already exists before the copy + setup._copy_tree(src, dest) + assert (dest / "SKILL.md").read_text() == "# skill" + + # --- help -------------------------------------------------------------------- @@ -241,3 +280,23 @@ def test_remove_cli_skill_fails_when_rmtree_noops(monkeypatch): step = setup._remove_cli_skill() assert step["status"] == "failed" assert "still present" in step["detail"] + + +def test_remove_cli_skill_tolerates_rmtree_error(monkeypatch): + # Removal is best-effort (ignore_errors=True): a deletion failure must surface as a + # clean "failed" step (skill still present), never an uncaught OSError. Without + # ignore_errors, rmtree would raise instead of returning. 
+ from aai_cli.commands import setup + + dest = _cli_skill_path() + dest.mkdir(parents=True) + (dest / "SKILL.md").write_text("# x") + + def rmtree(path, ignore_errors=False, **kwargs): + if not ignore_errors: + raise OSError("permission denied") # what a non-ignoring rmtree would do + + monkeypatch.setattr(setup.shutil, "rmtree", rmtree) + step = setup._remove_cli_skill() + assert step["status"] == "failed" + assert "still present" in step["detail"] diff --git a/tests/test_setup_install.py b/tests/test_setup_install.py index d43d47e1..92e2b791 100644 --- a/tests/test_setup_install.py +++ b/tests/test_setup_install.py @@ -1,3 +1,4 @@ +import json import subprocess import pytest @@ -79,6 +80,12 @@ def test_install_skill_failed_when_npx_succeeds_but_nothing_installed(monkeypatc result = runner.invoke(app, ["setup", "install"]) assert result.exit_code == 1 # skill step failed assert _statuses(result)["skill"] == "failed" + # The detail quotes the install command starting at `add` (_SKILL_ADD[3:]), so the + # user sees exactly what to retry -- pins that slice start. + skill_detail = next( + s["detail"] for s in json.loads(result.output)["steps"] if s["name"] == "skill" + ) + assert "'add AssemblyAI/assemblyai-skill --global --yes'" in skill_detail # And status agrees: still not installed. 
status_result = runner.invoke(app, ["setup", "status"]) @@ -92,16 +99,21 @@ def test_install_detaches_stdin_and_sets_timeout(monkeypatch): seen = [] def record(cmd, *args, **kwargs): - seen.append(kwargs) + seen.append((list(cmd), kwargs)) return subprocess.CompletedProcess(args=cmd, returncode=1, stdout="", stderr="") monkeypatch.setattr("aai_cli.commands.setup.subprocess.run", record) result = runner.invoke(app, ["setup", "install"]) assert result.exit_code in (0, 1) assert seen, "expected subprocess.run to be called" - for kwargs in seen: + for _cmd, kwargs in seen: assert kwargs.get("stdin") is subprocess.DEVNULL assert kwargs.get("timeout") + assert kwargs.get("capture_output") is True # stdout/stderr must be captured + + # The skill download gets the longer 300s timeout (vs the 120s default elsewhere). + add_calls = [kw for cmd, kw in seen if cmd[:1] == ["npx"] and "add" in cmd] + assert add_calls and add_calls[0]["timeout"] == 300 def test_install_scope_passthrough(monkeypatch): diff --git a/tests/test_transcripts.py b/tests/test_transcripts.py index 802aa5ad..13922916 100644 --- a/tests/test_transcripts.py +++ b/tests/test_transcripts.py @@ -139,6 +139,9 @@ def test_get_errored_transcript_exits_nonzero(mocker): ) result = runner.invoke(app, ["transcripts", "get", "t_err"]) assert result.exit_code == 1 + # The transcript's own error message is surfaced, not the generic fallback + # (pins `getattr(transcript, "error", None) or "Transcript failed."`). 
+ assert "decode failed" in result.output def test_list_table_colors_status(monkeypatch, mocker): From 1b5d0644b728755d405cd4d58343525285a05456 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 05:37:26 +0000 Subject: [PATCH 3/7] test: kill surviving mutants in init runner/scaffold - runner.py: pin find_free_port's bind-to-port-0 (OS-assigned ephemeral port), the port-range error message bounds and its exit_code, the 0.2s poll interval in wait_for_port, run_setup's success sentinel returncode, and the capture_output/check/text kwargs passed to subprocess.run. - scaffold.py: assert the unknown-template / template-missing exit_codes, that a nested target's parent dirs are created (target.mkdir parents=True), and that a re-scaffold over an existing tree is tolerated (exist_ok). The two _copy_tree mkdir parents=True flags are marked `# pragma: no mutate` -- equivalent mutants, since the copy walk always creates a node's parent before the node. Tests only (plus two pragmas); no behavior change. --- aai_cli/init/scaffold.py | 7 +++++-- tests/test_init_runner.py | 28 +++++++++++++++++++++++++--- tests/test_init_scaffold.py | 23 ++++++++++++++++++++++- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/aai_cli/init/scaffold.py b/aai_cli/init/scaffold.py index 1b8c0637..b39d1a80 100644 --- a/aai_cli/init/scaffold.py +++ b/aai_cli/init/scaffold.py @@ -65,10 +65,13 @@ def _copy_tree(node: Traversable, dest: Path) -> None: name = _DOTFILE_RENAMES.get(child.name, child.name) out = dest / name if child.is_dir(): - out.mkdir(parents=True, exist_ok=True) + # parents=True is an equivalent mutant here: the walk always creates a + # node's parent before descending, so `dest` (and `out.parent`) already + # exists. exist_ok is exercised by the idempotent re-scaffold test. 
+ out.mkdir(parents=True, exist_ok=True) # pragma: no mutate _copy_tree(child, out) else: - out.parent.mkdir(parents=True, exist_ok=True) + out.parent.mkdir(parents=True, exist_ok=True) # pragma: no mutate out.write_bytes(child.read_bytes()) diff --git a/tests/test_init_runner.py b/tests/test_init_runner.py index d9d5e06c..4823504f 100644 --- a/tests/test_init_runner.py +++ b/tests/test_init_runner.py @@ -67,7 +67,9 @@ def test_serve_command_uv_and_venv(): @pytest.mark.allow_hosts(["127.0.0.1"]) def test_find_free_port_returns_preferred_when_open(): port = runner.find_free_port(0) # 0 -> OS assigns a free port - assert isinstance(port, int) and port > 0 + # A real OS-assigned ephemeral port, not a low/privileged one: pins the bind to + # port 0 (binding to 1 would yield 1, or fail outright as non-root). + assert isinstance(port, int) and port > 1024 @pytest.mark.allow_hosts(["127.0.0.1"]) @@ -88,20 +90,24 @@ def test_find_free_port_raises_when_all_taken(monkeypatch): with pytest.raises(CLIError) as exc: runner.find_free_port(5000, tries=3) assert exc.value.error_type == "port_unavailable" - assert "5000" in str(exc.value) + assert exc.value.exit_code == 1 + # The message names the exact inclusive range probed: preferred .. preferred+tries-1. 
+ assert "5000-5002" in str(exc.value) def test_wait_for_port_returns_true_when_port_opens(monkeypatch): calls = {"n": 0} + slept = [] def fake_open(port): calls["n"] += 1 return calls["n"] >= 2 # closed on first poll, open on the second monkeypatch.setattr(runner, "_port_open", fake_open) - monkeypatch.setattr(runner.time, "sleep", lambda _s: None) + monkeypatch.setattr(runner.time, "sleep", slept.append) assert runner.wait_for_port(3000, timeout=5.0) is True assert calls["n"] >= 2 + assert slept == [0.2] # polls once at the 0.2s interval before the port opens def test_wait_for_port_returns_false_on_timeout(monkeypatch): @@ -115,15 +121,31 @@ def test_wait_for_port_returns_false_on_timeout(monkeypatch): def test_run_setup_returns_last_success(monkeypatch): ran = [] + seen = {} def fake_run(cmd, cwd, capture_output, check, text): ran.append(cmd) + seen.update(capture_output=capture_output, check=check, text=text) return subprocess.CompletedProcess(args=cmd, returncode=0, stdout="ok", stderr="") monkeypatch.setattr(runner.subprocess, "run", fake_run) result = runner.run_setup(Path("/proj"), use_uv=True) assert result.returncode == 0 assert len(ran) == 2 # both env-setup commands ran + # Output is captured as text, and a failing command is returned (not raised): + # check must stay False so run_setup can report the failure itself. + assert seen == {"capture_output": True, "check": False, "text": True} + + +def test_run_setup_with_no_commands_returns_success_sentinel(monkeypatch): + # With no env-setup commands the seeded CompletedProcess is returned unchanged, so + # an empty plan reads as success (returncode 0), never a spurious failure. 
+ monkeypatch.setattr(runner, "env_setup_commands", lambda *a, **k: []) + monkeypatch.setattr( + runner.subprocess, "run", lambda *a, **k: pytest.fail("no command should run") + ) + result = runner.run_setup(Path("/proj"), use_uv=True) + assert result.returncode == 0 def test_run_setup_stops_at_first_failure(monkeypatch): diff --git a/tests/test_init_scaffold.py b/tests/test_init_scaffold.py index de30657d..2c6f25a7 100644 --- a/tests/test_init_scaffold.py +++ b/tests/test_init_scaffold.py @@ -96,8 +96,10 @@ def test_scaffold_writes_placeholder_when_no_key(tmp_path): def test_scaffold_unknown_template_raises(tmp_path): - with pytest.raises(CLIError): + with pytest.raises(CLIError) as exc: scaffold.scaffold("nope", tmp_path / "app", api_key=None) + assert exc.value.error_type == "unknown_template" + assert exc.value.exit_code == 1 def test_scaffold_registered_but_missing_files_raises(tmp_path, monkeypatch): @@ -106,6 +108,25 @@ def test_scaffold_registered_but_missing_files_raises(tmp_path, monkeypatch): with pytest.raises(CLIError) as exc: scaffold.scaffold("ghost-template", tmp_path / "app", api_key=None) assert exc.value.error_type == "template_missing" + assert exc.value.exit_code == 1 + + +def test_scaffold_creates_nested_target_parents(tmp_path): + # `aai init a/b/app` targets a path whose parents don't exist yet; scaffold + # must create the whole chain (target.mkdir parents=True). + target = tmp_path / "a" / "b" / "app" # a/ and b/ do not exist + scaffold.scaffold("audio-transcription", target, api_key="k") + assert (target / "api" / "index.py").exists() + + +def test_scaffold_is_idempotent_over_existing_tree(tmp_path): + # Re-scaffolding (e.g. `--force`) runs over an already-populated tree, so every + # mkdir along the copy walk must tolerate existing dirs (exist_ok=True). 
+ target = tmp_path / "app" + scaffold.scaffold("audio-transcription", target, api_key="k") + scaffold.scaffold("audio-transcription", target, api_key="k2") # dirs already exist + assert (target / "api" / "index.py").exists() + assert "ASSEMBLYAI_API_KEY=k2" in (target / ".env").read_text() def test_target_conflict_detects_nonempty_dir(tmp_path): From a799a3fc94876f4dfdaaba080af3f3df402e5ae0 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 12:52:08 +0000 Subject: [PATCH 4/7] test: kill surviving mutants in auth loopback + agent audio/session - auth/loopback.py: assert the callback server thread is a daemon and that the cleanup join uses the bounded 5s timeout (spying threading.Thread while the server really serves, so shutdown() doesn't block). - agent/audio.py: pin the DuplexAudio blocksize max(1, rate//10) floor for a tiny device rate. - agent/session.py: pin the bounded 10s wait on ready_event in the send loop, and that the capture thread is a daemon. Tests only; no behavior change. --- tests/test_agent_audio.py | 14 ++++++++++ tests/test_agent_session.py | 16 +++++++++++ tests/test_agent_session_run.py | 47 +++++++++++++++++++++++++++++++++ tests/test_auth_loopback.py | 24 +++++++++++++++++ 4 files changed, 101 insertions(+) diff --git a/tests/test_agent_audio.py b/tests/test_agent_audio.py index 8f70adcf..bf56f4ed 100644 --- a/tests/test_agent_audio.py +++ b/tests/test_agent_audio.py @@ -45,6 +45,20 @@ def factory(*, rate, blocksize, callback, device): assert fake.stopped and fake.closed +def test_duplex_floors_blocksize_at_one(): + # A pathologically small device rate (//10 == 0) must still open with at least one + # frame per block; the max(1, ...) floor prevents a 0-frame block. 
+ seen = {} + + def factory(*, rate, blocksize, callback, device): + seen["blocksize"] = blocksize + return FakeStream() + + d = DuplexAudio(device_rate=5, stream_factory=factory) # 5 // 10 == 0 + d.player.start() + assert seen["blocksize"] == 1 + + def test_duplex_restart_after_close_reopens_stream(): calls = {"n": 0} diff --git a/tests/test_agent_session.py b/tests/test_agent_session.py index bcb78ebe..e91dc617 100644 --- a/tests/test_agent_session.py +++ b/tests/test_agent_session.py @@ -266,3 +266,19 @@ def test_send_audio_loop_waits_for_ready_event_before_streaming(): ws = _RecordingWS() _send_audio_loop(ws, s, [b"\x01\x02"]) assert len(ws.sent) == 1 # frame forwarded once the gate is open + + +def test_send_audio_loop_waits_on_ready_event_with_bounded_timeout(): + # The wait on ready_event is bounded so a server that never sends `ready` can't + # wedge the send loop forever; pins the 10s timeout. + seen = {} + + class _RecordingEvent: + def wait(self, timeout=None): + seen["timeout"] = timeout + return True + + s = _session(exit_after_reply=True, ready_event=_RecordingEvent()) + s.ready = True + _send_audio_loop(_RecordingWS(), s, [b"\x01\x02"]) + assert seen["timeout"] == 10 diff --git a/tests/test_agent_session_run.py b/tests/test_agent_session_run.py index 2889064a..7254223f 100644 --- a/tests/test_agent_session_run.py +++ b/tests/test_agent_session_run.py @@ -149,6 +149,53 @@ def close(self): assert exc.value.exit_code == 1 # the real mic failure reaches the user, not a hang +def test_run_session_capture_thread_is_daemon(monkeypatch): + # The capture thread is a daemon so a stuck mic read can't keep the process alive + # after the session ends. 
+ import threading as _threading + + from aai_cli.agent import session as session_mod + + daemons = [] + real_cls = session_mod.threading.Thread + + class SpyThread(real_cls): + def __init__(self, *a, **k): + daemons.append(k.get("daemon")) + super().__init__(*a, **k) + + monkeypatch.setattr(session_mod.threading, "Thread", SpyThread) + + class _BoomMic: + def __iter__(self): + raise CLIError("no microphone", error_type="mic_error", exit_code=1) + + class _BlockingWS: + def __init__(self): + self._closed = _threading.Event() + + def send(self, _msg): + pass + + def __iter__(self): + self._closed.wait(timeout=2) + return iter(()) + + def close(self): + self._closed.set() + + with pytest.raises(CLIError): + run_session( + "sk_live", + renderer=FakeRenderer(), + player=FakePlayer(), + mic=_BoomMic(), + config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"), + connect=lambda url, **kwargs: _BlockingWS(), + ) + assert daemons == [True] # the one capture thread, created as a daemon + + def test_run_session_does_not_close_player_that_failed_to_open(): # If opening the speaker stream raises, the cleanup must NOT call close() on a # player that never started (pins the player_started=False initializer). diff --git a/tests/test_auth_loopback.py b/tests/test_auth_loopback.py index 6e85c347..3530265d 100644 --- a/tests/test_auth_loopback.py +++ b/tests/test_auth_loopback.py @@ -144,6 +144,30 @@ def test_capture_times_out_without_callback(): assert result.token is None +def test_capture_server_thread_is_daemon_and_joined_with_timeout(monkeypatch): + # The serve_forever thread must be a daemon (so it can't block process exit) and the + # cleanup join must be bounded (5s) so a wedged server can't hang shutdown. The + # server really serves (no callback arrives, so capture just times out fast); we + # only spy on the thread's daemon flag and join timeout. 
+ created = {} + real_cls = loopback.threading.Thread + + class SpyThread(real_cls): + def __init__(self, *a, **k): + created["daemon"] = k.get("daemon") + super().__init__(*a, **k) + + def join(self, timeout=None): + created["join_timeout"] = timeout + return super().join(timeout) + + monkeypatch.setattr(loopback.threading, "Thread", SpyThread) + result = loopback.capture_callback(timeout=0.1) # no callback -> times out + assert result.error == "timeout" + assert created["daemon"] is True + assert created["join_timeout"] == 5 + + def test_capture_raises_clean_error_when_port_unavailable(monkeypatch): # Occupy a port, then point the callback server at it: binding must fail with a # clean APIError, not a raw OSError traceback escaping run_login_flow. From 81a1bc7e5a75f61be7c9fa36007576c2c5bfc163 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 13:08:15 +0000 Subject: [PATCH 5/7] test: kill surviving mutants in streaming macos/render/session - macos.py: assert the missing-swiftc / compile-failure exit_codes, the swiftc subprocess capture_output/text/check kwargs, the cache-dir mkdir parents (nested path) and exist_ok (pre-existing dirs), the _cleanup_process terminate guard + 2s wait backstops + stderr-pipe close, the `returncode >= 0` boundary, and the chunk-frames = sample_rate//10 helper arg. The module-cache mkdir parents=True is `# pragma: no mutate` (equivalent: cache_dir is created the line before). - render.py: assert a turn event missing end_of_turn reads as a partial (False). - session.py: assert the parallel source workers are daemons; the 0.1s join poll interval is `# pragma: no mutate` (a responsiveness/CPU tradeoff, not behavior). Tests only (plus two pragmas); no behavior change. 
--- aai_cli/streaming/macos.py | 4 ++- aai_cli/streaming/session.py | 4 ++- tests/test_macos_audio_source.py | 59 ++++++++++++++++++++++++++++++++ tests/test_stream_session.py | 5 +++ tests/test_streaming_render.py | 10 ++++++ 5 files changed, 80 insertions(+), 2 deletions(-) diff --git a/aai_cli/streaming/macos.py b/aai_cli/streaming/macos.py index 99e8bf7d..8ad183f5 100644 --- a/aai_cli/streaming/macos.py +++ b/aai_cli/streaming/macos.py @@ -84,7 +84,9 @@ def build_helper() -> Path: cache_dir.mkdir(parents=True, exist_ok=True) module_cache = cache_dir / "swift-module-cache" - module_cache.mkdir(parents=True, exist_ok=True) + # parents=True is an equivalent mutant here: cache_dir was just created above, so + # module_cache's parent always exists. exist_ok is covered by the rebuild test. + module_cache.mkdir(parents=True, exist_ok=True) # pragma: no mutate source_path = cache_dir / f"{_HELPER_PREFIX}-{digest}.swift" source_path.write_bytes(source) tmp_helper = helper.with_suffix(".tmp") diff --git a/aai_cli/streaming/session.py b/aai_cli/streaming/session.py index 1ef8aa04..ff01d7ac 100644 --- a/aai_cli/streaming/session.py +++ b/aai_cli/streaming/session.py @@ -296,7 +296,9 @@ def worker(source_label: str, audio: Iterable[bytes], rate: int) -> None: thread.start() while any(thread.is_alive() for thread in threads): for thread in threads: - thread.join(timeout=0.1) + # Poll interval: a responsiveness/CPU tradeoff, not behavior -- the loop + # surfaces a worker error within ~0.1s. Exact value isn't assertable. 
+ thread.join(timeout=0.1) # pragma: no mutate if not errors.empty(): raise errors.get() if not errors.empty(): diff --git a/tests/test_macos_audio_source.py b/tests/test_macos_audio_source.py index 1afadfef..5266d1bd 100644 --- a/tests/test_macos_audio_source.py +++ b/tests/test_macos_audio_source.py @@ -54,6 +54,7 @@ def test_build_helper_requires_swiftc(monkeypatch): with pytest.raises(CLIError) as exc: macos.build_helper() assert "xcode-select" in (exc.value.suggestion or "") + assert exc.value.exit_code == 2 def test_build_helper_compiles_to_cache(monkeypatch, tmp_path): @@ -65,6 +66,7 @@ def test_build_helper_compiles_to_cache(monkeypatch, tmp_path): def fake_run(cmd, **kwargs): seen["cmd"] = cmd + seen["kwargs"] = kwargs Path(cmd[-1]).write_bytes(b"binary") return types.SimpleNamespace(returncode=0, stderr="", stdout="") @@ -73,6 +75,51 @@ def fake_run(cmd, **kwargs): assert helper.read_bytes() == b"binary" assert "-parse-as-library" in seen["cmd"] assert "ScreenCaptureKit" in seen["cmd"] + # stderr/stdout are captured as text, and a non-zero compile is inspected (not + # raised): check must stay False so build_helper surfaces its own error. + assert seen["kwargs"]["capture_output"] is True + assert seen["kwargs"]["text"] is True + assert seen["kwargs"]["check"] is False + + +def test_build_helper_creates_missing_cache_parents(monkeypatch, tmp_path): + # The cache dir's parents may not exist yet; build_helper must create the whole + # chain (cache_dir.mkdir parents=True), not just the leaf. 
+ nested = tmp_path / "missing1" / "missing2" # parents do not exist + monkeypatch.setattr(macos.sys, "platform", "darwin") + monkeypatch.setattr(macos.shutil, "which", lambda _tool: "/usr/bin/swiftc") + monkeypatch.setattr(macos, "_resource_bytes", lambda: b"swift source") + monkeypatch.setattr(macos, "user_cache_path", lambda _app: nested) + monkeypatch.setattr( + macos.subprocess, + "run", + lambda cmd, **k: ( + Path(cmd[-1]).write_bytes(b"bin"), + types.SimpleNamespace(returncode=0, stderr="", stdout=""), + )[1], + ) + helper = macos.build_helper() + assert helper.read_bytes() == b"bin" + + +def test_build_helper_tolerates_existing_cache_dirs(monkeypatch, tmp_path): + # A rebuild (new source digest) runs with the cache dir and module cache already + # present, so their mkdirs must tolerate existing dirs (exist_ok=True). + monkeypatch.setattr(macos.sys, "platform", "darwin") + monkeypatch.setattr(macos.shutil, "which", lambda _tool: "/usr/bin/swiftc") + monkeypatch.setattr(macos, "_resource_bytes", lambda: b"swift source") + monkeypatch.setattr(macos, "user_cache_path", lambda _app: tmp_path) + (tmp_path / "macos-system-audio" / "swift-module-cache").mkdir(parents=True) # pre-exist + monkeypatch.setattr( + macos.subprocess, + "run", + lambda cmd, **k: ( + Path(cmd[-1]).write_bytes(b"bin"), + types.SimpleNamespace(returncode=0, stderr="", stdout=""), + )[1], + ) + helper = macos.build_helper() # must not raise FileExistsError on the mkdirs + assert helper.read_bytes() == b"bin" def test_build_helper_reuses_cached_binary(monkeypatch, tmp_path): @@ -106,6 +153,7 @@ def test_build_helper_compile_failure_surfaces_stderr(monkeypatch, tmp_path): with pytest.raises(CLIError) as exc: macos.build_helper() assert exc.value.error_type == "mac_system_audio_unavailable" + assert exc.value.exit_code == 2 assert exc.value.suggestion == "compile broke" @@ -140,9 +188,11 @@ class TimeoutProc(_FakeProc): def __init__(self): super().__init__(stdout=b"") self.waits = 0 + 
self.wait_timeouts = [] def wait(self, timeout=None): self.waits += 1 + self.wait_timeouts.append(timeout) if self.waits == 1: raise macos.subprocess.TimeoutExpired("helper", timeout or 0.0) return self.returncode @@ -152,6 +202,8 @@ def wait(self, timeout=None): macos._cleanup_process(proc, proc.stdout, completed=True) assert proc.killed is True assert proc.waits == 2 + assert proc.terminated is False # completed=True -> the `and` guard skips terminate() + assert proc.wait_timeouts == [2.0, 2.0] # both waits use the 2s backstop def test_raise_helper_exit_handles_clean_eof(): @@ -166,6 +218,7 @@ def test_returncode_detail_names_signals(): assert macos._returncode_detail(-5) == "SIGTRAP (-5)" assert macos._returncode_detail(-99999) == "signal 99999 (-99999)" assert macos._returncode_detail(2) == "exit 2" + assert macos._returncode_detail(0) == "exit 0" # 0 is a clean exit (pins `>= 0`) assert macos._returncode_detail(None) == "unknown exit" @@ -201,6 +254,12 @@ def fake_popen(cmd): assert events == ["open"] assert "--system-only" in commands[0] assert procs[0].terminated is True + # On a non-completed teardown the helper's stderr pipe is closed too (pins the + # `proc.stderr is not None` guard against an `is None` flip that would leak it). + assert procs[0].stderr is not None and procs[0].stderr.closed is True + # chunk-frames is ~100 ms of frames at the target rate (sample_rate // 10). 
+ cmd = commands[0] + assert cmd[cmd.index("--chunk-frames") + 1] == str(src.sample_rate // 10) def test_source_start_failure_is_cli_error(tmp_path): diff --git a/tests/test_stream_session.py b/tests/test_stream_session.py index 36743628..740e98f1 100644 --- a/tests/test_stream_session.py +++ b/tests/test_stream_session.py @@ -291,10 +291,13 @@ def __init__(self, *, target_rate=None, device=None, capture_rate=None, on_open= def __iter__(self): return iter([b"mic"]) + daemons = [] + class ImmediateThread: def __init__(self, *, target, args, daemon): self._target = target self._args = args + daemons.append(daemon) def start(self): self._target(*self._args) @@ -315,6 +318,8 @@ def fake_stream_audio(api_key, source, *, params, **_kwargs): result = runner.invoke(app, ["stream", "--system-audio", "--json"]) assert result.exit_code == 1 assert "failed" in result.output + # Both source workers run as daemons so a wedged stream can't block process exit. + assert daemons and all(d is True for d in daemons) def test_stream_system_audio_parallel_keyboard_interrupt_exits_cleanly(monkeypatch): diff --git a/tests/test_streaming_render.py b/tests/test_streaming_render.py index e4050f13..73a7b587 100644 --- a/tests/test_streaming_render.py +++ b/tests/test_streaming_render.py @@ -160,6 +160,16 @@ def test_json_mode_emits_ndjson_events(): assert lines[1] == {"type": "turn", "transcript": "hi", "end_of_turn": True} +def test_turn_defaults_end_of_turn_to_false_when_absent(): + # An event missing end_of_turn must read as a partial (False), never a finalized + # turn; pins the getattr default against a flip to True. 
+ out = io.StringIO() + r = StreamRenderer(json_mode=True, out=out) + r.turn(types.SimpleNamespace(transcript="hi")) # no end_of_turn attribute + event = json.loads(out.getvalue().splitlines()[0]) + assert event["end_of_turn"] is False + + def test_json_mode_emits_source_when_labeled(): out = io.StringIO() r = StreamRenderer(json_mode=True, out=out) From 39acac8f2d7440c89a8ccc6845848bcb7878608a Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 13:25:26 +0000 Subject: [PATCH 6/7] test: assert subprocess check/text in setup; pragma config validation-summary Two follow-ups found by a full re-sweep after the rebase: - setup.py _run: also assert the subprocess.run text=True and check=False kwargs (capture_output was already pinned) so all three are mutation-covered. - config.py _validation_summary: mark exc.errors(include_url/include_input=False) `# pragma: no mutate` -- equivalent mutants, since the summary reads only loc+msg and never the url/input fields those flags toggle. Tests only (plus one pragma); no behavior change. --- aai_cli/config.py | 5 ++++- tests/test_setup_install.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/aai_cli/config.py b/aai_cli/config.py index 6d9d010f..8adfff8b 100644 --- a/aai_cli/config.py +++ b/aai_cli/config.py @@ -81,7 +81,10 @@ def _validation_summary(exc: ValidationError) -> str: in a one-line CLI error. """ problems: list[str] = [] - for err in exc.errors(include_url=False, include_input=False): + # include_url/include_input=False keep pydantic's url/input fields out of each + # error dict, but this summary only reads loc + msg, so flipping them is an + # equivalent mutant (the rendered string is identical either way). 
+ for err in exc.errors(include_url=False, include_input=False): # pragma: no mutate loc = ".".join(str(part) for part in err["loc"]) or "top level" problems.append(f"{loc}: {err['msg']}") return "; ".join(problems) diff --git a/tests/test_setup_install.py b/tests/test_setup_install.py index 92e2b791..6a1f0c21 100644 --- a/tests/test_setup_install.py +++ b/tests/test_setup_install.py @@ -110,6 +110,8 @@ def record(cmd, *args, **kwargs): assert kwargs.get("stdin") is subprocess.DEVNULL assert kwargs.get("timeout") assert kwargs.get("capture_output") is True # stdout/stderr must be captured + assert kwargs.get("text") is True # decoded to str, not bytes + assert kwargs.get("check") is False # we inspect returncode, never raise # The skill download gets the longer 300s timeout (vs the 120s default elsewhere). add_calls = [kw for cmd, kw in seen if cmd[:1] == ["npx"] and "add" in cmd] From b8cbdf108870f01f3297287ed9f42d8a2d603fe8 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 13:29:53 +0000 Subject: [PATCH 7/7] tooling: add whole-package mutation sweep + document it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Developer-experience follow-up. The mutation gate is diff-scoped, so auditing existing code against its bar meant rebuilding a throwaway sweep script each time. Promote it to scripts/mutation_sweep.py: it reuses the gate's own collect/cover/survive engine over every line of the named files (or the whole package), reports surviving mutants (exit 1) and an UNCOVERED bucket separately, and is robust to the line-number shifts that make per-line checks brittle. Document the workflow in AGENTS.md next to the diff-scoped gate, including the reminder to pass `--timeout` to the coverage-refresh pytest run — the default suite leaves per-test timeouts opt-in, so a deadlocked test otherwise wedges the whole run instead of failing fast. 
---
 AGENTS.md                 |  12 ++++
 scripts/mutation_sweep.py | 125 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 scripts/mutation_sweep.py

diff --git a/AGENTS.md b/AGENTS.md
index 4bc00dc5..54add9fb 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -40,6 +40,18 @@ uv run diff-cover coverage.xml --compare-branch=origin/main --fail-under=100
 uv run python scripts/mutation_gate.py origin/main # mutation gate
 ```
 
+The gate is diff-scoped, so code predating it is never mutation-tested. To audit
+existing code (or a whole module) against the same bar, `scripts/mutation_sweep.py`
+reuses the gate's engine over *every* line of the files you name (or the whole
+package). Refresh coverage first, and pass `--timeout` to that pytest step — the
+default suite has no per-test timeout (it's opt-in; see `pyproject.toml`), so a
+deadlocked test would wedge the run instead of failing fast:
+
+```sh
+uv run pytest -q -n auto --timeout=60 --cov=aai_cli --cov-branch --cov-context=test --cov-report=
+uv run python scripts/mutation_sweep.py aai_cli/config.py # or omit paths for the whole package
+```
+
 ### Test markers
 
 The default suite **excludes** three slow/credentialed marker sets — `pyproject.toml`'s `addopts` carries `-m "not e2e and not install and not install_script"`, so a bare `pytest` matches what `check.sh` gates. An explicit command-line `-m` overrides it for the opt-in runs:
diff --git a/scripts/mutation_sweep.py b/scripts/mutation_sweep.py
new file mode 100644
index 00000000..837a92ab
--- /dev/null
+++ b/scripts/mutation_sweep.py
@@ -0,0 +1,125 @@
+"""Whole-file mutation sweep — the diff-scoped gate's repo-wide companion.
+
+``scripts/mutation_gate.py`` only mutates lines changed versus a branch, so code
+that predates the gate is never held to its bar. This sweeps EVERY eligible line
+of the given files (or the whole package) and reports the mutants that survive —
+i.e. the suite still passes with the line deliberately broken — so you can add an
+assertion that kills each one (or mark a genuinely-equivalent line
+``# pragma: no mutate``). It reuses the gate's own mutation/kill engine, so a
+survivor here is a survivor there.
+
+Usage::
+
+    # 1. Refresh per-test coverage contexts the sweep reads from .coverage:
+    uv run pytest -q -n auto --timeout=60 --cov=aai_cli --cov-branch \
+        --cov-context=test --cov-report=
+    # 2. Sweep specific files (or omit paths to sweep the whole package):
+    uv run python scripts/mutation_sweep.py aai_cli/config.py
+    uv run python scripts/mutation_sweep.py
+
+Pass ``--timeout`` to the pytest step above: the default suite has no per-test
+timeout (it is opt-in; see pyproject), and a deadlocked test would otherwise wedge
+the whole run instead of failing fast.
+
+Exit status is 1 if any real survivor is found, 2 if the coverage data holds no
+measured files (the refresh step above was skipped), else 0. Lines whose mutants
+have no covering test are reported separately as UNCOVERED (not failed): coverage
+attributes import-time evaluated defaults to no test, so that bucket needs a
+manual look rather than blind action.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+from types import ModuleType
+
+import coverage
+
+_HERE = Path(__file__).resolve().parent
+_PKG = _HERE.parent / "aai_cli"
+_TEMPLATES = _PKG / "init" / "templates"
+
+
+def _load_gate() -> ModuleType:
+    """Import ``mutation_gate.py`` from this script's directory and return the module."""
+    # ModuleType attribute access is dynamic, so reusing the gate's private helpers
+    # (_collect/_covering_tests/_survives) below needs no type-checker escape hatch.
+    spec = importlib.util.spec_from_file_location("mutation_gate", _HERE / "mutation_gate.py")
+    if spec is None or spec.loader is None:
+        raise RuntimeError("could not load scripts/mutation_gate.py")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules["mutation_gate"] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+def _package_files() -> list[Path]:
+    """Return every ``*.py`` under the package, sorted, skipping the init templates."""
+    return sorted(p for p in _PKG.rglob("*.py") if _TEMPLATES not in p.parents)
+
+
+def _sweep_file(
+    mg: ModuleType, path: Path, data: coverage.CoverageData
+) -> tuple[int, list[str], list[str]]:
+    """Mutate every line of *path* and classify each resulting mutant.
+
+    Returns ``(mutant_count, survivor_labels, uncovered_labels)``. A mutant with no
+    covering test goes in the uncovered bucket; any other mutant is a survivor when
+    the gate's ``_survives`` check says so.
+    """
+    line_count = len(path.read_text(encoding="utf-8").splitlines())
+    tree, src, mutants = mg._collect(path, set(range(1, line_count + 1)))
+    survivors: list[str] = []
+    uncovered: list[str] = []
+    for mutant in mutants:
+        if not mg._covering_tests(data, path, mutant.linenos):
+            uncovered.append(mutant.label)
+        elif mg._survives(path, tree, src, mutant, data):
+            survivors.append(mutant.label)
+    return len(mutants), survivors, uncovered
+
+
+def main() -> int:
+    """Sweep the files named on argv (default: the whole package) and print a report.
+
+    Returns the process exit status: 1 if any mutant survived, 2 if the coverage
+    data holds no measured files, 0 otherwise.
+    """
+    mg = _load_gate()
+    args = [Path(a) for a in sys.argv[1:]] or _package_files()
+    data = coverage.CoverageData()
+    data.read()
+    if not data.measured_files():
+        # CoverageData.read() returns normally even when there is no recorded
+        # data to load. With nothing measured no mutant can be matched to a
+        # covering test, so (presumably -- the lookup lives in the gate) every
+        # one would be filed as "uncovered" and the sweep would exit 0 without
+        # having tested anything. Treat that as a setup error instead.
+        sys.stderr.write(
+            "no coverage data found; refresh it first "
+            "(pytest --cov=aai_cli --cov-branch --cov-context=test)\n"
+        )
+        return 2
+    total = 0
+    all_survivors: list[str] = []
+    for path in args:
+        tested, survivors, uncovered = _sweep_file(mg, path, data)
+        total += tested
+        all_survivors += survivors
+        sys.stdout.write(f"\n=== {path} : {tested} mutants ===\n")
+        for label in survivors:
+            sys.stdout.write(f" SURVIVES {label}\n")
+        for label in uncovered:
+            sys.stdout.write(f" uncovered {label}\n")
+        if not survivors and not uncovered:
+            sys.stdout.write(" clean\n")
+        sys.stdout.flush()
+    sys.stdout.write(f"\nTOTAL {total} mutant(s); {len(all_survivors)} surviving\n")
+    return 1 if all_survivors else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())