From 7450aa9e4f22d0bd493095e8aa9f879e4282a5aa Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 12 Jun 2026 05:20:09 +0000
Subject: [PATCH] Fix all 38 CodeQL quality alerts and gate CodeQL locally in
 check.sh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolve everything on the repo's code-scanning *quality* tab (reproduced
locally with the same python/actions/javascript code-quality suites the
CodeQL workflow uploads; js+actions were already clean):

- py/file-not-closed (the one real bug): silence_stdout leaked the
  devnull fd after dup2 — close it in a finally; test asserts the close.
- py/ineffectual-statement (31): Protocol stub `...` bodies become
  one-line docstrings; the non-None-returning stubs gain @abstractmethod
  so mypy/pyright strict still accept the empty body, with no
  never-executed line for the patch-coverage gate to miss.
- py/unreachable-statement (3): raise via a _raise() helper inside the
  multi-manager `with pytest.raises(...), telemetry.track(...)` blocks so
  the trailing assertions are visibly reachable to CodeQL's CFG.
- py/empty-except: explain why share's Ctrl-C handler is a no-op.
- py/repeated-import / py/import-and-import-from: drop duplicate local
  imports in tests.

Then make the alerts un-regressable: a new check.sh stage
(scripts/codeql_gate.py) runs the exact security + quality suites over a
python/actions/javascript db-cluster and fails on any finding. Self-skips
without the CodeQL bundle on PATH (codeql.yml stays the CI enforcement,
so PRs aren't double-scanned); the web session-start hook now provisions
the bundle, pinned in gate_tool_pins.sh.

https://claude.ai/code/session_01DW3ZQmfbnKsgjrA4PnsF8U
---
 .claude/hooks/session-start.sh |  23 ++++++-
 AGENTS.md                      |   2 +-
 aai_cli/client.py              |  12 ++--
 aai_cli/commands/doctor.py     |   5 +-
 aai_cli/commands/share.py      |   2 +
 aai_cli/microphone.py          |  18 ++++--
 aai_cli/onboard/prompter.py    |  23 +++++--
 aai_cli/stdio.py               |   8 ++-
 aai_cli/streaming/macos.py     |  21 ++++---
 aai_cli/tts/audio.py           |  19 ++++--
 aai_cli/tts/session.py         |  13 +++-
 scripts/check.sh               |  14 +++++
 scripts/codeql_gate.py         | 106 +++++++++++++++++++++++++++++++++
 scripts/gate_marker.py         |   3 +-
 scripts/gate_tool_pins.sh      |  10 +++-
 tests/test_context.py          |   2 -
 tests/test_microphone.py       |   9 +--
 tests/test_stdio.py            |   3 +
 tests/test_telemetry.py        |  12 +++-
 19 files changed, 257 insertions(+), 48 deletions(-)
 create mode 100644 scripts/codeql_gate.py

diff --git a/.claude/hooks/session-start.sh b/.claude/hooks/session-start.sh
index 6f806017..f6b7e56f 100755
--- a/.claude/hooks/session-start.sh
+++ b/.claude/hooks/session-start.sh
@@ -9,6 +9,8 @@
 #   - go:     actionlint + gitleaks (Go binaries, no PyPI/npm wheel) — without them
 #             check.sh silently self-skips those gates here and the failure only
 #             surfaces in CI
+#   - codeql: the CLI+query-pack bundle for check.sh's codeql gate (security +
+#             quality suites; alerts otherwise only surface on GitHub after push)
 #   - python: `uv sync` to materialize the locked dev environment up front
 #
 # Hook stdout is injected into the agent's context at session start, so emit one
@@ -99,7 +101,22 @@ else
   log "go not found; skipping actionlint/gitleaks (check.sh self-skips them; CI still runs them)"
 fi
 
-# 4. Git history — web containers start from a shallow clone, where origin/main
+# 4. CodeQL bundle (CLI + query packs, pinned in gate_tool_pins.sh) — without it
+#    check.sh self-skips its codeql gate and security/quality alerts only surface
+#    on GitHub after push. ~1 GB release tarball; soft-fails like everything else.
+if command -v codeql >/dev/null 2>&1; then
+  log "codeql already present"
+elif curl -fsSL "https://github.com/github/codeql-action/releases/download/${CODEQL_BUNDLE_VERSION}/codeql-bundle-linux64.tar.gz" -o /tmp/codeql-bundle.tar.gz >>"$LOG" 2>&1 \
+  && tar -xzf /tmp/codeql-bundle.tar.gz -C /usr/local/lib >>"$LOG" 2>&1 \
+  && ln -sf /usr/local/lib/codeql/codeql /usr/local/bin/codeql \
+  && rm -f /tmp/codeql-bundle.tar.gz; then
+  log "installed codeql (${CODEQL_BUNDLE_VERSION})"
+else
+  rm -f /tmp/codeql-bundle.tar.gz
+  log "WARNING: codeql bundle install failed; check.sh self-skips its codeql gate (codeql.yml still runs it; see $LOG)"
+fi
+
+# 5. Git history — web containers start from a shallow clone, where origin/main
 #    can exist with NO merge base to the session branch; check.sh's diff-scoped
 #    tail gates (diff-cover/mutation) then crash with "fatal: ... no merge base"
 #    instead of self-skipping, and the branch auto-update below can't merge.
@@ -114,7 +131,7 @@ else
   log "clone already has full history"
 fi
 
-# 5. Keep the session branch current. Resumed web containers hold a clone frozen
+# 6. Keep the session branch current. Resumed web containers hold a clone frozen
 #    at creation time, so two things can go stale: the branch's own remote tip
 #    (pushes from another session/machine) and origin/main (which the diff-scoped
 #    gates — diff-cover, mutation — compare against). Fast-forward to the remote
@@ -149,7 +166,7 @@ if [ "$branch" != "HEAD" ] && [ "$branch" != "main" ]; then
   fi
 fi
 
-# 6. Python environment — materialize the locked dev env so the first `uv run`
+# 7. Python environment — materialize the locked dev env so the first `uv run`
 #    doesn't pay the full sync cost mid-task. `uv` syncs the default dev group.
 if uv sync >>"$LOG" 2>&1; then
   log "uv environment synced (locked dev group)"
diff --git a/AGENTS.md b/AGENTS.md
index 7c49469a..03e45bc8 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -16,7 +16,7 @@ uv run assembly --help            # run the CLI from the locked environment
 
 Dev tooling is a PEP 735 `[dependency-groups]` group with `default-groups = ["dev"]`, not a `[project]` extra — `uv sync --extra dev` errors.
 
-`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" diff gate (`# type: ignore` / `# noqa` / `pragma: no cover` / net-new `Any` / `cast(`) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.`
+`scripts/check.sh` is the authoritative gate; keep this list in sync with it. It runs, in order: `uv lock --check` → `ruff check` → `ruff format --check` → `mypy` → `pyright` (src strict) → `pyright` (tests) → `vulture` (dead code) → `deptry` (dependency hygiene) → `lint-imports` (import-linter architecture contracts) → max-file-length (500 lines) → `xenon` (cyclomatic complexity, max grade B / project avg A) → `swiftlint` + swift compile (macOS only, skipped elsewhere) → `markdownlint` → `prettier` (init template JS/CSS) → `shellcheck` → `actionlint` + `zizmor` (workflow lint/audit) → `gitleaks` (secret scan) → generated `--show-code` compile gate → init template contract gate → `pytest` (90% branch coverage) → `diff-cover` (100% patch coverage vs `origin/main`) → **mutation gate** (diff-scoped: mutates each changed line and reruns the tests that cover it — a surviving mutant fails the gate, so changed lines need assertions that would *fail* if the line broke, not just coverage; suppress a genuinely unassertable line with `# pragma: no mutate`) → a "no new escape hatches" diff gate (`# type: ignore` / `# noqa` / `pragma: no cover` / net-new `Any` / `cast(`) → **CodeQL gate** (`scripts/codeql_gate.py`: the same security + quality suites the CodeQL workflow uploads to GitHub's code-scanning/quality tabs, run locally over python/actions/javascript so alerts fail before push instead of on the PR; needs the CodeQL bundle on PATH — self-skips otherwise, `codeql.yml` covers CI, and the web session-start hook provisions it) → `uv build` + `twine check --strict`. The `vulture`/`deptry`/`lint-imports`/`xenon`, patch-coverage, and mutation stages catch the failures that `ruff`+`mypy` alone won't — don't claim the gate is green until the script prints `All checks passed.`
 
 **Commits are gated.** On success `check.sh` records a working-tree signature (`scripts/gate_marker.py record` → `.git/aai-gate-pass`), and a PreToolUse hook (`.claude/hooks/require-gate-before-commit.sh`) blocks `git commit` unless that signature still matches — so run the full gate to completion *before* committing (a single-file `pytest` does not satisfy it), and re-run it after any further edit. Iterate with the fast targeted commands above, gate once at the end. For a deliberate work-in-progress commit, prefix `AAI_ALLOW_COMMIT=1 git commit …`.
 
diff --git a/aai_cli/client.py b/aai_cli/client.py
index 48086ef5..81cc4584 100644
--- a/aai_cli/client.py
+++ b/aai_cli/client.py
@@ -24,13 +24,17 @@
 
 
 class _StreamingClientLike(Protocol):
-    def on(self, event: StreamingEvents, handler: _StreamHandler) -> None: ...
+    def on(self, event: StreamingEvents, handler: _StreamHandler) -> None:
+        """Register a handler for a streaming event."""
 
-    def connect(self, params: StreamingParameters) -> None: ...
+    def connect(self, params: StreamingParameters) -> None:
+        """Open the realtime session."""
 
-    def stream(self, data: bytes | Generator[bytes, None, None] | Iterable[bytes]) -> None: ...
+    def stream(self, data: bytes | Generator[bytes, None, None] | Iterable[bytes]) -> None:
+        """Send audio to the session."""
 
-    def disconnect(self, *, terminate: Literal[False, True] = False) -> None: ...
+    def disconnect(self, *, terminate: Literal[False, True] = False) -> None:  # pragma: no mutate
+        """Close the session."""
 
 
 def _make_streaming_client(api_key: str) -> _StreamingClientLike:
diff --git a/aai_cli/commands/doctor.py b/aai_cli/commands/doctor.py
index a2d1c86d..5fe4328b 100644
--- a/aai_cli/commands/doctor.py
+++ b/aai_cli/commands/doctor.py
@@ -2,6 +2,7 @@
 
 import shutil
 import sys
+from abc import abstractmethod
 from collections.abc import Mapping, Sequence
 from typing import NotRequired, Protocol, TypedDict
 
@@ -37,7 +38,9 @@ class DoctorResult(TypedDict):
 
 
 class _SoundDeviceModule(Protocol):
-    def query_devices(self) -> Sequence[Mapping[str, object]]: ...
+    @abstractmethod
+    def query_devices(self) -> Sequence[Mapping[str, object]]:
+        """List the audio devices sounddevice can see."""
 
 
 # Status -> (affordance symbol, render style). "fail" is a blocker; "warn" is
diff --git a/aai_cli/commands/share.py b/aai_cli/commands/share.py
index 5e151c35..488e4a4e 100644
--- a/aai_cli/commands/share.py
+++ b/aai_cli/commands/share.py
@@ -115,6 +115,8 @@ def run_share(*, port: int, no_install: bool, json_mode: bool, quiet: bool) -> N
         output.emit(payload, _render_share, json_mode=json_mode)
         server.wait()
     except KeyboardInterrupt:
+        # Ctrl-C is the expected way to stop a foreground share; the finally
+        # block below tears down the tunnel and server.
         pass
     finally:
         _terminate(proxy)
diff --git a/aai_cli/microphone.py b/aai_cli/microphone.py
index 5b624a3d..0217dbc1 100644
--- a/aai_cli/microphone.py
+++ b/aai_cli/microphone.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import warnings
+from abc import abstractmethod
 from collections.abc import Callable, Iterable, Iterator, Mapping
 from typing import Any, Protocol, cast
 
@@ -18,21 +19,28 @@
 
 
 class _RawInputStream(Protocol):
-    def start(self) -> None: ...
+    def start(self) -> None:
+        """Begin capturing."""
 
-    def read(self, frames: int) -> tuple[bytes, object]: ...
+    @abstractmethod
+    def read(self, frames: int) -> tuple[bytes, object]:
+        """Read up to `frames` frames of PCM plus an overflow flag."""
 
-    def stop(self) -> None: ...
+    def stop(self) -> None:
+        """Stop capturing."""
 
-    def close(self) -> None: ...
+    def close(self) -> None:
+        """Release the device."""
 
 
 class _SoundDeviceModule(Protocol):
     RawInputStream: Callable[..., _RawInputStream]
 
+    @abstractmethod
     def query_devices(
         self, device: int | None = None, kind: str | None = None
-    ) -> Mapping[str, object]: ...
+    ) -> Mapping[str, object]:
+        """Describe an audio device (or the default one for `kind`)."""
 
 
 def audio_missing_error() -> CLIError:
diff --git a/aai_cli/onboard/prompter.py b/aai_cli/onboard/prompter.py
index 24121340..47b0cba1 100644
--- a/aai_cli/onboard/prompter.py
+++ b/aai_cli/onboard/prompter.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from abc import abstractmethod
 from typing import Protocol
 
 import typer
@@ -19,13 +20,25 @@ class Prompter(Protocol):
     # the wizard reads this to skip steps that would otherwise hang a headless run.
     interactive: bool
 
-    def section(self, title: str) -> None: ...
-    def note(self, message: str) -> None: ...
-    def confirm(self, title: str, *, default: bool = True) -> bool: ...  # pragma: no mutate
+    def section(self, title: str) -> None:
+        """Print a step heading."""
+
+    def note(self, message: str) -> None:
+        """Print an informational line."""
+
+    @abstractmethod
+    def confirm(self, title: str, *, default: bool = True) -> bool:  # pragma: no mutate
+        """Ask a yes/no question."""
+
+    @abstractmethod
     def select(
         self, title: str, options: list[tuple[str, str]], *, default: str | None = None
-    ) -> str: ...
-    def text(self, title: str, *, default: str | None = None) -> str: ...
+    ) -> str:
+        """Pick one value from `options` (label, value) pairs."""
+
+    @abstractmethod
+    def text(self, title: str, *, default: str | None = None) -> str:
+        """Ask for a free-form line of text."""
 
 
 class InteractivePrompter:
diff --git a/aai_cli/stdio.py b/aai_cli/stdio.py
index 3d54552d..1ec00737 100644
--- a/aai_cli/stdio.py
+++ b/aai_cli/stdio.py
@@ -15,7 +15,13 @@ def silence_stdout() -> None:
     one-shot entry point and the streaming reader thread.
     """
     with contextlib.suppress(OSError):
-        os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno())
+        devnull_fd = os.open(os.devnull, os.O_WRONLY)
+        try:
+            os.dup2(devnull_fd, sys.stdout.fileno())
+        finally:
+            # dup2 duplicates the descriptor, so the original must be closed
+            # or it leaks one fd per call.
+            os.close(devnull_fd)
 
 
 def stdin_is_piped() -> bool:
diff --git a/aai_cli/streaming/macos.py b/aai_cli/streaming/macos.py
index 8ad183f5..6c9404f2 100644
--- a/aai_cli/streaming/macos.py
+++ b/aai_cli/streaming/macos.py
@@ -25,21 +25,28 @@
 
 class _CaptureProcess(Protocol):
     @property
-    def stdout(self) -> _Pipe | None: ...
+    def stdout(self) -> _Pipe | None:
+        """The helper's PCM output pipe."""
 
     @property
-    def stderr(self) -> _Pipe | None: ...
+    def stderr(self) -> _Pipe | None:
+        """The helper's diagnostic pipe."""
 
     @property
-    def returncode(self) -> int | None: ...
+    def returncode(self) -> int | None:
+        """Exit code once the helper has exited."""
 
-    def poll(self) -> int | None: ...
+    def poll(self) -> int | None:
+        """Non-blocking exit-code check."""
 
-    def terminate(self) -> None: ...
+    def terminate(self) -> None:
+        """Ask the helper to exit."""
 
-    def kill(self) -> None: ...
+    def kill(self) -> None:
+        """Force the helper to exit."""
 
-    def wait(self, timeout: float | None = None) -> int | None: ...
+    def wait(self, timeout: float | None = None) -> int | None:
+        """Block until the helper exits."""
 
 
 def _unsupported_platform() -> CLIError:
diff --git a/aai_cli/tts/audio.py b/aai_cli/tts/audio.py
index 4bd9de2c..e17351d1 100644
--- a/aai_cli/tts/audio.py
+++ b/aai_cli/tts/audio.py
@@ -14,11 +14,20 @@ class _OutputStream(Protocol):
     """The slice of a sounddevice output stream play_pcm drives — named as a
     Protocol so the untyped library boundary is structurally typed, not opaque."""
 
-    def start(self) -> None: ...
-    def write(self, data: bytes, /) -> object: ...  # real write returns a bool we ignore
-    def stop(self) -> None: ...
-    def abort(self) -> None: ...  # immediate stop: discards buffered frames (vs stop's drain)
-    def close(self) -> None: ...
+    def start(self) -> None:
+        """Begin playback."""
+
+    def write(self, data: bytes, /) -> object:
+        """Queue PCM for playback (the real write returns a bool we ignore)."""
+
+    def stop(self) -> None:
+        """Stop after draining buffered frames."""
+
+    def abort(self) -> None:
+        """Immediate stop: discards buffered frames (vs stop's drain)."""
+
+    def close(self) -> None:
+        """Release the stream."""
 
 
 # Write playback in ~4 KiB chunks (≈85 ms of 16-bit mono at 24 kHz) instead of one
diff --git a/aai_cli/tts/session.py b/aai_cli/tts/session.py
index 263b79d4..a3d1c8f6 100644
--- a/aai_cli/tts/session.py
+++ b/aai_cli/tts/session.py
@@ -4,6 +4,7 @@
 import binascii
 import contextlib
 import json
+from abc import abstractmethod
 from collections.abc import Callable
 from dataclasses import dataclass
 from typing import Protocol
@@ -20,9 +21,15 @@ class _WebSocket(Protocol):
     """The slice of a websockets sync connection this module drives — named as a
     Protocol so the untyped library boundary is structurally typed, not opaque."""
 
-    def recv(self, timeout: float | None = None) -> str | bytes: ...
-    def send(self, data: str, /) -> None: ...  # positional-only: matches ws send(message)
-    def close(self) -> None: ...
+    @abstractmethod
+    def recv(self, timeout: float | None = None) -> str | bytes:
+        """Receive the next text or binary frame."""
+
+    def send(self, data: str, /) -> None:
+        """Send a text frame (positional-only: matches ws send(message))."""
+
+    def close(self) -> None:
+        """Close the connection."""
 
 
 # The connect factory: returns a fresh _WebSocket. websockets' real sync client
diff --git a/scripts/check.sh b/scripts/check.sh
index 9ebda92b..690860f8 100755
--- a/scripts/check.sh
+++ b/scripts/check.sh
@@ -251,6 +251,20 @@ else
   echo "   origin/main not found; skipping escape-hatch diff gate (CI provides it)"
 fi
 
+echo "==> codeql (security + quality suites, mirrors codeql.yml minus swift)"
+# Runs the same query suites the CodeQL workflow uploads to GitHub's code-scanning
+# and quality tabs, so an alert fails here instead of surfacing on the PR after
+# push. The CLI ships as a ~1 GB bundle with no PyPI/npm distribution, so this
+# self-skips when absent — codeql.yml is the CI enforcement (the hosted runner's
+# PATH has no codeql, so ci.yml's check job skips this too and the PR isn't
+# double-scanned), and the web session-start hook provisions the bundle. Last of
+# the analysis gates because it's the slowest (~minutes, not diff-scoped).
+if command -v codeql >/dev/null 2>&1; then
+  uv run python scripts/codeql_gate.py
+else
+  echo "   codeql not found; skipping (codeql.yml runs it in CI; install: https://github.com/github/codeql-action/releases)"
+fi
+
 echo "==> build + twine check (PyPI publish readiness)"
 # Build sdist + wheel into ./dist, then validate the metadata and README render
 # the way PyPI requires. --strict fails on any warning (e.g. a missing readme).
diff --git a/scripts/codeql_gate.py b/scripts/codeql_gate.py
new file mode 100644
index 00000000..088c3d01
--- /dev/null
+++ b/scripts/codeql_gate.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+"""Run CodeQL's security + quality suites locally — the same alerts GitHub shows.
+
+The CodeQL workflow (.github/workflows/codeql.yml) uploads two alert sets per
+language: the default security suite (the repo's *code scanning* tab) and the
+code-quality suite (the *quality* tab). Those only surface after a push, so an
+agent or dev session can land a PR that's green on check.sh yet grows alerts on
+GitHub. This gate runs the exact same suites against the working tree and fails
+on any finding.
+
+Scope mirrors codeql.yml minus swift: python, actions, and javascript-typescript
+extract with ``--build-mode=none``; the swift helper needs a real macOS build and
+stays CI-only. Requires the CodeQL *bundle* (CLI + bundled query packs) on PATH —
+check.sh self-skips when it's absent, and the web session-start hook provisions it.
+
+stdlib-only on purpose (nothing here imports the package under test).
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+# Cluster sub-database name -> query-pack prefix. Keys are the directory names
+# `codeql database create --db-cluster` produces for codeql.yml's non-swift
+# languages (the `javascript-typescript` language extracts into `javascript`).
+_LANGUAGES = {
+    "python": "python",
+    "actions": "actions",
+    "javascript": "javascript",
+}
+_SUITES = ("code-scanning", "code-quality")
+
+
+def _run(args: list[str]) -> None:
+    """Run a codeql command, surfacing its output only when it fails."""
+    proc = subprocess.run(args, capture_output=True, text=True, check=False)
+    if proc.returncode != 0:
+        sys.stdout.write(proc.stdout + proc.stderr)
+        raise SystemExit(f"codeql command failed: {' '.join(args)}")
+
+
+def _findings(sarif_path: Path) -> list[str]:
+    sarif = json.loads(sarif_path.read_text(encoding="utf-8"))
+    lines: list[str] = []
+    for run in sarif["runs"]:
+        for result in run.get("results", []):
+            location = result["locations"][0]["physicalLocation"]
+            uri = location["artifactLocation"]["uri"]
+            line = location.get("region", {}).get("startLine", "?")
+            lines.append(f"  {result['ruleId']}\t{uri}:{line}\t{result['message']['text']}")
+    return lines
+
+
+def main() -> int:
+    repo_root = Path(__file__).resolve().parent.parent
+    failures: list[str] = []
+    with tempfile.TemporaryDirectory(prefix="aai-codeql-") as tmp:
+        cluster = Path(tmp) / "dbs"
+        _run(
+            [
+                "codeql",
+                "database",
+                "create",
+                str(cluster),
+                "--db-cluster",
+                "--language=python,actions,javascript-typescript",
+                "--build-mode=none",
+                f"--source-root={repo_root}",
+                "--threads=0",
+            ]
+        )
+        for db_name, pack in sorted(_LANGUAGES.items()):
+            suites = [
+                f"codeql/{pack}-queries:codeql-suites/{pack}-{suite}.qls" for suite in _SUITES
+            ]
+            sarif_path = Path(tmp) / f"{db_name}.sarif"
+            _run(
+                [
+                    "codeql",
+                    "database",
+                    "analyze",
+                    str(cluster / db_name),
+                    *suites,
+                    "--format=sarif-latest",
+                    f"--output={sarif_path}",
+                    "--threads=0",
+                ]
+            )
+            found = _findings(sarif_path)
+            failures.extend(found)
+            status = f"{len(found)} finding(s)" if found else "clean"
+            sys.stdout.write(f"  {db_name}: {status}\n")
+
+    if failures:
+        sys.stdout.write("CodeQL findings (fix them; GitHub will alert on these):\n")
+        sys.stdout.write("\n".join(failures) + "\n")
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/gate_marker.py b/scripts/gate_marker.py
index 2d6dfc7f..52ac22f2 100644
--- a/scripts/gate_marker.py
+++ b/scripts/gate_marker.py
@@ -20,7 +20,8 @@
 
 
 class _Digest(Protocol):
-    def update(self, data: bytes, /) -> None: ...
+    def update(self, data: bytes, /) -> None:
+        """Feed bytes into the hash."""
 
 
 MARKER_NAME = "aai-gate-pass"
diff --git a/scripts/gate_tool_pins.sh b/scripts/gate_tool_pins.sh
index c90df9f2..107180a3 100644
--- a/scripts/gate_tool_pins.sh
+++ b/scripts/gate_tool_pins.sh
@@ -1,8 +1,9 @@
 # shellcheck shell=bash
 # Single source of truth for the gate's non-Python tool pins. Python tools are
-# pinned in pyproject.toml/uv.lock; these four have no PyPI distribution
-# (markdownlint/prettier are npm packages, actionlint/gitleaks are Go binaries),
-# so their versions live here instead. Sourced by both provisioning paths:
+# pinned in pyproject.toml/uv.lock; these have no PyPI distribution
+# (markdownlint/prettier are npm packages, actionlint/gitleaks are Go binaries,
+# codeql is a GitHub release bundle), so their versions live here instead.
+# Sourced by both provisioning paths:
 #   - .github/workflows/ci.yml (the CI runner)
 #   - .claude/hooks/session-start.sh (Claude Code on the web containers)
 # Bump a pin here and both environments pick it up together.
@@ -10,3 +11,6 @@ export MARKDOWNLINT_VERSION="0.45.0"
 export PRETTIER_VERSION="3.8.3"
 export ACTIONLINT_MODULE="github.com/rhysd/actionlint/cmd/actionlint@v1.7.7"
 export GITLEAKS_MODULE="github.com/zricethezav/gitleaks/v8@v8.21.2"
+# The CLI+query-pack bundle check.sh's codeql gate runs (codeql.yml's CI runs use
+# the version pinned to the codeql-action release instead; keep them roughly in step).
+export CODEQL_BUNDLE_VERSION="codeql-bundle-v2.25.6"
diff --git a/tests/test_context.py b/tests/test_context.py
index b792f268..b8cbf9fe 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -415,8 +415,6 @@ def body(state, json_mode):
 
 
 def test_run_command_unexpected_exception_keeps_json_error_shape():
-    import json
-
     def body(state, json_mode):
         raise ValueError("kaboom")
 
diff --git a/tests/test_microphone.py b/tests/test_microphone.py
index 9109747c..6e554573 100644
--- a/tests/test_microphone.py
+++ b/tests/test_microphone.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from aai_cli import microphone
 from aai_cli.errors import CLIError
 from aai_cli.microphone import (
     _FALLBACK_RATE,
@@ -11,6 +12,7 @@
     _default_mic_stream,
     _device_default_rate,
     _SoundDeviceMic,
+    resample_pcm16,
 )
 
 
@@ -193,11 +195,10 @@ def test_resample_pcm16_uses_16bit_mono_params():
     # resample_pcm16 must treat the buffer as 16-bit (2-byte) mono (1-channel) PCM.
     # Compare against audioop driven with those exact params; a mutated width/channel
     # count yields different bytes (or rejects the frame count), killing the mutant.
-    import aai_cli.microphone as m
-
+    # (`microphone.audioop` is the module's own import, so both sides agree.)
     chunk = bytes(range(256))  # 128 little-endian 16-bit mono samples (a ramp)
-    expected, _ = m.audioop.ratecv(chunk, 2, 1, 48000, 24000, None)
-    out, _ = m.resample_pcm16(chunk, None, src_rate=48000, dst_rate=24000)
+    expected, _ = microphone.audioop.ratecv(chunk, 2, 1, 48000, 24000, None)
+    out, _ = resample_pcm16(chunk, None, src_rate=48000, dst_rate=24000)
     assert out == expected
     assert out != chunk  # 48k -> 24k actually changes the data
 
diff --git a/tests/test_stdio.py b/tests/test_stdio.py
index deaf520c..4ce64de7 100644
--- a/tests/test_stdio.py
+++ b/tests/test_stdio.py
@@ -73,9 +73,12 @@ def fake_dup2(fd_src, fd_dst):
 
     monkeypatch.setattr("os.open", fake_open)
     monkeypatch.setattr("os.dup2", fake_dup2)
+    monkeypatch.setattr("os.close", lambda fd: calls.setdefault("closed", fd))
     stdio.silence_stdout()
     assert calls["path"] == __import__("os").devnull
     assert calls["dup2"][0] == 99
+    # The temporary devnull fd must be closed after dup2 — it leaks otherwise.
+    assert calls["closed"] == 99
 
 
 def test_silence_stdout_suppresses_oserror(monkeypatch):
diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
index cf667653..df84191b 100644
--- a/tests/test_telemetry.py
+++ b/tests/test_telemetry.py
@@ -342,9 +342,15 @@ def test_track_success(events, monkeypatch):
     assert event["duration_ms"] == 2000
 
 
+def _raise(exc: BaseException) -> None:
+    # Raising via a call (not a literal `raise` in the `with` body) keeps the
+    # assertions below visibly reachable to static analysis (CodeQL).
+    raise exc
+
+
 def test_track_cli_error_keeps_error_type_and_reraises(events):
     with pytest.raises(UsageError), telemetry.track("aai transcribe"):
-        raise UsageError("bad flag")
+        _raise(UsageError("bad flag"))
     (event,) = events
     assert event["outcome"] == "usage_error"
     assert event["exit_code"] == 2
@@ -357,7 +363,7 @@ def test_track_cli_error_keeps_error_type_and_reraises(events):
 )
 def test_track_typer_exit_maps_code(events, code, outcome):
     with pytest.raises(typer.Exit), telemetry.track("aai login"):
-        raise typer.Exit(code=code)
+        _raise(typer.Exit(code=code))
     (event,) = events
     assert event["outcome"] == outcome
     assert event["exit_code"] == code
@@ -367,7 +373,7 @@ def test_track_typer_exit_maps_code(events, code, outcome):
 
 def test_track_unexpected_exception_is_internal_error(events):
     with pytest.raises(RuntimeError), telemetry.track("aai stream"):
-        raise RuntimeError("boom")
+        _raise(RuntimeError("boom"))
     (event,) = events
     assert event["outcome"] == "internal_error"
     assert event["exit_code"] == 1