diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1c3b0d22..90bc1d74 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -132,19 +132,25 @@ jobs:
       # (require_ffmpeg) before doing their work, so without it those tests fail at the
       # probe rather than exercising the mocked run. PortAudio needs no install — the
       # sounddevice wheel bundles it on Windows. choco ships on the runner but its download
-      # occasionally flakes (one matrix cell got ffmpeg, the other didn't), so retry and
-      # verify ffmpeg is callable here — a real miss fails this step instead of surfacing as
-      # confusing "ffmpeg not on PATH" test failures. The shim lands in choco's bin dir,
-      # already on the runner PATH, so later steps pick it up.
+      # from community.chocolatey.org doesn't just flake — it sometimes *hangs* for the whole
+      # job timeout, and a plain retry loop never gets to retry because the stuck attempt
+      # never returns. So bound each attempt with a hard timeout (Start-Job + Wait-Job): a
+      # hung download is killed and the next attempt retries, instead of wedging the cell
+      # until it's cancelled. The shim lands in choco's bin dir (machine-wide, already on the
+      # runner PATH), so the parent shell and later steps pick it up.
       - name: System deps (ffmpeg)
         shell: pwsh
         run: |
           $ErrorActionPreference = "Stop"
           $env:PATH = "C:\ProgramData\chocolatey\bin;$env:PATH"
           for ($i = 1; $i -le 3; $i++) {
-            choco install ffmpeg --no-progress -y
+            $job = Start-Job { choco install ffmpeg --no-progress -y }
+            if (Wait-Job $job -Timeout 240) { Receive-Job $job } else {
+              Stop-Job $job
+              Write-Host "choco install ffmpeg hung (attempt $i); killing and retrying…"
+            }
+            Remove-Job $job -Force
             if (Get-Command ffmpeg -ErrorAction SilentlyContinue) { break }
-            Write-Host "ffmpeg not yet on PATH (attempt $i); retrying…"
             Start-Sleep -Seconds 5
           }
           ffmpeg -version
diff --git a/README.md b/README.md
index c913546d..cc2979d0 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ That's it. Run `assembly onboard` for a guided tour, or see [Installation](#-ins
 | :--- | :--- |
 | `assembly transcribe` | Transcribe files, URLs, YouTube/podcast pages, podcast RSS feeds, directories, globs, or bucket storage (`s3://`, `gs://`, `az://`) — with speaker labels, PII redaction, summarization, SRT/VTT captions, and resumable batch runs |
 | `assembly stream` | Real-time transcription from your microphone, a file, or a URL — on macOS it can capture system audio too |
-| `assembly dictate` | Push-to-talk dictation: recording starts immediately, press Enter for instant text (Sync STT API, up to 120 s per utterance) |
+| `assembly dictate` | Signal-driven dictation: records immediately, send SIGTERM for instant text — scriptable from hotkey tools like Hammerspoon (Sync STT API, up to 120 s per utterance) |
 | `assembly agent` | Full-duplex spoken conversation with a voice agent, right in your terminal |
 | `assembly agent-cascade` | Same live conversation, but wired client-side from Streaming STT + the LLM Gateway + streaming TTS, like the `agent-cascade` starter (sandbox-only) |
 | `assembly speak` | Synthesize text to speech over the streaming-TTS WebSocket (sandbox-only) |
diff --git a/aai_cli/AGENTS.md b/aai_cli/AGENTS.md
index 2e9c58e0..6ee011b6 100644
--- a/aai_cli/AGENTS.md
+++ b/aai_cli/AGENTS.md
@@ -32,9 +32,9 @@ between layers is enforced — higher may import lower, never the reverse:
   `help_text`, `typer_patches`, `update_check`.
 - **`core/`** — the Rich-free library layer: `client`, `config`,
   `config_builder`, `keyring_store`, `environments`, `env`, `errors`, `llm`,
-  `telemetry`, `debuglog`, `remotefs`, `sync_stt`, `hotkey`, `ws`, `youtube`,
+  `telemetry`, `debuglog`, `remotefs`, `sync_stt`, `signals`, `ws`, `youtube`,
   `wer`, `argscan`, `jsonshape`, `timeparse`, `microphone`, `procs`, `stdio`,
-  `choices`, `locking`, `config_lock`. Contract 4 also forbids `rich` here, so
+  `choices`. Contract 4 also forbids `rich` here, so
   "no Rich below the UI layer" is structural.
 
 Three things sit *beside* the stack, intentionally unlisted in the layers
@@ -139,7 +139,7 @@ heavily-reworked commands with long bodies; small commands keep the inline
 ### Cross-cutting state (resolution order matters)
 
 - **`app/context.py`** — `AppState` (profile, env) is attached to the Typer context in the root `@app.callback()`. `run_command` is the standard command wrapper.
-- **`core/config.py`** — profiles persisted in `config.toml` (via `platformdirs`); the **API key lives only in the OS keyring**, never in a dotfile. The keyring access itself is factored into **`core/keyring_store.py`** (the single importer of `keyring`, holding `KEYRING_SERVICE = "assemblyai-cli"` + `set_secret`/`get_secret`/`restore_secret`/`delete_secret`/`usable`), so the "secrets never touch the dotfile" split is structural; `config` reads/writes secrets through it and only `config.keyring_usable` re-surfaces the probe on the auth facade. Key resolution order: `--api-key` flag (validation paths only) → `ASSEMBLYAI_API_KEY` env → keyring. **Run commands deliberately expose no `--api-key` flag** so keys can't leak into `ps`/shell history. Every `config.toml` write is a read-modify-write (`_load` → mutate → `_dump`): `_dump` is a temp-file + atomic `os.replace` (a reader never sees a torn file), and the whole RMW runs under a cross-process `filelock` (`config_lock.update`/`.locked`, built on `core/locking.py`) so two concurrent `assembly` processes can't lose each other's updates. Readers stay lock-free. The lock helpers live in `config_lock.py` (not `config.py`) only to keep the latter under the file-length gate; reuse one cached `FileLock` per path so nested writers (`persist_login`) stay reentrant.
+- **`core/config.py`** — profiles persisted in `config.toml` (via `platformdirs`); the **API key lives only in the OS keyring**, never in a dotfile. The keyring access itself is factored into **`core/keyring_store.py`** (the single importer of `keyring`, holding `KEYRING_SERVICE = "assemblyai-cli"` + `set_secret`/`get_secret`/`restore_secret`/`delete_secret`/`usable`), so the "secrets never touch the dotfile" split is structural; `config` reads/writes secrets through it and only `config.keyring_usable` re-surfaces the probe on the auth facade. Key resolution order: `--api-key` flag (validation paths only) → `ASSEMBLYAI_API_KEY` env → keyring. **Run commands deliberately expose no `--api-key` flag** so keys can't leak into `ps`/shell history. Every `config.toml` write is a read-modify-write (`_load` → mutate → `_dump`) via the `config._update` context manager: `_dump` is a temp-file + atomic `os.replace`, so a reader never sees a torn file. Writers and readers are otherwise unsynchronized — last write wins (there is **no** cross-process lock; an earlier `filelock`-based serialization was removed because it was a recurring Windows CI flake and the lost-update race it closed isn't worth the cost for a single-user CLI). On Windows the atomic replace has no replace-over-open guarantee, so both the lock-free read and the `os.replace` ride out the transient `PermissionError` through `config._retry_on_sharing_violation` (a no-op on POSIX).
 - **`core/environments.py`** — a frozen `Environment` (api_base, streaming_host, llm_gateway_base, ams_base, stytch_*). `DEFAULT_ENV` is **`production`**; use `--sandbox` (or `--env sandbox000` / `AAI_ENV`) to target the sandbox. The active environment is a process-global set once at startup; precedence: `--env` → `AAI_ENV` → profile's stored env → default. A credential is only valid against the environment that minted it.
 - **`core/client.py`** — thin wrappers over the `assemblyai` SDK (`transcribe`, `list_transcripts`, `stream_audio`, etc.). It normalizes SDK exceptions: auth failures become a single clean `auth_failure()` `CLIError`; everything else becomes `APIError`. New SDK calls should follow this try/except shape.
 - **`core/errors.py`** — the `CLIError` hierarchy (each with `error_type` + `exit_code`). `ui/output.py` emits errors to **stderr**; stdout stays clean for pipelines. `--json` switches to machine-readable output; it is never auto-enabled — `output.resolve_json()` deliberately keeps human text the default even when piped or agent-run.
@@ -149,7 +149,7 @@ heavily-reworked commands with long bodies; small commands keep the inline
 ### Feature subsystems
 
 - **`streaming/`** + `client.stream_audio` — v3 realtime API. Event callbacks run on the SDK reader thread and guard against `BrokenPipeError` (`stdio.silence_stdout()`) so a closed pipe never dumps a thread traceback.
-- **`core/sync_stt.py`** + **`core/hotkey.py`** + `commands/dictate/` — `assembly dictate`: push-to-talk dictation over the **Sync STT API** (`Environment.sync_base`, one POST `/transcribe` per utterance with the required `X-AAI-Model: u3-sync-pro` header; 80 ms–120 s of PCM/WAV). `hotkey.TerminalKeys` scopes stdin into cbreak (Ctrl-C still signals) and reads single keypresses; `dictate_exec._record` polls it with a zero timeout between ~100 ms mic chunks. All three boundaries (keys, mic, HTTP) are injectable, so the suite never needs a real terminal — `tests/test_hotkey.py` drives a pty pair for the termios behavior.
+- **`core/sync_stt.py`** + **`core/signals.py`** + `commands/dictate/` — `assembly dictate`: headless dictation over the **Sync STT API** (`Environment.sync_base`, one POST `/transcribe` per utterance with the required `X-AAI-Model: u3-sync-pro` header; 80 ms–120 s of PCM/WAV). It needs no terminal: recording starts immediately and `dictate_exec._record` polls `signals.stop_on_terminate` between ~100 ms mic chunks for a SIGTERM, which finishes the utterance (clean exit 0) — so a hotkey tool like Hammerspoon can launch it as a background task and `kill -TERM`/`task:terminate()` to transcribe. SIGINT (Ctrl-C) still cancels (exit 130). Both boundaries (the stop latch, mic, HTTP) are injectable, so the suite never needs a real signal or microphone (`tests/test_dictate_exec.py` scripts the SIGTERM latch). Contrast `signals.terminate_as_interrupt` (used by `stream`/`agent`/`speak`), which routes SIGTERM into the *cancel* path instead.
 - **`agent/`** — full-duplex voice agent (mic in, TTS out via `voices.py`).
 - **`agent_cascade/`** + `commands/agent_cascade/` — `assembly agent-cascade`: the same live terminal conversation as `assembly agent`, but **client-orchestrated** — `engine.run_cascade` wires Streaming STT → the LLM Gateway → streaming TTS itself instead of talking to the Voice Agent endpoint, mirroring what the `agent-cascade` `assembly init` template does server-side. **Sandbox-only** (streaming TTS has no prod host; guarded via `tts.session.require_available`). Reuses the agent slice's `DuplexAudio`/`AgentRenderer` and `core.client.stream_audio`/`core.llm.complete`/`tts.session.synthesize`; the three network legs are injected through `engine.CascadeDeps` (the `tts/session.py` seam) so the cascade — greeting, per-sentence TTS, barge-in, history window — is unit-tested against fakes with no sockets/mic/speaker.
 - **`tts/`** + `commands/speak.py` — `assembly speak` synthesizes text to speech over the sandbox streaming-TTS WebSocket (`streaming-tts.sandbox000.…`). **Sandbox-only:** `session.is_available()` is false in production (empty `Environment.streaming_tts_host`), so the command exits 2 with a `--sandbox` hint. `session.synthesize` drives a Begin→Generate→Flush→Audio→Terminate protocol with an injectable `connect` for hermetic tests (mirrors `agent/session.py`); `audio.py` plays the PCM (default) or writes a WAV (`--out`).
diff --git a/aai_cli/commands/dictate/__init__.py b/aai_cli/commands/dictate/__init__.py
index d99e8a3d..75bb119c 100644
--- a/aai_cli/commands/dictate/__init__.py
+++ b/aai_cli/commands/dictate/__init__.py
@@ -22,7 +22,11 @@
     rich_help_panel=help_panels.TRANSCRIPTION,
     epilog=examples_epilog(
         [
-            ("Dictate one utterance: recording starts, Enter transcribes it", "assembly dictate"),
+            ("Record until SIGTERM, then print the transcript", "assembly dictate"),
+            (
+                "Stop the recording and transcribe (e.g. from a hotkey tool)",
+                "kill -TERM $(pgrep -f 'assembly dictate')",
+            ),
             (
                 "Pipe the utterance into another command",
                 'assembly dictate | assembly llm "write a conventional commit"',
@@ -75,13 +79,15 @@ def dictate(
         help="Output mode: text (the bare transcript per utterance, pipe-friendly) or json",
     ),
 ) -> None:
-    """Push-to-talk dictation: record the mic, get the transcript back
+    """Signal-driven dictation: record the mic, get the transcript back
 
-    Recording starts immediately; press Enter (or Space) to stop and the
-    utterance is sent to the AssemblyAI Sync API — the transcript prints right
-    away (no polling) and dictate exits, so it flows straight to the next
-    command in a pipe. The recording can be up to 120 seconds long. Press
-    Ctrl-C to cancel without transcribing.
+    Recording starts immediately and runs headless — no terminal needed — so a
+    hotkey tool like Hammerspoon can launch it as a background task and send
+    SIGTERM (kill -TERM, task:terminate()) to stop. On SIGTERM the utterance is
+    sent to the AssemblyAI Sync API, the transcript prints right away (no
+    polling), and dictate exits, so it flows straight to the next command in a
+    pipe. The recording can be up to 120 seconds long. Ctrl-C (SIGINT) cancels
+    without transcribing.
     """
     opts = dictate_exec.DictateOptions(
         language=language,
diff --git a/aai_cli/commands/dictate/_exec.py b/aai_cli/commands/dictate/_exec.py
index fba52d81..1df0f1e0 100644
--- a/aai_cli/commands/dictate/_exec.py
+++ b/aai_cli/commands/dictate/_exec.py
@@ -1,16 +1,20 @@
 """Run logic for `assembly dictate`: the options/run split (see AGENTS.md).
 
-Push-to-talk dictation over the Sync STT API: recording starts immediately,
-runs until a hotkey is pressed (or the duration cap), then the utterance is
-POSTed to the Sync API, the transcript is printed, and dictate exits. The
-command module (aai_cli/commands/dictate/__init__.py) only parses argv into a
-``DictateOptions``; tests drive the session by constructing options directly and
-injecting the key/mic/HTTP boundaries, with no CliRunner argv round-trip and no
-real terminal.
+Headless dictation over the Sync STT API: recording starts immediately and runs
+until SIGTERM is delivered (or the duration cap), then the utterance is POSTed to
+the Sync API, the transcript is printed, and dictate exits 0. There is no terminal
+interaction — a controller like Hammerspoon launches `assembly dictate` as a
+background task and sends SIGTERM (``task:terminate()`` / ``kill -TERM``) to mean
+"I'm done dictating", so the transcript flows straight to the next command in a
+pipe. SIGINT (Ctrl-C) cancels without transcribing (exit 130). The command module
+(aai_cli/commands/dictate/__init__.py) only parses argv into a ``DictateOptions``;
+tests drive the session by constructing options directly and injecting the
+stop-signal/mic/HTTP boundaries, with no real signals, microphone, or network.
 """
 
 from __future__ import annotations
 
+from collections.abc import Callable
 from dataclasses import dataclass
 
 import typer
@@ -18,8 +22,8 @@
 from aai_cli.app.context import AppState
 from aai_cli.core import choices, errors, sync_stt
 from aai_cli.core.config_builder import split_csv
-from aai_cli.core.hotkey import CTRL_C, CTRL_D, ESC, TerminalKeys
 from aai_cli.core.microphone import MicrophoneSource
+from aai_cli.core.signals import stop_on_terminate
 from aai_cli.streaming.validate import resolve_output_modes
 from aai_cli.ui import output
 
@@ -28,10 +32,6 @@
 TARGET_RATE = 16000
 _BYTES_PER_SECOND = TARGET_RATE * 2  # PCM16 mono
 
-# Enter or Space stops the (auto-started) recording; q / Esc / Ctrl-D also stop
-# it (Ctrl-C cancels — cbreak mode keeps SIGINT delivery).
-STOP_KEYS = frozenset({"\r", "\n", " ", "q", "Q", ESC, CTRL_C, CTRL_D})
-
 
 @dataclass(frozen=True)
 class DictateOptions:
@@ -52,7 +52,7 @@ class DictateOptions:
 
 
 def _note(message: str, *, json_mode: bool, quiet: bool) -> None:
-    """A muted stderr hint guiding the interactive session; silent under --json
+    """A muted stderr hint naming how to finish the recording; silent under --json
     (stderr must stay machine-readable) and --quiet."""
     if json_mode or quiet:
         return
@@ -68,11 +68,14 @@ def _languages(language: str | None) -> str | list[str] | None:
     return codes[0] if len(codes) == 1 else codes
 
 
-def _record(keys: TerminalKeys, mic: MicrophoneSource, *, max_seconds: float) -> bytes:
-    """Capture PCM until a hotkey is pressed again or the duration cap is hit.
+def _record(
+    stop_requested: Callable[[], bool], mic: MicrophoneSource, *, max_seconds: float
+) -> bytes:
+    """Capture PCM until SIGTERM is delivered (``stop_requested`` flips True) or the
+    duration cap is hit.
 
-    The key poll runs between ~100 ms mic chunks with a zero timeout, so the mic
-    read loop is never blocked waiting on the keyboard.
+    The stop poll runs between ~100 ms mic chunks, so a SIGTERM is honored within one
+    chunk without blocking the mic read loop.
     """
     pcm = bytearray()
     frames = iter(mic)
@@ -81,8 +84,7 @@ def _record(keys: TerminalKeys, mic: MicrophoneSource, *, max_seconds: float) ->
             pcm += chunk
             if len(pcm) >= int(max_seconds * _BYTES_PER_SECOND):
                 break
-            # None (no key pending) is simply not in the set.
-            if keys.read(0) in STOP_KEYS:
+            if stop_requested():
                 break
     finally:
         # MicrophoneSource yields from a generator whose cleanup releases the
@@ -122,8 +124,8 @@ def _transcribe_utterance(
 ) -> None:
     """Send one recorded utterance to the Sync API and print the transcript.
 
-    A recording below the API's 80 ms floor (a double-tapped hotkey) is skipped
-    with a warning rather than bounced off the server as a 400.
+    A recording below the API's 80 ms floor (an instant SIGTERM) is skipped with a
+    warning rather than bounced off the server as a 400.
     """
     if len(pcm) < sync_stt.MIN_AUDIO_MS * _BYTES_PER_SECOND // 1000:
         output.emit_warning(
@@ -144,7 +146,7 @@ def _transcribe_utterance(
 
 
 def _capture_and_transcribe(
-    keys: TerminalKeys,
+    stop_requested: Callable[[], bool],
     api_key: str,
     opts: DictateOptions,
     state: AppState,
@@ -156,10 +158,12 @@ def _capture_and_transcribe(
         target_rate=TARGET_RATE,
         device=opts.device,
         on_open=lambda: _note(
-            "● Recording — press Enter to stop.", json_mode=json_mode, quiet=state.quiet
+            "● Recording — send SIGTERM to transcribe (Ctrl-C cancels).",
+            json_mode=json_mode,
+            quiet=state.quiet,
         ),
     )
-    pcm = _record(keys, mic, max_seconds=opts.max_seconds)
+    pcm = _record(stop_requested, mic, max_seconds=opts.max_seconds)
     _transcribe_utterance(api_key, pcm, opts, state, json_mode=json_mode)
 
 
@@ -170,34 +174,30 @@ def run_dictate(opts: DictateOptions, state: AppState, *, json_mode: bool) -> No
     # dictate has no live panel, so the text_mode half is unused — plain
     # transcript text is already the non-JSON default in `_emit`.
     _, json_mode = resolve_output_modes(opts.output_field, json_mode=json_mode)
+    # Resolve credentials before recording: don't capture audio we can't transcribe.
+    api_key = state.resolve_api_key()
+    if opts.prompt and opts.language:
+        # The server ignores language_code whenever a custom prompt is set;
+        # never drop a requested flag silently (mirrors the speak warnings).
+        output.emit_warning(
+            "--language is ignored when --prompt is set; state the language inside the prompt.",
+            json_mode=json_mode,
+        )
+    if opts.once and not state.quiet:
+        # Deprecation trap, not removal: --once still parses so old scripts don't
+        # break, but recording one utterance and exiting is now the default, so the
+        # flag does nothing — say so once (mirrors `login`).
+        output.emit_warning(
+            "--once is now the default and can be omitted.",
+            json_mode=json_mode,
+        )
     try:
-        # Entering TerminalKeys validates the terminal (a usage precondition)
-        # before credentials, so a piped stdin reads as "needs a terminal" — not
-        # as a login prompt.
-        with TerminalKeys() as keys:
-            api_key = state.resolve_api_key()
-            if opts.prompt and opts.language:
-                # The server ignores language_code whenever a custom prompt is set;
-                # never drop a requested flag silently (mirrors the speak warnings).
-                output.emit_warning(
-                    "--language is ignored when --prompt is set; "
-                    "state the language inside the prompt.",
-                    json_mode=json_mode,
-                )
-            if opts.once and not state.quiet:
-                # Deprecation trap, not removal: --once still parses so old scripts
-                # don't break, but recording one utterance and exiting is now the
-                # default, so the flag does nothing — say so once (mirrors `login`).
-                output.emit_warning(
-                    "--once is now the default and can be omitted.",
-                    json_mode=json_mode,
-                )
-            # Recording auto-starts and exits after one utterance: a single
-            # keystroke stops the capture, which also closes a piped stdout so
-            # `assembly dictate | assembly llm …` unblocks the downstream command.
-            _capture_and_transcribe(keys, api_key, opts, state, json_mode=json_mode)
+        # Recording auto-starts and exits after one utterance: SIGTERM stops the
+        # capture, which also closes a piped stdout so `assembly dictate | assembly
+        # llm …` unblocks the downstream command.
+        with stop_on_terminate() as stop_requested:
+            _capture_and_transcribe(stop_requested, api_key, opts, state, json_mode=json_mode)
     except KeyboardInterrupt:
-        # Ctrl-C cancels dictation, so it exits 130 (cancel) — distinct from `q`, which
-        # ends the session normally (exit 0). The with-block above already restored the
-        # terminal on the way out.
+        # Ctrl-C / SIGINT cancels dictation, so it exits 130 (cancel) — distinct from
+        # SIGTERM, which finishes the utterance normally (exit 0).
         raise typer.Exit(code=errors.CANCELLED_EXIT_CODE) from None
diff --git a/aai_cli/core/config.py b/aai_cli/core/config.py
index 11324f45..f8933df8 100644
--- a/aai_cli/core/config.py
+++ b/aai_cli/core/config.py
@@ -1,18 +1,21 @@
 from __future__ import annotations
 
 import contextlib
+import io
 import os
 import re
 import tempfile
+import time
 import tomllib
 import uuid
+from collections.abc import Callable, Generator
 from pathlib import Path
 
 import platformdirs
 import tomli_w
 from pydantic import BaseModel, ConfigDict, Field, ValidationError
 
-from aai_cli.core import config_lock, debuglog, env, keyring_store
+from aai_cli.core import debuglog, env, keyring_store
 from aai_cli.core.errors import CLIError, NotAuthenticated
 
 ENV_API_KEY = "ASSEMBLYAI_API_KEY"
@@ -115,6 +118,26 @@ def _validation_summary(exc: ValidationError) -> str:
 # snapshots one for rollback), so hand out deep copies, never the cached object.
 _load_cache: dict[Path, tuple[int, int, Config]] = {}
 
+# Windows has no atomic replace-over-open like POSIX: while _dump swaps the temp
+# file in (os.replace), a racing open on the same path transiently fails with
+# PermissionError. Since readers are lock-free, both a lock-free reader's open and
+# the writer's replace can lose that race, so each retries a few short backoffs to
+# ride out the (sub-millisecond) rename window. POSIX replaces atomically and never
+# raises here, so this only ever loops on Windows.
+_SHARING_RETRIES = 5  # pragma: no mutate -- a ±1 change in the retry budget is equivalent
+_SHARING_BACKOFF = 0.02  # pragma: no mutate -- a timing constant; any small value works
+
+
+def _retry_on_sharing_violation[T](op: Callable[[], T]) -> T:
+    """Run a file op, retrying the transient PermissionError Windows raises when an
+    open and an os.replace race on the same path (see _SHARING_RETRIES)."""
+    for _ in range(_SHARING_RETRIES - 1):
+        try:
+            return op()
+        except PermissionError:
+            time.sleep(_SHARING_BACKOFF)
+    return op()  # the last attempt's error (a genuine permission problem) propagates
+
 
 def _load() -> Config:
     path = _config_file()
@@ -125,15 +148,15 @@ def _load() -> Config:
     cached = _load_cache.get(path)
     if cached is not None and cached[0] == stat.st_mtime_ns and cached[1] == stat.st_size:
         return cached[2].model_copy(deep=True)
-    with path.open("rb") as fh:
-        try:
-            data = tomllib.load(fh)
-        except tomllib.TOMLDecodeError as exc:
-            raise CLIError(
-                f"Config file at {path} is not valid TOML ({exc}). Fix or delete it.",
-                error_type="invalid_config",
-                exit_code=2,
-            ) from exc
+    raw = _retry_on_sharing_violation(path.read_bytes)
+    try:
+        data = tomllib.load(io.BytesIO(raw))
+    except tomllib.TOMLDecodeError as exc:
+        raise CLIError(
+            f"Config file at {path} is not valid TOML ({exc}). Fix or delete it.",
+            error_type="invalid_config",
+            exit_code=2,
+        ) from exc
     try:
         cfg = Config.model_validate(data)
     except ValidationError as exc:
@@ -159,7 +182,7 @@ def _dump(cfg: Config) -> None:
     try:
         with os.fdopen(fd, "wb") as fh:
             tomli_w.dump(cfg.model_dump(exclude_none=True), fh)
-        tmp.replace(path)
+        _retry_on_sharing_violation(lambda: tmp.replace(path))
         # The mtime/size key usually invalidates on its own, but drop the entry
         # explicitly so a same-size rewrite on a coarse-mtime filesystem can't
         # serve the pre-write parse.
@@ -170,6 +193,17 @@ def _dump(cfg: Config) -> None:
         raise
 
 
+@contextlib.contextmanager
+def _update() -> Generator[Config]:
+    """Run a config.toml read-modify-write: ``_load`` -> mutate the yielded config ->
+    ``_dump``. The dump runs on clean exit; an exception in the block propagates and
+    skips it. The atomic os.replace in ``_dump`` keeps a reader from ever seeing a torn
+    file (writers and readers are otherwise unsynchronized: last write wins)."""
+    cfg = _load()
+    yield cfg
+    _dump(cfg)
+
+
 def get_active_profile() -> str:
     return _load().active_profile or DEFAULT_PROFILE
 
@@ -187,7 +221,7 @@ def set_active_profile(name: str) -> None:
     with no hint why, so the typo is rejected here with the known names listed.
     """
     validate_profile(name)
-    with config_lock.update(_load, _dump) as cfg:
+    with _update() as cfg:
         if name not in cfg.profiles:
             known = ", ".join(sorted(cfg.profiles)) or "none yet"
             raise CLIError(
@@ -202,7 +236,7 @@ def set_active_profile(name: str) -> None:
 def set_api_key(profile: str, api_key: str) -> None:
     validate_profile(profile)
     keyring_store.set_secret(profile, api_key)
-    with config_lock.update(_load, _dump) as cfg:
+    with _update() as cfg:
         cfg.profiles.setdefault(profile, Profile())
         if cfg.active_profile is None:
             cfg.active_profile = profile
@@ -232,7 +266,7 @@ def get_profile_env(profile: str) -> str | None:
 def set_profile_env(profile: str, env: str) -> None:
     """Bind a backend environment to a profile so its key and hosts stay matched."""
     validate_profile(profile)
-    with config_lock.update(_load, _dump) as cfg:
+    with _update() as cfg:
         cfg.profiles.setdefault(profile, Profile()).env = env
 
 
@@ -245,7 +279,7 @@ def get_profile_email(profile: str) -> str | None:
 def set_profile_email(profile: str, email: str) -> None:
     """Persist the login email for a profile (gates internal-environment access)."""
     validate_profile(profile)
-    with config_lock.update(_load, _dump) as cfg:
+    with _update() as cfg:
         cfg.profiles.setdefault(profile, Profile()).email = email
 
 
@@ -271,7 +305,7 @@ def set_session(profile: str, *, session_jwt: str, session_token: str, account_i
         _session_username(profile),
         StoredSession(jwt=session_jwt, token=session_token).model_dump_json(),
     )
-    with config_lock.update(_load, _dump) as cfg:
+    with _update() as cfg:
         cfg.profiles.setdefault(profile, Profile()).account_id = account_id
 
 
@@ -295,12 +329,11 @@ def get_account_id(profile: str) -> int | None:
 
 def clear_session(profile: str) -> None:
     keyring_store.delete_secret(_session_username(profile))
-    with config_lock.locked():
-        cfg = _load()
-        prof = cfg.profiles.get(profile)
-        if prof and prof.account_id is not None:
-            prof.account_id = None
-            _dump(cfg)
+    cfg = _load()
+    prof = cfg.profiles.get(profile)
+    if prof and prof.account_id is not None:
+        prof.account_id = None
+        _dump(cfg)
 
 
 def persist_login(
@@ -323,32 +356,29 @@ def persist_login(
     restored best-effort.
     """
     validate_profile(profile)
-    # Hold the write lock across the whole snapshot -> writes -> rollback so a concurrent
-    # writer can't slip a change between the snapshot and a rollback dump. The set_*
-    # helpers re-take the same (reentrant) lock, so the nesting is safe.
-    with config_lock.locked():
-        prior_api_key = keyring_store.get_secret(profile)
-        prior_session = keyring_store.get_secret(_session_username(profile))
-        prior_cfg = _load()
-        done = False
-        try:
-            set_api_key(profile, api_key)
-            set_profile_env(profile, env)
-            set_session(
-                profile,
-                session_jwt=session_jwt,
-                session_token=session_token,
-                account_id=account_id,
-            )
-            # Within the same atomic rollback so the sandbox gate can't read stale identity.
-            if email is not None:
-                set_profile_email(profile, email)
-            done = True
-        finally:
-            if not done:
-                keyring_store.restore_secret(profile, prior_api_key)
-                keyring_store.restore_secret(_session_username(profile), prior_session)
-                _dump(prior_cfg)
+    # Snapshot the prior state so a mid-sequence failure rolls back cleanly to it.
+    prior_api_key = keyring_store.get_secret(profile)
+    prior_session = keyring_store.get_secret(_session_username(profile))
+    prior_cfg = _load()
+    done = False
+    try:
+        set_api_key(profile, api_key)
+        set_profile_env(profile, env)
+        set_session(
+            profile,
+            session_jwt=session_jwt,
+            session_token=session_token,
+            account_id=account_id,
+        )
+        # Within the same atomic rollback so the sandbox gate can't read stale identity.
+        if email is not None:
+            set_profile_email(profile, email)
+        done = True
+    finally:
+        if not done:
+            keyring_store.restore_secret(profile, prior_api_key)
+            keyring_store.restore_secret(_session_username(profile), prior_session)
+            _dump(prior_cfg)
 
 
 def has_device_id() -> bool:
@@ -362,12 +392,11 @@ def get_device_id() -> str:
     """A stable anonymous install id for telemetry: a random UUID minted locally on
     first use and persisted in config.toml. Carries nothing derivable from the
     machine or account."""
-    with config_lock.locked():
-        cfg = _load()
-        if cfg.device_id is None:
-            cfg.device_id = str(uuid.uuid4())
-            _dump(cfg)
-        return cfg.device_id
+    cfg = _load()
+    if cfg.device_id is None:
+        cfg.device_id = str(uuid.uuid4())
+        _dump(cfg)
+    return cfg.device_id
 
 
 def get_telemetry_enabled() -> bool | None:
@@ -377,7 +406,7 @@ def get_telemetry_enabled() -> bool | None:
 
 
 def set_telemetry_enabled(*, enabled: bool) -> None:
-    with config_lock.update(_load, _dump) as cfg:
+    with _update() as cfg:
         cfg.telemetry_enabled = enabled
 
 
@@ -390,7 +419,7 @@ def get_update_cache() -> tuple[float | None, str | None]:
 def set_update_cache(*, last_check: float, latest_version: str | None) -> None:
     """Persist the update-notifier cache. ``latest_version`` is None when the last
     fetch failed — the timestamp is still recorded so we don't re-spawn every run."""
-    with config_lock.update(_load, _dump) as cfg:
+    with _update() as cfg:
         cfg.update_last_check = last_check
         cfg.update_latest_version = latest_version
 
diff --git a/aai_cli/core/config_lock.py b/aai_cli/core/config_lock.py
deleted file mode 100644
index 70d1912f..00000000
--- a/aai_cli/core/config_lock.py
+++ /dev/null
@@ -1,51 +0,0 @@
-"""The cross-process write lock for config.toml's read-modify-write.
-
-config.toml's mutating helpers do ``_load -> mutate -> _dump``. The atomic ``os.replace``
-in ``_dump`` keeps a *reader* from ever seeing a torn file, but two writers racing would
-still lose an update — both read the same config, and the second dump clobbers the first's
-change. These helpers serialize the whole read-modify-write across processes via a sibling
-lock file; readers stay lock-free (an older-but-valid parse is fine).
-
-Kept out of ``config.py`` only to keep that module under the file-length gate; it reaches
-back into ``config`` (its ``_load``/``_dump``/``config_dir``) lazily, at call time.
-"""
-
-from __future__ import annotations
-
-import contextlib
-from collections.abc import Callable, Generator
-from pathlib import Path
-
-import filelock
-
-from aai_cli.core import config, locking
-
-
-def lock_path() -> Path:
-    return config.config_dir() / "config.toml.lock"
-
-
-def write_lock() -> filelock.FileLock:
-    """The shared cross-process write lock guarding config.toml."""
-    return locking.file_lock(lock_path())
-
-
-@contextlib.contextmanager
-def locked() -> Generator[None]:
-    """Hold the config write lock for the duration of the block."""
-    with locking.locked(lock_path()):
-        yield
-
-
-@contextlib.contextmanager
-def update(
-    load: Callable[[], config.Config], dump: Callable[[config.Config], None]
-) -> Generator[config.Config]:
-    """Run a load -> mutate -> dump under the write lock so a concurrent writer can't lose
-    the update. Yields the loaded config; dumps it on clean exit (an exception in the block
-    propagates and skips the dump). ``load``/``dump`` are injected by config.py so this
-    module stays clear of its private helpers."""
-    with locked():
-        cfg = load()
-        yield cfg
-        dump(cfg)
diff --git a/aai_cli/core/hotkey.py b/aai_cli/core/hotkey.py
deleted file mode 100644
index 37b4ccab..00000000
--- a/aai_cli/core/hotkey.py
+++ /dev/null
@@ -1,130 +0,0 @@
-"""Single-keypress input for hotkey-driven commands (`assembly dictate`).
-
-``TerminalKeys`` reads individual keypresses — without waiting for Enter — for the
-lifetime of a ``with`` block, while Ctrl-C still ends the program. One interface,
-two backends:
-
-- POSIX puts stdin into cbreak mode (termios/tty) and waits with ``select``; cbreak
-  keeps ISIG, so Ctrl-C raises KeyboardInterrupt instead of arriving as a byte.
-- Windows reads the console through stdlib ``msvcrt`` (``kbhit``/``getwch``), which is
-  already character-at-a-time, so there is no mode to enter or restore.
-
-A platform that is neither (no termios and not Windows) raises a clean CLIError rather
-than an ImportError traceback. Stdlib-only on purpose, mirroring the other
-non-rendering layers.
-"""
-
-from __future__ import annotations
-
-import importlib
-import os
-import select
-import sys
-import time
-from collections.abc import Callable
-
-from aai_cli.core.errors import CLIError
-
-# Control characters hotkey-driven commands treat as "end the session".
-CTRL_C = "\x03"
-CTRL_D = "\x04"
-ESC = "\x1b"
-
-# How long the Windows key poll naps between kbhit() checks (msvcrt has no select()):
-# short enough to feel instant at the dictate prompt, long enough not to spin a core.
-_WINDOWS_POLL_INTERVAL = 0.01
-
-
-def _stdin_fd() -> int:
-    """The stdin file descriptor, or -1 when stdin has none (a captured/replaced
-    stream in an embedding or test harness) — os.isatty(-1) is False, so that
-    case falls into the clean not-a-tty error instead of an fileno traceback."""
-    try:
-        return sys.stdin.fileno()
-    except (ValueError, OSError):  # ValueError covers io.UnsupportedOperation
-        return -1
-
-
-def _on_windows() -> bool:
-    """True on Windows, where key input goes through msvcrt instead of termios. A
-    function (not a constant) so tests can drive the Windows backend on a POSIX host."""
-    return sys.platform == "win32"
-
-
-class TerminalKeys:
-    """Reads single keypresses from a terminal, scoped via ``with``.
-
-    The fd is injectable (POSIX tests drive it through a pty pair) and defaults to the
-    process's stdin; the Windows backend reads the console directly and ignores it.
-    """
-
-    def __init__(self, fd: int | None = None) -> None:
-        self._fd = fd if fd is not None else _stdin_fd()
-        # termios.tcgetattr's attribute list (typeshed's exact shape); stays None on
-        # Windows, where there is no saved terminal state to restore.
-        self._saved: list[int | list[bytes | int]] | None = None
-        self._windows = _on_windows()
-
-    def __enter__(self) -> TerminalKeys:
-        if not os.isatty(self._fd):
-            raise CLIError(
-                "This command needs an interactive terminal: it waits for hotkey presses on stdin.",
-                error_type="not_a_tty",
-                exit_code=2,
-                suggestion="Run it directly in a terminal, without piping or redirecting stdin.",
-            )
-        if self._windows:
-            # The Windows console is already character-at-a-time; there is no cbreak mode
-            # to enter or restore, so read() goes straight through msvcrt.
-            return self
-        try:
-            import termios
-            import tty
-        except ImportError as exc:
-            raise CLIError(
-                "Hotkey input is not supported on this platform (no termios).",
-                error_type="unsupported_platform",
-                exit_code=2,
-            ) from exc
-        self._saved = termios.tcgetattr(self._fd)
-        tty.setcbreak(self._fd)
-        return self
-
-    def __exit__(self, *exc: object) -> None:
-        if self._saved is not None:
-            import termios
-
-            termios.tcsetattr(self._fd, termios.TCSADRAIN, self._saved)
-            self._saved = None
-
-    def read(self, timeout: float | None) -> str | None:
-        """One keypress, or None when ``timeout`` elapses or stdin hits EOF.
-
-        ``timeout=None`` blocks until a key arrives; ``timeout=0`` polls without
-        waiting (the in-recording check between audio chunks).
-        """
-        if self._windows:
-            return self._read_windows(timeout)
-        ready, _, _ = select.select([self._fd], [], [], timeout)
-        if not ready:
-            return None
-        data = os.read(self._fd, 1)
-        if not data:
-            return None
-        return data.decode("utf-8", "replace")
-
-    def _read_windows(self, timeout: float | None) -> str | None:
-        """Windows key read: getwch() blocks (timeout=None) or kbhit() polls to a deadline."""
-        msvcrt = importlib.import_module("msvcrt")
-        # Bind the win32-only members to typed locals: typeshed hides them off-Windows,
-        # so the type-checkers reject `msvcrt.getwch` directly but accept these.
-        kbhit: Callable[[], bool] = msvcrt.kbhit
-        getwch: Callable[[], str] = msvcrt.getwch
-        if timeout is None:
-            return getwch()
-        deadline = time.monotonic() + timeout  # pragma: no mutate
-        while not kbhit():
-            if time.monotonic() >= deadline:  # pragma: no mutate
-                return None
-            time.sleep(_WINDOWS_POLL_INTERVAL)  # pragma: no mutate
-        return getwch()
diff --git a/aai_cli/core/locking.py b/aai_cli/core/locking.py
deleted file mode 100644
index dbfb33b2..00000000
--- a/aai_cli/core/locking.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""Cross-process advisory file locking, backed by ``filelock``.
-
-Used to serialize a read-modify-write of a shared on-disk file (``config.toml``) across
-concurrent ``assembly`` processes, so two of them can't lose each other's updates
-(last-writer-wins). The atomic-rename write keeps a *reader* from ever seeing a torn
-file; this lock is what keeps two *writers* from clobbering each other.
-"""
-
-from __future__ import annotations
-
-import contextlib
-from collections.abc import Generator
-from pathlib import Path
-
-import filelock
-
-# One cached lock instance per lock-file path. filelock already serializes threads within
-# this process (and other processes via the lock file), and reusing a single instance per
-# path makes nested acquisitions reentrant — distinct instances on one path deadlock, which
-# is what a re-entrant caller (e.g. a snapshot+rollback that calls smaller writers) needs.
-_lock_cache: dict[str, filelock.FileLock] = {}
-
-
-def file_lock(path: Path) -> filelock.FileLock:
-    """The cached cross-process lock for ``path`` (created on first use)."""
-    key = str(path)
-    lock = _lock_cache.get(key)
-    if lock is None:
-        lock = filelock.FileLock(path)
-        _lock_cache[key] = lock
-    return lock
-
-
-@contextlib.contextmanager
-def locked(path: Path) -> Generator[None]:
-    """Hold the cross-process lock at ``path`` for the duration of the block.
-
-    Creates the lock file's parent directory first — filelock won't, and the very first
-    write on a fresh machine targets a dir that may not exist yet.
-    """
-    path.parent.mkdir(parents=True, exist_ok=True)
-    with file_lock(path):
-        yield
diff --git a/aai_cli/core/signals.py b/aai_cli/core/signals.py
index 0ad3376e..de85186c 100644
--- a/aai_cli/core/signals.py
+++ b/aai_cli/core/signals.py
@@ -4,6 +4,10 @@
 signal: SIGINT arrives as a ``KeyboardInterrupt`` that the streaming lifecycle catches
 to flush its closing state and exit 130 (the cancel code). This module lets an
 *external* stop reach that same path — see ``terminate_as_interrupt``.
+
+`dictate` wants the opposite of cancel: an external SIGTERM means "I'm done, now
+transcribe" (a clean exit 0), so it uses ``stop_on_terminate`` instead — same
+SIGTERM, latched into a poll-able flag rather than re-raised as a cancel.
 """
 
 from __future__ import annotations
@@ -11,7 +15,7 @@
 import contextlib
 import signal
 import threading
-from collections.abc import Generator
+from collections.abc import Callable, Generator
 from types import FrameType
 
 
@@ -43,3 +47,45 @@ def _raise_interrupt(signum: int, frame: FrameType | None) -> None:
         yield
     finally:
         signal.signal(signal.SIGTERM, previous)
+
+
+class _StopFlag:
+    """A poll-able 'SIGTERM has been delivered' latch (see ``stop_on_terminate``)."""
+
+    def __init__(self) -> None:
+        self.stopped = False
+
+    def __call__(self) -> bool:
+        return self.stopped
+
+
+@contextlib.contextmanager
+def stop_on_terminate() -> Generator[Callable[[], bool]]:
+    """Latch SIGTERM into a poll-able 'stop now' predicate for the block.
+
+    Unlike ``terminate_as_interrupt`` (which routes SIGTERM into the cancel path),
+    this is for `assembly dictate`'s headless capture: a controller — a Hammerspoon
+    hotkey, a wrapper's ``kill -TERM`` — launches `assembly dictate`, which starts
+    recording immediately, then sends SIGTERM to mean "I'm done dictating". The
+    capture loop sees the flag flip, stops, and the utterance is transcribed (a clean
+    exit 0). SIGINT (Ctrl-C) stays the cancel that aborts without transcribing.
+
+    Yields a zero-argument predicate the capture loop polls between mic chunks; it
+    flips to True once SIGTERM is delivered. No-op off the main thread (where
+    ``signal.signal`` refuses to install a handler), yielding a predicate that stays
+    False; the previous handler is always restored on exit.
+    """
+    flag = _StopFlag()
+    if threading.current_thread() is not threading.main_thread():
+        yield flag
+        return
+
+    def _request_stop(signum: int, frame: FrameType | None) -> None:
+        del signum, frame  # handler signature is fixed; the values are unused
+        flag.stopped = True
+
+    previous = signal.signal(signal.SIGTERM, _request_stop)
+    try:
+        yield flag
+    finally:
+        signal.signal(signal.SIGTERM, previous)
diff --git a/pyproject.toml b/pyproject.toml
index 8e04d4da..78404223 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,11 +65,6 @@ dependencies = [
     # lazily). Strips boilerplate down to the readable body; ships prebuilt wheels
     # (lxml included), so it adds no source-compile step to Homebrew bottling.
     "trafilatura>=2.1.0",
-    # Cross-process advisory lock around config.toml's read-modify-write (config.py),
-    # so two concurrent `assembly` processes can't lose each other's profile/telemetry
-    # updates (last-writer-wins). Pure-Python, no compiled deps; already arrived
-    # transitively via the dev toolchain, so the lock pins the same release.
-    "filelock>=3.16.0",
 ]
 
 [project.urls]
diff --git a/tests/AGENTS.md b/tests/AGENTS.md
index 3247914e..6595b9fb 100644
--- a/tests/AGENTS.md
+++ b/tests/AGENTS.md
@@ -109,8 +109,8 @@ on macOS — so OS-specific failures you never see on Linux still land on `main`
 These have each cost a session a follow-up PR; bake the fix in up front:
 
 - **POSIX-only imports at module scope crash collection on Windows.** A top-level
-  `import termios` / `fcntl` / `os.openpty` (e.g. `tests/test_hotkey.py`'s pty driver)
-  aborts collection before any skip can apply. Guard it with
+  `import termios` / `fcntl` / `os.openpty` (e.g. a pty-driving test for a termios
+  backend) aborts collection before any skip can apply. Guard it with
   `pytest.importorskip("termios")` at the top of the module — that skips the whole file
   on Windows and, unlike a skip/xfail marker, is **not** counted by the Linux
   escape-hatch gate (which greps for the marker/call forms — so don't paste those literal
diff --git a/tests/__snapshots__/test_snapshots_help_root.ambr b/tests/__snapshots__/test_snapshots_help_root.ambr
index e8935ece..e41f5e86 100644
--- a/tests/__snapshots__/test_snapshots_help_root.ambr
+++ b/tests/__snapshots__/test_snapshots_help_root.ambr
@@ -45,7 +45,7 @@
   │                batch                                                         │
   │ stream         Transcribe live audio in real time from a mic, file, URL, or  │
   │                pipe                                                          │
-  │ dictate        Push-to-talk dictation: record the mic, get the transcript    │
+  │ dictate        Signal-driven dictation: record the mic, get the transcript   │
   │                back                                                          │
   │ agent          Hold a live two-way voice conversation with a voice agent     │
   │ agent-cascade  [sandbox] Hold a live voice conversation through a self-wired │
diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr
index e0c2b43b..e9a69d92 100644
--- a/tests/__snapshots__/test_snapshots_help_run.ambr
+++ b/tests/__snapshots__/test_snapshots_help_run.ambr
@@ -380,13 +380,15 @@
   
    Usage: assembly dictate [OPTIONS]
   
-   Push-to-talk dictation: record the mic, get the transcript back
+   Signal-driven dictation: record the mic, get the transcript back
   
-   Recording starts immediately; press Enter (or Space) to stop and the
-   utterance is sent to the AssemblyAI Sync API — the transcript prints right
-   away (no polling) and dictate exits, so it flows straight to the next
-   command in a pipe. The recording can be up to 120 seconds long. Press
-   Ctrl-C to cancel without transcribing.
+   Recording starts immediately and runs headless — no terminal needed — so a
+   hotkey tool like Hammerspoon can launch it as a background task and send
+   SIGTERM (kill -TERM, task:terminate()) to stop. On SIGTERM the utterance is
+   sent to the AssemblyAI Sync API, the transcript prints right away (no
+   polling), and dictate exits, so it flows straight to the next command in a
+   pipe. The recording can be up to 120 seconds long. Ctrl-C (SIGINT) cancels
+   without transcribing.
   
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
   │ --language             TEXT                       ISO 639-1 language code,   │
@@ -413,8 +415,10 @@
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples
-   Dictate one utterance: recording starts, Enter transcribes it
+   Record until SIGTERM, then print the transcript
    $ assembly dictate
+   Stop the recording and transcribe (e.g. from a hotkey tool)
+   $ kill -TERM $(pgrep -f 'assembly dictate')
    Pipe the utterance into another command
    $ assembly dictate | assembly llm "write a conventional commit"
    Dictate in Spanish
diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py
index ad5647b3..4caec1a0 100644
--- a/tests/test_concurrency.py
+++ b/tests/test_concurrency.py
@@ -3,11 +3,10 @@
 Two real concurrency surfaces exist in the CLI, and neither was directly exercised:
 
 1. ``core.config`` persists ``config.toml`` with a temp-file + atomic ``os.replace``
-   (`config._dump`) so a reader never observes a truncated file, and serializes its
-   read-modify-write under a cross-process ``filelock`` (`config._write_lock`) so two
-   concurrent ``assembly`` processes can't lose each other's updates. These tests pin
-   both: the at-rest atomicity under thread contention, and that distinct concurrent
-   updates all survive (no last-writer-wins clobber).
+   (`config._dump`) so a reader never observes a truncated file. Writers and readers are
+   otherwise unsynchronized (last write wins), and on Windows the replace window is ridden
+   out by a small retry (`config._retry_on_sharing_violation`). These tests pin the at-rest
+   atomicity under thread contention and that retry helper.
 2. ``streaming.StreamSession.on_turn`` runs on the SDK reader thread, and the
    ``--system-audio`` path drives two of those threads at once (`session._drive`). The
    turn write is serialized by ``_callback_lock`` so two sources can't interleave a
@@ -20,7 +19,62 @@
 import types
 from concurrent.futures import ThreadPoolExecutor
 
-from aai_cli.core import config, config_lock
+import pytest
+
+from aai_cli.core import config
+
+# --- config.toml: the Windows os.replace sharing-window retry -----------------------
+
+
+def test_retry_on_sharing_violation_returns_without_retrying_on_success(monkeypatch):
+    # The common case: the op succeeds first try, so no backoff sleep happens.
+    sleeps: list[float] = []
+    monkeypatch.setattr(config, "time", types.SimpleNamespace(sleep=sleeps.append))
+    calls = []
+
+    def op():
+        calls.append(1)
+        return "ok"
+
+    assert config._retry_on_sharing_violation(op) == "ok"
+    assert len(calls) == 1
+    assert sleeps == []
+
+
+def test_retry_on_sharing_violation_rides_out_transient_permission_errors(monkeypatch):
+    # Two transient PermissionErrors (Windows' replace window) then success: the helper
+    # backs off between attempts and ultimately returns the value, never raising.
+    sleeps: list[float] = []
+    monkeypatch.setattr(config, "time", types.SimpleNamespace(sleep=sleeps.append))
+    calls = []
+
+    def op():
+        calls.append(1)
+        if len(calls) < 3:
+            raise PermissionError("file is being replaced")
+        return "ok"
+
+    assert config._retry_on_sharing_violation(op) == "ok"
+    assert len(calls) == 3  # two failures, then the success
+    assert sleeps == [config._SHARING_BACKOFF, config._SHARING_BACKOFF]  # one per retry
+
+
+def test_retry_on_sharing_violation_reraises_a_persistent_permission_error(monkeypatch):
+    # A genuine, persistent permission problem is not a transient sharing race: after the
+    # full budget the last attempt's error propagates rather than looping forever.
+    sleeps: list[float] = []
+    monkeypatch.setattr(config, "time", types.SimpleNamespace(sleep=sleeps.append))
+    calls = []
+
+    def op():
+        calls.append(1)
+        raise PermissionError("denied")
+
+    with pytest.raises(PermissionError, match="denied"):
+        config._retry_on_sharing_violation(op)
+    # Exactly the full budget of attempts (loop retries + one final attempt), no more.
+    assert len(calls) == config._SHARING_RETRIES
+
 
 # --- config.toml: atomic writes vs. lost updates -----------------------------------
 
@@ -57,58 +111,6 @@ def reader() -> None:
     assert sorted(p.name for p in tmp_config.iterdir()) == ["config.toml"]  # no temp leftover
 
 
-def test_concurrent_writers_do_not_lose_distinct_updates(tmp_config):
-    # The cross-process write lock makes the read-modify-write atomic, so many threads
-    # each adding a DISTINCT profile through the public API all survive. Without the lock
-    # the interleaved RMW would drop some (two writers _load the same config; the second
-    # _dump clobbers the first's new profile) — this is the lost-update race the lock closes.
-    workers = 16
-    barrier = threading.Barrier(workers)  # release all writers at once for max contention
-
-    def add(i: int) -> None:
-        barrier.wait()
-        config.set_profile_env(f"p{i:02d}", f"sandbox{i:03d}")
-
-    with ThreadPoolExecutor(max_workers=workers) as pool:
-        for f in [pool.submit(add, i) for i in range(workers)]:
-            f.result()
-
-    # Every concurrent update is present with its own value — none clobbered.
-    assert config.list_profiles() == {f"p{i:02d}": f"sandbox{i:03d}" for i in range(workers)}
-
-
-def test_write_lock_targets_the_config_dir_lock_file(tmp_config):
-    # The lock guards config.toml via a sibling lock file in the same dir.
-    assert config_lock.write_lock().lock_file == str(tmp_config / "config.toml.lock")
-
-
-def test_write_lock_rebuilds_when_config_dir_changes(monkeypatch, tmp_path):
-    # The instance is cached per path, but a changed config dir (the suite repoints it per
-    # test) must yield a fresh lock pointed at the new dir — not the stale one.
-    first = config_lock.write_lock()
-    moved = tmp_path / "moved"
-    moved.mkdir()
-    monkeypatch.setattr(config, "config_dir", lambda: moved)
-    second = config_lock.write_lock()
-    assert second is not first
-    assert second.lock_file == str(moved / "config.toml.lock")
-
-
-def test_update_holds_the_write_lock_during_the_dump(tmp_config, monkeypatch):
-    # The mutate -> dump runs inside the lock: while _dump executes, the lock is held.
-    # (Drop the `with locked()` and is_locked would be False here.)
-    seen: dict[str, bool] = {}
-    real_dump = config._dump
-
-    def spy_dump(cfg):
-        seen["locked"] = config_lock.write_lock().is_locked
-        return real_dump(cfg)
-
-    monkeypatch.setattr(config, "_dump", spy_dump)
-    config.set_profile_env("default", "sandbox000")
-    assert seen["locked"] is True
-
-
 # --- streaming: on_turn serialization under _callback_lock -------------------------
 
 
diff --git a/tests/test_dictate_command.py b/tests/test_dictate_command.py
index 7bdb5e9b..1f98f2d2 100644
--- a/tests/test_dictate_command.py
+++ b/tests/test_dictate_command.py
@@ -1,5 +1,5 @@
-"""The `assembly dictate` Typer surface: argv -> DictateOptions mapping and the
-non-terminal failure mode. Session behavior lives in test_dictate_exec.py."""
+"""The `assembly dictate` Typer surface: argv -> DictateOptions mapping. Session
+behavior lives in test_dictate_exec.py."""
 
 from typer.testing import CliRunner
 
@@ -73,16 +73,3 @@ def test_max_seconds_is_capped_at_the_api_limit():
     result = runner.invoke(app, ["dictate", "--max-seconds", "200"])
     assert result.exit_code == 2
     assert "120" in result.output
-
-
-def test_outside_a_terminal_is_a_usage_error_not_a_login():
-    # CliRunner's stdin is not a terminal and no credentials are configured: the
-    # whole stack (command -> run_dictate -> TerminalKeys) must surface the
-    # terminal requirement, not start an authentication flow.
-    result = runner.invoke(app, ["dictate"])
-    assert result.exit_code == 2
-    # POSIX surfaces the not-a-tty requirement; Windows (no termios) surfaces the
-    # unsupported-platform message first. Either is the point: a usage error, not a login.
-    assert (
-        "interactive terminal" in result.output or "not supported on this platform" in result.output
-    )
diff --git a/tests/test_dictate_exec.py b/tests/test_dictate_exec.py
index c7e4c3c1..db4f61ce 100644
--- a/tests/test_dictate_exec.py
+++ b/tests/test_dictate_exec.py
@@ -1,9 +1,9 @@
 """Direct tests of the `assembly dictate` options/run seam (dictate_exec).
 
 The session is driven by constructing DictateOptions and injecting the three
-boundaries — TerminalKeys (scripted keys), MicrophoneSource (canned PCM), and
-sync_stt.transcribe_pcm (recorded calls) — so no test needs a real terminal,
-microphone, or network.
+boundaries — stop_on_terminate (a scripted SIGTERM latch), MicrophoneSource
+(canned PCM), and sync_stt.transcribe_pcm (recorded calls) — so no test needs a
+real signal, microphone, or network.
 """
 
 from __future__ import annotations
@@ -18,7 +18,7 @@
 from aai_cli.app.context import AppState
 from aai_cli.commands.dictate import _exec as dictate_exec
 from aai_cli.core import choices, config, sync_stt
-from aai_cli.core.errors import CLIError, UsageError
+from aai_cli.core.errors import NotAuthenticated, UsageError
 
 DICTATE_DEFAULTS = dictate_exec.DictateOptions(
     language=None,
@@ -37,30 +37,34 @@
 )
 
 
-class FakeKeys:
-    """A scripted TerminalKeys: each read() pops the next key (None = no key yet);
-    an exhausted script reads as EOF, which ends the session."""
+class FakeStop:
+    """A scripted stop_on_terminate: the yielded predicate pops the next scripted
+    bool per poll (an exhausted script reads as False = keep recording), so a True
+    stands in for a SIGTERM arriving at that point in the capture."""
 
     def __init__(self, script):
         self.script = list(script)
-        self.timeouts = []
+        self.polls = 0
         self.entered = False
         self.exited = False
 
     def __enter__(self):
         self.entered = True
-        return self
+        return self._poll
 
     def __exit__(self, *exc):
         self.exited = True
 
-    def read(self, timeout):
-        self.timeouts.append(timeout)
-        return self.script.pop(0) if self.script else None
+    def _poll(self):
+        self.polls += 1
+        return self.script.pop(0) if self.script else False
 
 
-class RaisingKeys(FakeKeys):
-    def read(self, timeout):
+class RaisingStop(FakeStop):
+    """A stop latch whose poll raises KeyboardInterrupt — i.e. SIGINT (Ctrl-C)
+    arriving mid-recording."""
+
+    def _poll(self):
         raise KeyboardInterrupt
 
 
@@ -68,9 +72,9 @@ def read(self, timeout):
 def seams(monkeypatch):
     """Wire all three boundaries; returns the mutable harness state."""
     config.set_api_key("default", "sk_live")
-    harness = {"keys": FakeKeys([]), "chunks": [CHUNK, CHUNK], "mic": {}, "calls": []}
+    harness = {"stop": FakeStop([]), "chunks": [CHUNK, CHUNK], "mic": {}, "calls": []}
 
-    monkeypatch.setattr(dictate_exec, "TerminalKeys", lambda: harness["keys"])
+    monkeypatch.setattr(dictate_exec, "stop_on_terminate", lambda: harness["stop"])
 
     def fake_mic(*, target_rate, device=None, on_open=None):
         harness["mic"].update(target_rate=target_rate, device=device)
@@ -102,9 +106,9 @@ def test_options_are_immutable():
 
 
 def test_records_then_prints_bare_transcript(seams, capsys):
-    # Recording auto-starts; the in-recording poll sees nothing after chunk 1,
-    # Enter after chunk 2 stops the capture, then dictate exits.
-    seams["keys"] = FakeKeys([None, "\r"])
+    # Recording auto-starts; the first poll sees no SIGTERM, a SIGTERM after the
+    # second chunk stops the capture, then dictate exits.
+    seams["stop"] = FakeStop([False, True])
     _run()
     # Both chunks were captured and uploaded as one utterance at the resampled rate.
     assert seams["calls"] == [
@@ -121,18 +125,16 @@ def test_records_then_prints_bare_transcript(seams, capsys):
     captured = capsys.readouterr()
     # Human mode: the bare text on stdout (pipe-friendly), not a JSON object.
     assert captured.out.strip() == "hello world"
-    # The mic-open note fires on stderr; there is no interactive start prompt.
-    assert "Recording — press Enter to stop" in captured.err
-    assert "start recording" not in captured.err
+    # The mic-open note fires on stderr and names the SIGTERM stop.
+    assert "send SIGTERM to transcribe" in captured.err
     assert seams["mic"] == {"target_rate": 16000, "device": None}
-    assert seams["keys"].entered and seams["keys"].exited  # terminal restored
-    # Recording auto-started, so every read is the zero-timeout in-recording poll
-    # — no blocking idle read(None) waiting for a start keypress.
-    assert seams["keys"].timeouts == [0, 0]
+    assert seams["stop"].entered and seams["stop"].exited  # handler installed + restored
+    # Polled once per captured chunk: False after chunk 1, True after chunk 2.
+    assert seams["stop"].polls == 2
 
 
 def test_json_mode_emits_one_ndjson_object_per_utterance(seams, capsys):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(json_mode=True)
     captured = capsys.readouterr()
     assert json.loads(captured.out) == {
@@ -149,14 +151,14 @@ def test_json_mode_emits_one_ndjson_object_per_utterance(seams, capsys):
 def test_output_json_folds_into_ndjson_without_the_json_flag(seams, capsys):
     # -o json must enable NDJSON on its own (json_mode stays the --json flag,
     # which is False here) — proving the -o/--output resolution runs.
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, output_field=choices.TextOrJson.json))
     assert json.loads(capsys.readouterr().out)["text"] == "hello world"
 
 
 def test_output_text_emits_bare_transcript(seams, capsys):
     # -o text is the explicit spelling of the human default: bare text, no JSON.
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, output_field=choices.TextOrJson.text))
     out = capsys.readouterr().out
     assert out.strip() == "hello world"
@@ -166,7 +168,7 @@ def test_output_text_emits_bare_transcript(seams, capsys):
 def test_output_text_conflicts_with_json_flag(seams):
     # --json + -o text are contradictory output shapes: a clean usage error,
     # the same as `stream`/`agent`.
-    seams["keys"] = FakeKeys(["\r", "\r"])
+    seams["stop"] = FakeStop([True])
     with pytest.raises(UsageError):
         _run(
             dataclasses.replace(DICTATE_DEFAULTS, output_field=choices.TextOrJson.text),
@@ -174,74 +176,63 @@ def test_output_text_conflicts_with_json_flag(seams):
         )
 
 
-def test_quiet_suppresses_the_interactive_hints(seams, capsys):
-    seams["keys"] = FakeKeys(["\r"])
+def test_quiet_suppresses_the_recording_hint(seams, capsys):
+    seams["stop"] = FakeStop([True])
     _run(state=AppState(quiet=True))
     captured = capsys.readouterr()
     assert captured.out.strip() == "hello world"
     assert captured.err == ""
 
 
-def test_auto_starts_recording_then_exits_after_one_utterance(seams, capsys):
-    # Recording auto-starts: a single read(0) pops the Enter that stops the
-    # capture, then dictate exits — no blocking idle read(None) waiting for a
-    # start keypress, and the rest of the key script is left undrained.
-    seams["keys"] = FakeKeys(["\r", "\r", "\r"])
+def test_records_one_utterance_then_exits(seams, capsys):
+    # A SIGTERM on the first poll stops the capture and dictate exits after one
+    # utterance — it never starts a second recording, and the rest of the stop
+    # script is left undrained.
+    seams["stop"] = FakeStop([True, True, True])
     _run()
     assert len(seams["calls"]) == 1
-    assert seams["keys"].script  # ended on the single utterance, not by draining keys
-    assert seams["keys"].timeouts == [0]
-    captured = capsys.readouterr()
-    assert captured.out.strip() == "hello world"
-    # The mic-open note fires immediately; there is no interactive start prompt.
-    assert "Recording — press Enter to stop" in captured.err
-    assert "start recording" not in captured.err
+    assert seams["stop"].script  # ended on the single utterance, not by draining the script
+    assert seams["stop"].polls == 1
+    assert capsys.readouterr().out.strip() == "hello world"
 
 
 def test_once_flag_is_a_deprecated_noop_that_warns(seams, capsys):
     # --once is kept only so old scripts don't break: it does nothing (single
     # utterance is the default) but warns that it can be dropped.
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, once=True))
     assert len(seams["calls"]) == 1
     assert "--once is now the default" in capsys.readouterr().err
 
 
 def test_once_warning_is_silenced_by_quiet(seams, capsys):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, once=True), state=AppState(quiet=True))
     assert "--once" not in capsys.readouterr().err
 
 
-@pytest.mark.parametrize("quit_key", ["q", "Q", "\x1b", "\x04"])
-def test_quit_keys_stop_recording_and_transcribe(seams, quit_key):
-    # No idle prompt anymore: q / Esc / Ctrl-D stop the auto-started recording
-    # and the captured utterance is still transcribed.
-    seams["keys"] = FakeKeys([quit_key])
+def test_sigterm_stops_recording_and_transcribes(seams):
+    # A SIGTERM after the first chunk stops the auto-started recording and the
+    # captured utterance is still transcribed.
+    seams["stop"] = FakeStop([True])
     _run()
     assert len(seams["calls"]) == 1
     assert seams["calls"][0]["pcm"] == CHUNK  # stopped after the first chunk
 
 
-def test_space_also_stops_recording(seams):
-    seams["keys"] = FakeKeys([" "])
-    _run()
-    assert len(seams["calls"]) == 1
-
-
-def test_unbound_keys_during_recording_do_not_stop_capture(seams):
-    # A stray keystroke mid-utterance is ignored; only Enter/Space (or a quit
-    # key) ends the capture.
-    seams["keys"] = FakeKeys(["x", "\r"])
+def test_no_signal_does_not_stop_capture(seams):
+    # A poll that reports no SIGTERM keeps recording; only a True (or the cap)
+    # ends the capture.
+    seams["stop"] = FakeStop([False, True])
     seams["chunks"] = [CHUNK, CHUNK, CHUNK]
     _run()
     assert seams["calls"][0]["pcm"] == CHUNK + CHUNK
 
 
 def test_recording_stops_at_the_duration_cap(seams):
-    # 0.2 s at 16 kHz PCM16 = 6400 bytes = exactly two chunks; the poll never
-    # reports a key, so only the cap can stop the capture.
-    seams["keys"] = FakeKeys([])
+    # 0.2 s at 16 kHz PCM16 = 6400 bytes = exactly two chunks; no SIGTERM ever
+    # arrives, so only the cap can stop the capture.
+    seams["stop"] = FakeStop([])
     seams["chunks"] = [CHUNK] * 5
     _run(dataclasses.replace(DICTATE_DEFAULTS, max_seconds=0.2))
     assert len(seams["calls"]) == 1
@@ -259,7 +250,7 @@ def chunk_gen():
         finally:
             closed.append(True)
 
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     seams["chunks"] = chunk_gen()
     _run()
     assert closed == [True]  # the device-releasing cleanup ran at stop, not at GC
@@ -267,7 +258,7 @@ def chunk_gen():
 
 @pytest.mark.parametrize("size", [200, 2558])  # 2558: just under the exact 2560-byte floor
 def test_too_short_recording_is_skipped_with_a_warning(seams, capsys, size):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     seams["chunks"] = [b"\x01" * size]  # below 80 ms of 16 kHz PCM16 (2560 bytes)
     _run()
     assert seams["calls"] == []
@@ -277,39 +268,39 @@ def test_too_short_recording_is_skipped_with_a_warning(seams, capsys, size):
 
 
 def test_recording_at_the_80ms_floor_is_transcribed(seams):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     seams["chunks"] = [b"\x01" * 2560]  # exactly 80 ms: allowed, not skipped
     _run()
     assert len(seams["calls"]) == 1
 
 
 def test_language_and_boost_flags_are_forwarded(seams):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, language="es", word_boost=["AssemblyAI"]))
     assert seams["calls"][0]["language_code"] == "es"
     assert seams["calls"][0]["word_boost"] == ["AssemblyAI"]
 
 
 def test_comma_separated_languages_become_a_list(seams):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, language="en, es"))
     assert seams["calls"][0]["language_code"] == ["en", "es"]
 
 
 def test_blank_language_reads_as_unset(seams):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, language=" , "))
     assert seams["calls"][0]["language_code"] is None
 
 
 def test_prompt_with_language_warns_that_language_is_ignored(seams, capsys):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, prompt="Verbatim.", language="es"))
     assert "--language is ignored when --prompt is set" in capsys.readouterr().err
 
 
 def test_prompt_alone_is_forwarded_without_warning(seams, capsys):
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(dataclasses.replace(DICTATE_DEFAULTS, prompt="Verbatim."))
     assert seams["calls"][0]["prompt"] == "Verbatim."
     assert "ignored" not in capsys.readouterr().err
@@ -324,35 +315,30 @@ def fake_status(message, *, json_mode, quiet=False):
         yield
 
     monkeypatch.setattr(dictate_exec.output, "status", fake_status)
-    seams["keys"] = FakeKeys(["\r"])
+    seams["stop"] = FakeStop([True])
     _run(state=AppState(quiet=True))
     assert seen == {"message": "Transcribing…", "json_mode": False, "quiet": True}
 
 
 def test_ctrl_c_exits_with_cancel_code(seams):
-    keys = RaisingKeys([])
-    seams["keys"] = keys
-    # Ctrl-C cancels dictation: exit 130 (distinct from `q`, which finishes with 0).
+    stop = RaisingStop([])
+    seams["stop"] = stop
+    # Ctrl-C / SIGINT cancels dictation: exit 130 (distinct from SIGTERM, which
+    # finishes with 0).
     with pytest.raises(typer.Exit) as exc:
         _run()
     assert exc.value.exit_code == 130
-    assert keys.exited  # the with-block unwound, restoring the terminal
+    assert stop.exited  # the with-block unwound, restoring the previous handler
 
 
-def test_terminal_is_validated_before_credentials(seams, monkeypatch):
-    # No key is configured and TerminalKeys rejects the terminal: the usage
-    # error must win, not NotAuthenticated (validation before credentials).
+def test_credentials_are_resolved_before_recording(seams, monkeypatch):
+    # No key is configured: the missing-credentials error must surface before the
+    # mic is ever opened (don't capture audio we can't transcribe).
     config.clear_api_key("default")
-
-    class NoTty:
-        def __enter__(self):
-            raise CLIError("This command needs an interactive terminal.", exit_code=2)
-
-        def __exit__(self, *exc):
-            return None
-
-    monkeypatch.setattr(dictate_exec, "TerminalKeys", NoTty)
-    with pytest.raises(CLIError) as exc:
+    opened = []
+    monkeypatch.setattr(
+        dictate_exec, "MicrophoneSource", lambda **_kw: opened.append(True) or iter([])
+    )
+    with pytest.raises(NotAuthenticated):
         _run()
-    assert exc.value.exit_code == 2
-    assert "interactive terminal" in exc.value.message
+    assert opened == []  # the mic was never opened
diff --git a/tests/test_hotkey.py b/tests/test_hotkey.py
deleted file mode 100644
index 73f14465..00000000
--- a/tests/test_hotkey.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""TerminalKeys: cbreak scoping, single-key reads, and the clean failure modes.
-
-The terminal tests drive a real pty pair (os.openpty), so termios behavior is
-exercised for real without touching the test runner's stdin.
-"""
-
-import os
-import sys
-
-import pytest
-
-from aai_cli.core import hotkey
-from aai_cli.core.errors import CLIError
-from aai_cli.core.hotkey import TerminalKeys, _stdin_fd
-
-# termios and os.openpty are POSIX-only, so the pty-driven tests below are skipped on
-# Windows. importorskip keeps that out of the skip/xfail escape-hatch count the Linux
-# gate tracks. The msvcrt-backend tests at the bottom inject a fake console, so they run
-# (and give coverage) on the POSIX CI host even though they exercise the Windows path.
-termios = pytest.importorskip("termios")
-
-
-@pytest.fixture
-def pty_pair():
-    master, slave = os.openpty()
-    yield master, slave
-    os.close(master)
-    os.close(slave)
-
-
-def test_reads_single_keypresses_without_enter(pty_pair):
-    master, slave = pty_pair
-    with TerminalKeys(fd=slave) as keys:
-        os.write(master, b"ab")
-        # One keypress per read, even when several are queued.
-        assert keys.read(5.0) == "a"
-        assert keys.read(5.0) == "b"
-
-
-def test_poll_returns_none_when_no_key_is_pending(pty_pair):
-    _, slave = pty_pair
-    with TerminalKeys(fd=slave) as keys:
-        assert keys.read(0) is None
-
-
-def test_cbreak_is_scoped_to_the_context(pty_pair):
-    _, slave = pty_pair
-    lflag_index = 3
-    assert termios.tcgetattr(slave)[lflag_index] & termios.ICANON
-    with TerminalKeys(fd=slave):
-        inside = termios.tcgetattr(slave)[lflag_index]
-        assert not inside & termios.ICANON  # keys arrive without Enter
-        assert inside & termios.ISIG  # but Ctrl-C still raises KeyboardInterrupt
-    assert termios.tcgetattr(slave)[lflag_index] & termios.ICANON  # restored
-
-
-def test_exit_without_enter_restores_nothing(pty_pair):
-    # __exit__ is a no-op when the cbreak switch never happened (or already ran):
-    # exiting twice must not call tcsetattr with stale state.
-    _, slave = pty_pair
-    keys = TerminalKeys(fd=slave)
-    keys.__exit__(None, None, None)  # never entered: nothing to restore
-    with keys:
-        pass
-    keys.__exit__(None, None, None)  # second exit after restore: still a no-op
-
-
-def test_non_tty_fd_is_a_clean_usage_error(tmp_path):
-    with (tmp_path / "plain-file").open("w") as f:
-        with pytest.raises(CLIError) as exc:
-            with TerminalKeys(fd=f.fileno()):
-                pass
-    assert exc.value.exit_code == 2
-    assert exc.value.error_type == "not_a_tty"
-    assert "interactive terminal" in exc.value.message
-
-
-def test_platform_without_termios_is_a_clean_error(pty_pair, monkeypatch):
-    # Windows has no termios; None in sys.modules makes the import raise.
-    _, slave = pty_pair
-    monkeypatch.setitem(sys.modules, "termios", None)
-    with pytest.raises(CLIError) as exc:
-        with TerminalKeys(fd=slave):
-            pass
-    assert exc.value.exit_code == 2
-    assert exc.value.error_type == "unsupported_platform"
-
-
-def test_read_returns_none_at_eof():
-    # A pipe stands in for a hung-up terminal: select reports readable, the
-    # read yields no bytes. (read() itself doesn't require a tty; only the
-    # cbreak context does.)
-    read_end, write_end = os.pipe()
-    try:
-        os.write(write_end, b"z")
-        os.close(write_end)
-        keys = TerminalKeys(fd=read_end)
-        assert keys.read(0) == "z"  # drains the last byte
-        assert keys.read(0) is None  # then EOF
-    finally:
-        os.close(read_end)
-
-
-def test_stdin_fd_defaults_to_real_stdin_or_minus_one(monkeypatch):
-    class NoFileno:
-        def fileno(self):
-            raise OSError("no underlying file")
-
-    monkeypatch.setattr(sys, "stdin", NoFileno())
-    assert _stdin_fd() == -1
-    assert TerminalKeys()._fd == -1
-
-    class CapturedStdin:
-        def fileno(self):
-            raise ValueError("I/O operation on captured stream")
-
-    monkeypatch.setattr(sys, "stdin", CapturedStdin())
-    assert _stdin_fd() == -1
-
-    class RealStdin:
-        def fileno(self):
-            return 42
-
-    monkeypatch.setattr(sys, "stdin", RealStdin())
-    assert _stdin_fd() == 42
-
-
-# --- Windows (msvcrt) backend ------------------------------------------------
-# Driven on the POSIX CI host by forcing _on_windows() True and injecting a fake
-# console; the real msvcrt calls are thin, so this covers the branch logic.
-
-
-class _FakeMsvcrt:
-    """Stand-in for the stdlib msvcrt console API the Windows backend reads through."""
-
-    def __init__(self, *, ready_after: int = 0, char: str = "a") -> None:
-        self._until_ready = ready_after  # kbhit() returns False this many times first
-        self._char = char
-        self.getwch_calls = 0
-
-    def kbhit(self) -> bool:
-        if self._until_ready <= 0:
-            return True
-        self._until_ready -= 1
-        return False
-
-    def getwch(self) -> str:
-        self.getwch_calls += 1
-        return self._char
-
-
-@pytest.fixture
-def windows_backend(monkeypatch):
-    """Force the Windows code path on this POSIX host: msvcrt backend, fake console tty."""
-    monkeypatch.setattr(hotkey, "_on_windows", lambda: True)
-    monkeypatch.setattr(hotkey.os, "isatty", lambda _fd: True)
-    return monkeypatch
-
-
-def test_windows_backend_enters_without_cbreak_and_reads_keys(windows_backend):
-    windows_backend.setitem(sys.modules, "msvcrt", _FakeMsvcrt(char="a"))
-    keys = TerminalKeys(fd=5)
-    with keys as k:
-        assert k.read(None) == "a"  # timeout=None -> blocking getwch()
-        assert k.read(0) == "a"  # zero-timeout poll with a key already buffered
-    assert keys._saved is None  # no termios state is saved or restored on Windows
-
-
-def test_windows_backend_poll_returns_none_when_no_key(windows_backend):
-    windows_backend.setitem(sys.modules, "msvcrt", _FakeMsvcrt(ready_after=10**9))
-    with TerminalKeys(fd=5) as k:
-        assert k.read(0) is None  # nothing buffered + zero timeout -> immediate None
-
-
-def test_windows_backend_polls_with_naps_until_a_key_arrives(windows_backend):
-    naps: list[float] = []
-    windows_backend.setattr(hotkey.time, "sleep", lambda s: naps.append(s))
-    windows_backend.setitem(sys.modules, "msvcrt", _FakeMsvcrt(ready_after=2, char="z"))
-    with TerminalKeys(fd=5) as k:
-        assert k.read(5.0) == "z"
-    assert naps == [0.01, 0.01]  # napped between the two not-ready polls
-
-
-def test_windows_backend_non_tty_is_still_a_usage_error(windows_backend):
-    windows_backend.setattr(hotkey.os, "isatty", lambda _fd: False)
-    with pytest.raises(CLIError) as exc:
-        with TerminalKeys(fd=5):
-            pass
-    assert exc.value.error_type == "not_a_tty"
diff --git a/tests/test_signals.py b/tests/test_signals.py
index d95d05e2..e8b8e417 100644
--- a/tests/test_signals.py
+++ b/tests/test_signals.py
@@ -24,6 +24,44 @@ def test_terminate_as_interrupt_installs_and_restores_handler():
     assert signal.getsignal(signal.SIGTERM) is before
 
 
+def test_stop_on_terminate_latches_sigterm_then_restores_handler():
+    before = signal.getsignal(signal.SIGTERM)
+    with signals.stop_on_terminate() as stop_requested:
+        handler = signal.getsignal(signal.SIGTERM)
+        # A new handler is installed for the block...
+        assert handler is not before
+        assert callable(handler)
+        # ...the flag stays False until a SIGTERM is delivered...
+        assert stop_requested() is False
+        handler(signal.SIGTERM, None)
+        # ...and then latches True, without re-raising (unlike the cancel path).
+        assert stop_requested() is True
+    # The previous handler is restored on exit.
+    assert signal.getsignal(signal.SIGTERM) is before
+
+
+def test_stop_on_terminate_is_noop_off_main_thread():
+    before = signal.getsignal(signal.SIGTERM)
+    observed: dict[str, object] = {}
+
+    def worker() -> None:
+        with signals.stop_on_terminate() as stop_requested:
+            # Off the main thread no handler may be installed, so the disposition
+            # is untouched and the predicate stays False for the whole block.
+            observed["handler"] = signal.getsignal(signal.SIGTERM)
+            observed["stopped"] = stop_requested()
+        observed["ran"] = True
+
+    thread = threading.Thread(target=worker)
+    thread.start()
+    thread.join()
+
+    assert observed["ran"] is True
+    assert observed["handler"] is before
+    assert observed["stopped"] is False
+    assert signal.getsignal(signal.SIGTERM) is before
+
+
 def test_terminate_as_interrupt_is_noop_off_main_thread():
     before = signal.getsignal(signal.SIGTERM)
     observed: dict[str, object] = {}
diff --git a/uv.lock b/uv.lock
index 0a04be22..e73c0e56 100644
--- a/uv.lock
+++ b/uv.lock
@@ -23,7 +23,6 @@ dependencies = [
     { name = "assemblyai" },
     { name = "audioop-lts", marker = "python_full_version >= '3.13'" },
     { name = "feedparser" },
-    { name = "filelock" },
     { name = "fsspec" },
     { name = "httpx2" },
     { name = "jiwer" },
@@ -78,7 +77,6 @@ requires-dist = [
     { name = "assemblyai", specifier = ">=0.64.4" },
     { name = "audioop-lts", marker = "python_full_version >= '3.13'", specifier = ">=0.2" },
     { name = "feedparser", specifier = ">=6.0.11" },
-    { name = "filelock", specifier = ">=3.16.0" },
     { name = "fsspec", specifier = ">=2026.4.0" },
     { name = "httpx2", specifier = ">=2.0.0" },
     { name = "jiwer", specifier = ">=4.0" },