AssemblyAI · alexkroman · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/REFERENCE.md b/REFERENCE.md
@@ -29,7 +29,7 @@ Product-scoped variables are `ASSEMBLYAI_*`; CLI-behavior variables are
 | Variable | Effect |
 | -------- | ------ |
 | `ASSEMBLYAI_API_KEY` | API key for all API calls; beats the keyring, loses to nothing but a `--api-key` validation flag. |
-| `AAI_ENV` | Backend environment (`production`, `sandbox000`); beats the profile's stored env, loses to `--env`/`--sandbox`. |
+| `AAI_ENV` | Backend environment (`production`, `sandbox000`); beats the profile's stored env, loses to `--env`/`--sandbox`. The non-production environments are internal: selecting one (here, via `--env`/`--sandbox`, or a profile binding) is rejected with exit 2 unless the active profile is signed in with an `@assemblyai.com` login, and `--env`/`--sandbox` and the sandbox-only commands are hidden from `--help` for everyone else. |
 | `AAI_AUTH_PORT` | Loopback callback port for `assembly login` (dev/test only; default 8585). |
 | `AAI_NO_UPDATE_CHECK` | Disables the "update available" notice and its background refresh. |
 | `AAI_TELEMETRY_DISABLED` / `DO_NOT_TRACK` | Disables anonymous usage telemetry (always beats the persisted choice). |

diff --git a/aai_cli/app/context.py b/aai_cli/app/context.py
@@ -125,6 +125,7 @@ def persist_browser_login(profile: str, env: str, *, json_mode: bool = False) ->
         session_jwt=result.session_jwt,
         session_token=result.session_token,
         account_id=result.account_id,
+        email=result.email,
     )
 
 

diff --git a/aai_cli/auth/flow.py b/aai_cli/auth/flow.py
@@ -22,12 +22,15 @@ class LoginResult:
     session_jwt: str
     session_token: str
     account_id: int
+    # The signed-in user's email, from AMS discovery. Persisted so the CLI can gate
+    # internal-only environments (the sandbox) on the org domain; None if AMS omits it.
+    email: str | None = None
 
 
 # Typed views of the AMS login responses. AMS only returns HTTP errors for outright
 # failures; a 200 with an unexpected shape would otherwise KeyError into an ugly
 # traceback, so each required field's absence becomes the same clean "run login
-# again" APIError via `_parse`. Extra fields (e.g. discover's `email`) are ignored.
+# again" APIError via `_parse`. Only the fields below are read; the rest are ignored.
 class _Organization(BaseModel):
     organization_id: str
     organization_name: str | None = None
@@ -36,6 +39,8 @@ class _Organization(BaseModel):
 class _Discovery(BaseModel):
     intermediate_session_token: str
     organizations: list[_Organization] = []
+    # Top-level email from the discover response; used only to gate sandbox access.
+    email: str | None = None
 
 
 class _Account(BaseModel):
@@ -240,4 +245,5 @@ def run_login_flow(*, json_mode: bool = False) -> LoginResult:
         session_jwt=signed_in.session_jwt,
         session_token=signed_in.session_token,
         account_id=signed_in.account.id,
+        email=disc.email,
     )
diff --git a/aai_cli/core/access.py b/aai_cli/core/access.py
@@ -0,0 +1,39 @@
+"""Who may select the internal-only environments (the sandbox).
+
+The sandbox runs on internal infrastructure, so it's gated on the login email
+captured at browser login (persisted per profile by ``config``), not the API key —
+an API-key-only profile (CI, ``ASSEMBLYAI_API_KEY``) therefore reads as external.
+The root callback rejects an internal environment for an external account, and the
+root ``--help`` hides the sandbox flags/commands from it.
+"""
+
+from __future__ import annotations
+
+from aai_cli.core import config
+from aai_cli.core.errors import CLIError
+
+# Login emails in this domain unlock the internal-only environments.
+INTERNAL_EMAIL_DOMAIN = "assemblyai.com"
+
+
+def is_internal_email(email: str | None) -> bool:
+    """Report whether ``email`` is an AssemblyAI-org address (gates the sandbox).
+
+    Case-insensitive; the ``@`` boundary rejects ``user@evil-assemblyai.com``.
+    """
+    normalized = "" if email is None else email.strip().lower()
+    return normalized.endswith("@" + INTERNAL_EMAIL_DOMAIN)
+
+
+def profile_is_internal(profile: str | None = None) -> bool:
+    """Report whether the login email stored for a profile is an AssemblyAI one.
+
+    A falsy ``profile`` means the active profile. Any ``CLIError`` raised while
+    reading the config is swallowed and answered with ``False``, so a failed
+    lookup denies access instead of propagating out of the gate or ``--help``.
+    """
+    try:
+        stored = config.get_profile_email(profile or config.get_active_profile())
+    except CLIError:
+        return False
+    return is_internal_email(stored)
diff --git a/aai_cli/core/config.py b/aai_cli/core/config.py
@@ -35,6 +35,8 @@ class Profile(BaseModel):
 
     env: str | None = None
     account_id: int | None = None
+    # Login email from AMS discovery; gates internal-environment access (see core.access).
+    email: str | None = None
 
 
 class Config(BaseModel):
@@ -291,6 +293,20 @@ def set_profile_env(profile: str, env: str) -> None:
     _dump(cfg)
 
 
+def get_profile_email(profile: str) -> str | None:
+    """Return the email stored on ``profile``, or None if the profile or email is absent."""
+    entry = _load().profiles.get(profile)
+    return None if not entry else entry.email
+
+
+def set_profile_email(profile: str, email: str) -> None:
+    """Record ``email`` on ``profile``, creating the profile entry if it is missing."""
+    validate_profile(profile)
+    loaded = _load()
+    loaded.profiles.setdefault(profile, Profile()).email = email
+    _dump(loaded)
+
+
 def clear_api_key(profile: str) -> None:
     # KeyringError, not just PasswordDeleteError: with no backend at all (headless
     # boxes) delete raises NoKeyringError, and "nothing stored" is already the goal.
@@ -357,6 +373,7 @@ def persist_login(
     session_jwt: str,
     session_token: str,
     account_id: int,
+    email: str | None = None,
 ) -> None:
     """Atomically persist a full browser-login result (API key + env + session).
 
@@ -381,6 +398,9 @@ def persist_login(
             session_token=session_token,
             account_id=account_id,
         )
+        # Same atomic rollback. NOTE(review): a None email is not written — can a stale one linger?
+        if email is not None:
+            set_profile_email(profile, email)
         done = True
     finally:
         if not done:

diff --git a/aai_cli/main.py b/aai_cli/main.py
@@ -1,22 +1,27 @@
 from __future__ import annotations
 
+import contextlib
 import logging
 import sys
+from collections.abc import Generator
 from typing import TYPE_CHECKING
 
 import typer
 from typer._click.utils import PacifyFlushWrapper
-from typer.core import TyperGroup
+from typer.core import TyperGroup, TyperOption
 
 if TYPE_CHECKING:
     # Typer (>=0.13) vendors its own click; TyperGroup.list_commands receives this
     # context type, not the upstream click.Context. Imported for typing only.
+    from typer._click.core import Command as ClickCommand
     from typer._click.core import Context as ClickContext
+    from typer._click.formatting import HelpFormatter as ClickHelpFormatter
 
 from aai_cli import __version__, command_registry
 from aai_cli.app.context import AppState
 from aai_cli.commands import onboard
-from aai_cli.core import argscan, choices, debuglog, environments, stdio
+from aai_cli.core import access, argscan, choices, debuglog, environments, stdio
+from aai_cli.core.environments import Environment
 from aai_cli.core.errors import CLIError, NotAuthenticated
 from aai_cli.onboard import wizard
 from aai_cli.onboard.sections import WizardContext
@@ -38,6 +43,23 @@
 _COMMAND_RANK = {name: i for i, name in enumerate(_COMMAND_ORDER)}
 
 
+# Root flags and the marker the sandbox-only command docstrings open with: the single
+# place the "what is a sandbox option" surface is defined, so help-filtering and the
+# command docstrings stay the lone declarations (no parallel command list to maintain).
+_SANDBOX_ROOT_FLAGS = frozenset({"sandbox", "env"})
+_SANDBOX_HELP_MARKER = "[sandbox]"
+
+
+def _is_sandbox_command(command: ClickCommand) -> bool:
+    """Report whether a command's help text opens with the ``[sandbox]`` marker.
+
+    Leading whitespace and then any leading backslashes are dropped first, since
+    the docstrings escape the bracket for Rich (``\\[sandbox]``).
+    """
+    help_text = command.help or command.short_help or ""
+    return help_text.lstrip().lstrip("\\").startswith(_SANDBOX_HELP_MARKER)
+
+
 class _OrderedGroup(TyperGroup):
     """Lists commands in `_COMMAND_ORDER` rather than registration order.
 
@@ -59,6 +81,51 @@ def parse_args(self, ctx: ClickContext, args: list[str]) -> list[str]:
         ctx.meta[argscan.RAW_ARGS_META_KEY] = list(args)
         return super().parse_args(ctx, args)
 
+    def _sandbox_surface(self, ctx: ClickContext) -> list[TyperOption | ClickCommand]:
+        """Collect the surface to hide: the sandbox root flags first, then every
+        command whose help carries the ``[sandbox]`` marker.
+        """
+        surface: list[TyperOption | ClickCommand] = []
+        for param in self.get_params(ctx):
+            if isinstance(param, TyperOption) and param.name in _SANDBOX_ROOT_FLAGS:
+                surface.append(param)
+        for name in self.list_commands(ctx):
+            command = self.get_command(ctx, name)
+            if command is not None and _is_sandbox_command(command):
+                surface.append(command)
+        return surface
+
+    @contextlib.contextmanager
+    def _sandbox_surface_hidden(self, ctx: ClickContext) -> Generator[None]:
+        """Flip ``hidden`` on for the sandbox flags/commands during one render.
+
+        The old values return in ``finally``: these objects are process-global (one
+        Typer tree per process), so a leaked ``hidden=True`` would also hide the
+        sandbox surface from a later in-process render or from shell completion.
+        """
+        surface = self._sandbox_surface(ctx)
+        previous = [item.hidden for item in surface]
+        for item in surface:
+            item.hidden = True
+        try:
+            yield
+        finally:
+            for item, hidden in zip(surface, previous):
+                item.hidden = hidden
+
+    def format_help(self, ctx: ClickContext, formatter: ClickHelpFormatter) -> None:
+        """Render `assembly --help`, with the sandbox surface hidden when appropriate.
+
+        An AssemblyAI login gets the full help unchanged. For any other account
+        the sandbox flags and commands are marked hidden for this one render: the
+        sandbox runs on internal infrastructure, so listing it would only be noise
+        (and a dead end) for an external user.
+        """
+        with contextlib.ExitStack() as stack:
+            if not access.profile_is_internal():
+                stack.enter_context(self._sandbox_surface_hidden(ctx))
+            super().format_help(ctx, formatter)
+
 
 # Brand-retint Typer's help palette, pin help-table columns against clipping, make
 # Typer's consoles pipe-safe, fix Click's error formatting, and trim the completion
@@ -108,6 +175,36 @@ def _sandbox_conflict_warning(sandbox: bool, env: str | None) -> str | None:
     return None
 
 
+def _enforce_internal_env(
+    ctx: typer.Context, state: AppState, active_env: Environment, *, json_mode: bool
+) -> None:
+    """Stop the run when a non-AssemblyAI profile selects an internal-only environment.
+
+    Three cases pass through untouched: the default environment, the ``login``
+    subcommand, and a profile that ``access.profile_is_internal`` accepts. ``login``
+    is exempt so a first-time employee can target the sandbox to sign in there,
+    which is what records the email this gate then reads. Anything else — whether
+    the environment came from --sandbox, --env or AAI_ENV — gets a clean
+    ``restricted_environment`` error and exit code 2 rather than a confusing
+    downstream auth failure.
+    """
+    allowed = (
+        active_env.name == environments.DEFAULT_ENV
+        or ctx.invoked_subcommand == "login"
+        or access.profile_is_internal(state.resolve_profile())
+    )
+    if allowed:
+        return
+    hint = (
+        "Drop --sandbox/--env (and unset AAI_ENV) to use production, or run "
+        "'assembly login' with an AssemblyAI account."
+    )
+    message = f"The {active_env.name} environment is restricted to AssemblyAI accounts."
+    err = CLIError(message, error_type="restricted_environment", exit_code=2, suggestion=hint)
+    output.emit_error(err, json_mode=json_mode)
+    raise typer.Exit(code=err.exit_code)
+
+
 def _offer_or_help(ctx: typer.Context, state: AppState) -> None:
     """No subcommand given: offer guided setup to a credential-less, interactive user;
     otherwise print help. Never prompts in a non-interactive session, never on
@@ -216,6 +313,7 @@ def main(
             raise typer.Exit(code=env_err.exit_code) from None
     active_env = environments.active()
     _LOG.debug("environment: %s (%s)", active_env.name, active_env.api_base)
+    _enforce_internal_env(ctx, state, active_env, json_mode=json_mode)
     for warning in (conflict_warning, state.env_override_warning()):
         if warning and not quiet:
             # Surfaced in JSON mode too (as {"warning": …}), so a `--json` pipeline gets

diff --git a/scripts/generated_code_compile_gate.py b/scripts/generated_code_compile_gate.py
@@ -6,11 +6,22 @@
 
 from typer.testing import CliRunner
 
+from aai_cli.core import access
 from aai_cli.main import app
 
 _ARG_COUNT = 2
 _USAGE_EXIT = 2
 
+
+def _force_internal_account() -> None:
+    """Stub the internal-account predicate so sandbox-only `--show-code` cases run.
+
+    Replaces ``access.profile_is_internal`` with an always-True callable. This gate
+    checks that generated code compiles, not the root callback's access restriction.
+    """
+    access.profile_is_internal = lambda *_, **__: True
+
+
 # Compile exactly what `assembly … --show-code > script.py` would capture: stdout
 # only (stderr carries human chrome like warnings), with telemetry disabled so a
 # gate run never mints a device id or spawns a flusher on the host.
@@ -39,6 +50,7 @@ def main() -> int:
         return _USAGE_EXIT
     out_dir = Path(sys.argv[1])
     out_dir.mkdir(parents=True, exist_ok=True)
+    _force_internal_account()
 
     transcribe_config = out_dir / "transcribe-config.json"
     transcribe_config.write_text(

diff --git a/tests/AGENTS.md b/tests/AGENTS.md
@@ -54,6 +54,19 @@ Lessons that cost iterations getting the patch-coverage and mutation tail gates
   cost a PR three CI rounds. Don't fight it: a local green is now a CI green for output tests.
   A test that genuinely needs a different width passes it on the call
   (`runner.invoke(app, argv, env={"COLUMNS": "300"})`), which overrides the default.
+- **Never `"--flag" in result.output` on Rich/help output — CI colorizes it and you cannot
+  turn that off from the test process.** Locally CliRunner captures to a non-tty so output is
+  plain and the check passes; in CI the render carries ANSI and Rich splits a flag's leading
+  dash into its own SGR span (`\x1b[..m-\x1b[..m-profile`), so `"--profile" in output` fails —
+  green locally, red in CI. The *worse* trap is the negative form: `"--sandbox" not in output`
+  passes **vacuously** against colored text, so a regression that re-exposes a flag sails
+  through CI undetected. This has bitten many PRs. Trying to disable color in `conftest`
+  (popping `FORCE_COLOR`, etc.) does **not** work — CI re-colors anyway, and the attempt only
+  masks the bug locally. The fix is to strip ANSI in the assertion: pass the output through
+  `tests._snapshot_surface.normalize` (what every `--help` snapshot test already does), then do
+  the `in` / `not in` checks against the plain text. A test that genuinely needs *colored*
+  output builds its own console (`theme.make_console(force_terminal=True, _environ={})`), never
+  the ambient env (see `test_color_mode.py` / `test_output.py`).
 - **Typer's `CliRunner` merges stderr into `result.output`, and not in call order**, so don't
   assume `splitlines()[-1]` is the command payload. In `--json` mode the env-mismatch warning
   is its own `{"warning": …}` line, so filter parsed lines by a key the payload carries

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -208,6 +208,18 @@ def memory_fs():
     MemoryFileSystem.pseudo_dirs[:] = [""]
 
 
+@pytest.fixture
+def internal_profile(monkeypatch):
+    """Force the internal-account check to pass for the duration of a test.
+
+    External accounts get the sandbox flags/commands hidden from help and
+    rejected by the root callback, so tests driving `--sandbox` or a sandbox-only
+    command need this. The predicate itself is patched (no email is written), so
+    the fixture works however the test builds its config.
+    """
+    monkeypatch.setattr("aai_cli.core.access.profile_is_internal", lambda *_, **__: True)
+
+
 @pytest.fixture(autouse=True)
 def tmp_config(monkeypatch, tmp_path):
     cfg_dir = tmp_path / "config"

diff --git a/tests/test_agent_cascade_show_code.py b/tests/test_agent_cascade_show_code.py
@@ -8,12 +8,17 @@
 
 from __future__ import annotations
 
+import pytest
 from typer.testing import CliRunner
 
 from aai_cli.commands.agent_cascade import _exec
 from aai_cli.core import config
 from aai_cli.main import app
 
+# The cascade is sandbox-only and its happy paths run under `--sandbox`, which the
+# root callback restricts to AssemblyAI logins — run the module as an employee.
+pytestmark = pytest.mark.usefixtures("internal_profile")
+
 runner = CliRunner()
 
 

diff --git a/tests/test_caption_command.py b/tests/test_caption_command.py
@@ -8,6 +8,7 @@
 import re
 from pathlib import Path
 
+import pytest
 from typer.testing import CliRunner
 
 from aai_cli.commands.caption import _exec as caption_exec
@@ -117,6 +118,7 @@ def test_caption_json_error_shape(tmp_path, monkeypatch):
     assert err["error"]["type"] == "file_not_found"
 
 
+@pytest.mark.usefixtures("internal_profile")  # dub is sandbox-only, hidden from external help
 def test_caption_is_listed_between_dub_and_eval_in_root_help():
     # Pins caption's slot in _COMMAND_ORDER: it renders in the "Run AssemblyAI"
     # panel after dub, not alphabetically at the end of the help.