Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .gitleaks.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,17 @@
useDefault = true

[allowlist]
description = "Fake placeholder API keys used only in tests and planning docs"
description = "Fake placeholder keys (tests/docs) + gitignored per-developer agent settings"
regexTarget = "match"
regexes = [
'''sk_abcdef1234''',
'''sk_zzzzzz9999''',
]
# `gitleaks dir` scans the working tree regardless of .gitignore, so high-entropy values
# in a developer's gitignored `.claude/settings.local.json` (a personal Claude Code file
# that never enters the repo) would fail the *local* gate while CI — which lacks the file
# — passes. Exclude the path so local runs match CI. Tracked `.claude/` files
# (settings.json, agents/, skills/) are not matched and stay scanned.
paths = [
'''\.claude/settings\.local\.json$''',
]
26 changes: 26 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,32 @@ Lessons that cost time in agent sessions — read before exercising `uv run aai`
blocking path can't wedge the session. For pytest, `--timeout N` (pytest-timeout, in the
dev group) does the same per-test.

### Replay fixtures (offline end-to-end coverage)

`tests/test_replay_e2e.py` drives whole commands (`transcribe`/`transcripts`/`llm`/
`balance`/`usage`/`limits`) against **real** API responses recorded once and replayed
offline — the command's own parsing/rendering runs, but pytest-socket stays armed, so
these live in the default suite. Three moving parts:

- **`tests/fixtures/api/*.json`** — scrubbed snapshots (API key/JWT redacted, `email` and
`account_id` faked, private `cdn.assemblyai.com/upload/…` URLs redacted). Committed and
gitleaks-clean; treat them like syrupy snapshots (regenerate, don't hand-edit).
- **`scripts/record_fixtures.py`** — the recorder. It is **deliberately outside the gate**
(it hits the network) and is *not* mypy/pyright-checked (only ruff covers `scripts/`).
Refresh after an API shape change: `ASSEMBLYAI_API_KEY=… uv run python scripts/record_fixtures.py`.
The key comes from the env; the AMS session JWT + `account_id` from the keyring/`config.toml`
of whoever ran `aai login` (profile `default`) — neither is ever written to a fixture.
- **`tests/replay_fixtures.py`** — rebuilds the boundary objects from JSON. A transcript is a
real `aai.Transcript` via `Transcript.from_response`; an LLM response is rebuilt with
`ChatCompletion.model_construct` (**not** `model_validate`) because the gateway returns
Anthropic-flavored fields — `finish_reason="end_turn"`, token counts under
`input_tokens`/`output_tokens` — that strict validation rejects but the OpenAI SDK itself
parses leniently.

The replay tests patch the same boundary the unit tests do
(`commands.<cmd>.client.<fn>` / `.ams.<fn>` / `.gateway.complete`); the only difference is
the return value comes from a recorded payload instead of a hand-built mock.

## Naming & packaging gotchas

- The **package/module** is `aai_cli`; the **distribution** name is `aai-cli`; the **console command** is `aai` (`[project.scripts] aai = "aai_cli.main:run"`).
Expand Down
8 changes: 5 additions & 3 deletions aai_cli/commands/share.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@


def _cloudflared_install_hint() -> str:
    """Return a one-line hint telling the user how to install cloudflared.

    On macOS (``sys.platform == "darwin"``) the hint is the Homebrew command;
    on every other platform it points at ``_CLOUDFLARED_DOCS``.
    """
    # A ternary (not an if/return) so neither branch reads as unreachable under
    # mypy --warn-unreachable, which targets one platform at a time: on macOS the
    # second return looked dead, on Linux the first would.
    hint = "brew install cloudflared" if sys.platform == "darwin" else _CLOUDFLARED_DOCS
    return f"Install it: {hint}"


def _require_cloudflared() -> None:
Expand Down
176 changes: 176 additions & 0 deletions scripts/record_fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#!/usr/bin/env python
"""Record real AssemblyAI API responses as scrubbed JSON fixtures for replay tests.

This is a *manual* tool, deliberately outside the test suite and the gate: it reaches
the real network. It drives the same `client.* / llm.* / ams.*` functions the CLI uses,
then serializes each result to ``tests/fixtures/api/`` with every credential scrubbed,
so the committed fixtures carry no secrets (the gate's gitleaks scan would catch any
that slipped through).

Usage::

ASSEMBLYAI_API_KEY=<key> uv run python scripts/record_fixtures.py

The API key is read from the environment; the AMS session (JWT) and ``account_id`` are
read from the OS keyring / ``config.toml`` of whoever ran ``aai login`` (profile
``default``). None of these is ever written to a fixture. Re-run the script to refresh
the fixtures after an API shape change.
"""

from __future__ import annotations

import json
import os
import sys
from collections.abc import Callable
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

import assemblyai as aai

from aai_cli import client, config, environments, llm
from aai_cli.auth import ams
from aai_cli.errors import CLIError

# Fixtures live under <repo root>/tests/fixtures/api; this file sits one level below
# the repo root (in scripts/), hence parents[1].
FIXTURE_DIR = Path(__file__).resolve().parents[1].joinpath("tests", "fixtures", "api")
# Login profile whose stored session/account id is used for the account calls.
PROFILE = "default"

# Fixed stand-ins written in place of real secrets and identifiers. The replay tests
# assert on these exact values, which makes them part of the fixture contract.
FAKE_ACCOUNT_ID = 12345
FAKE_EMAIL = "user@example.com"
REDACTED = "REDACTED"
UPLOAD_PREFIX = "https://cdn.assemblyai.com/upload/"
# Real API responses are shallow, so anything nested deeper than this is treated as
# malformed or hostile input: stop recursing instead of risking a stack overflow.
_MAX_SCRUB_DEPTH = 100


def _scrub_str(value: str, secret_set: set[str]) -> str:
    """Return ``value`` with credentials and private upload paths masked.

    A string that exactly equals a member of ``secret_set`` collapses to the
    ``REDACTED`` placeholder. A string that begins with the account-upload URL prefix
    keeps that prefix and has everything after it replaced by the placeholder, so the
    high-entropy hash never reaches a fixture. Any other string is returned unchanged.
    """
    is_secret = value in secret_set
    # Short-circuit keeps the prefix test from running for known secrets.
    if not is_secret and not value.startswith(UPLOAD_PREFIX):
        return value
    return REDACTED if is_secret else UPLOAD_PREFIX + REDACTED


def _build_scrubber(secrets: list[str]) -> Callable[[Any], Any]:
    """Build a recursive function that strips secrets and identifiers from a payload.

    ``secrets`` holds exact string values (API key, session JWT/token); empty entries
    are ignored. The returned function walks dicts and lists, replacing:

    * any string value equal to a secret, or starting with the upload URL prefix,
      via ``_scrub_str``;
    * the value under an ``email`` or ``account_id`` key with a fixed fake, whatever
      the original value was.

    Everything else passes through untouched, so the result keeps the shape of the
    real response. Recursing past ``_MAX_SCRUB_DEPTH`` levels raises ``CLIError``.
    """
    known_secrets = set(filter(None, secrets))
    # Keys whose values are always swapped for a stable placeholder.
    fakes: dict[str, Any] = {"email": FAKE_EMAIL, "account_id": FAKE_ACCOUNT_ID}

    def scrub(obj: Any, depth: int = 0) -> Any:
        if depth > _MAX_SCRUB_DEPTH:
            raise CLIError(
                f"Fixture nesting exceeded {_MAX_SCRUB_DEPTH} levels; refusing to scrub."
            )
        if isinstance(obj, str):
            return _scrub_str(obj, known_secrets)
        if isinstance(obj, list):
            return [scrub(element, depth + 1) for element in obj]
        if not isinstance(obj, dict):
            # Numbers, booleans, None, ...: nothing to redact.
            return obj
        cleaned: dict[str, Any] = {}
        for field, raw in obj.items():
            cleaned[field] = fakes[field] if field in fakes else scrub(raw, depth + 1)
        return cleaned

    return scrub


def _out(message: str) -> None:
    """Write ``message`` to standard output as one newline-terminated line."""
    line = message + "\n"
    sys.stdout.write(line)


def _err(message: str) -> None:
    """Write ``message`` to standard error as one newline-terminated line."""
    line = message + "\n"
    sys.stderr.write(line)


def _write(name: str, payload: object, scrub: Callable[[Any], Any]) -> None:
    """Scrub ``payload`` and save it as ``<FIXTURE_DIR>/<name>.json``.

    The fixture directory is created if missing. The file holds the scrubbed payload
    as 2-space-indented JSON followed by a trailing newline, and a progress line
    naming the written file goes to stdout.
    """
    FIXTURE_DIR.mkdir(parents=True, exist_ok=True)
    path = FIXTURE_DIR / f"{name}.json"
    path.write_text(json.dumps(scrub(payload), indent=2) + "\n", encoding="utf-8")
    # relative_to() raises ValueError when the current directory is not an ancestor
    # of the fixture directory (e.g. the script was launched from outside the repo
    # root). The file is already written at that point, so fall back to the absolute
    # path for the progress message instead of crashing.
    try:
        shown = path.relative_to(Path.cwd())
    except ValueError:
        shown = path
    _out(f" wrote {shown}")


def _transcript_payload(transcript: aai.Transcript) -> dict[str, object]:
    """Return the transcript's raw API ``json_response`` as a fresh plain dict.

    The payload comes from ``client.transcript_json_payload``; it is what get_by_id
    would parse.
    """
    return dict(client.transcript_json_payload(transcript))


def main() -> int:
    """Record every fixture against the live API and return a process exit code.

    Returns 1, after a message on stderr, when ``ASSEMBLYAI_API_KEY`` is unset or the
    ``PROFILE`` login has no stored session or account id. Otherwise returns 0, even
    when some of the account/LLM fixtures were skipped because their call raised
    ``CLIError``. The three transcript fixtures are recorded outside that guard, so a
    ``CLIError`` from them propagates to the caller.
    """
    environments.set_active(environments.get(environments.DEFAULT_ENV))

    api_key = os.environ.get("ASSEMBLYAI_API_KEY")
    if not api_key:
        _err("ASSEMBLYAI_API_KEY is not set.")
        return 1

    session = config.get_session(PROFILE)
    account_id = config.get_account_id(PROFILE)
    if session is None or account_id is None:
        _err(f"No AMS session for profile {PROFILE!r}; run 'aai login' first.")
        return 1
    jwt = session["jwt"]
    scrub = _build_scrubber([api_key, jwt, session.get("token", "")])

    _out(f"Recording fixtures into {FIXTURE_DIR}")

    # Transcript fixtures first: the freshly transcribed sample supplies the id that
    # the "get" fixture fetches.
    recorded = client.transcribe(
        api_key, client.SAMPLE_AUDIO_URL, config=aai.TranscriptionConfig()
    )
    _write("transcribe_sample", _transcript_payload(recorded), scrub)
    sample_id = recorded.id
    if sample_id is None:  # a completed transcript always has an id; guard for the type checker
        raise CLIError("Transcribe returned no transcript id.")

    _write("transcripts_list", client.list_transcripts(api_key, limit=10), scrub)

    fetched = client.get_transcript(api_key, sample_id)
    _write("transcript_get", _transcript_payload(fetched), scrub)

    # Usage window: midnight UTC on the first of the current month up to midnight UTC
    # today, both as ISO-8601 strings.
    midnight = datetime.now(UTC).replace(hour=0, minute=0, second=0, microsecond=0)
    start = midnight.replace(day=1).isoformat()
    end = midnight.isoformat()

    def record_balance() -> object:
        return ams.get_balance(jwt)

    def record_usage() -> object:
        return ams.get_usage(jwt, start, end, "day")

    def record_limits() -> object:
        return ams.get_rate_limits(account_id, jwt)

    def record_llm() -> object:
        completion = llm.complete(
            api_key,
            model=llm.DEFAULT_MODEL,
            messages=llm.build_messages("Reply with exactly one word: PONG"),
            max_tokens=16,
        )
        return completion.model_dump(mode="json")

    # (name, producer) pairs. Each one runs on its own so a single failure (e.g. an
    # LLM entitlement block) doesn't lose the others.
    recordings: list[tuple[str, Callable[[], object]]] = [
        ("account_balance", record_balance),
        ("account_usage", record_usage),
        ("account_limits", record_limits),
        ("llm_complete", record_llm),
    ]
    for name, produce in recordings:
        try:
            _write(name, produce(), scrub)
        except CLIError as exc:
            # The client/ams/llm wrappers funnel every expected network failure into a
            # CLIError, so a blocked LLM entitlement skips just that fixture.
            _err(f" SKIP {name}: {exc}")

    _out("Done.")
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
5 changes: 5 additions & 0 deletions tests/fixtures/api/account_balance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"account_id": 12345,
"metronome_customer_id": "cb695cb4-804e-46fe-88f8-83c1d1b3f057",
"balance_in_cents": 87958.0
}
3 changes: 3 additions & 0 deletions tests/fixtures/api/account_limits.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"rate_limits": []
}
Loading
Loading