From b628ccb868adef7cfd316fb8d6e0c17e20dd3ea3 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 16 Jun 2026 20:57:28 +0000
Subject: [PATCH] Let `assembly llm` read file arguments as the prompt's
 context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Accept file paths after the prompt; each is read, prefixed with a
`===== <name> =====` header, and concatenated as the prompt's context,
then sent in a single gateway call. This collapses the common
shell-glue pattern of `cat`-ing notes into a pipe — e.g.
`assembly llm "... Question: ..." notes/*.md` instead of building a
file array and piping it in.

Precedence for one-shot input is now --transcript-id > files > stdin;
a higher-priority source present alongside a lower one ignores the
lower with a visible warning (suppressed by --quiet, structured under
--json). A missing/unreadable path is a usage error raised before auth
or network. File arguments are rejected in --follow mode, which runs
over live stdin.

Split the --follow live-mode tests out of test_llm_command.py into
test_llm_follow.py to keep both files under the 500-line gate.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01WMZ8fCo2D2giFUfkwc4jUa
---
 README.md                                     |   8 +-
 aai_cli/commands/llm/__init__.py              |  12 +
 aai_cli/commands/llm/_exec.py                 |  94 ++++--
 .../test_snapshots_help_run.ambr              |  10 +-
 tests/test_llm_command.py                     | 287 ++++++++----------
 tests/test_llm_follow.py                      | 197 ++++++++++++
 6 files changed, 422 insertions(+), 186 deletions(-)
 create mode 100644 tests/test_llm_follow.py
diff --git a/README.md b/README.md
index 242008a8..d9fa3e78 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ That's it. Run `assembly onboard` for a guided tour, or see [Installation](#-ins
 | `assembly agent` | Full-duplex spoken conversation with a voice agent, right in your terminal |
 | `assembly agent-cascade` | Same live conversation, but wired client-side from Streaming STT + the LLM Gateway + streaming TTS, like the `agent-cascade` starter (sandbox-only) |
 | `assembly speak` | Synthesize text to speech over the streaming-TTS WebSocket (sandbox-only) |
-| `assembly llm` | Prompt the LLM Gateway over a transcript, stdin, or a live stream |
+| `assembly llm` | Prompt the LLM Gateway over a transcript, files, stdin, or a live stream |
 | `assembly clip` | Cut audio/video with ffmpeg by diarized speaker, text match, LLM pick, or time range (`--video` keeps the picture for URL sources) — clip boundaries snap into nearby silence |
 | `assembly dub` | Re-voice an audio/video file or URL in another language: transcription, LLM translation, per-speaker TTS, ffmpeg track-swap (sandbox-only) |
 | `assembly caption` | Burn always-visible captions into a video: transcribe (or reuse a transcript), fetch SRT, ffmpeg burns it in — audio untouched |
@@ -300,6 +300,12 @@ ffmpeg -i talk.mp4 -f wav - | assembly transcribe -
 git log --oneline -30 | assembly llm "write release notes grouped by feature/fix"
 ```
 
+Pass files straight to `llm` instead of building the pipeline yourself — each is read, prefixed with a `===== name =====` header (the file name without its extension), and concatenated as the prompt's context (so the answer can cite which note it came from):
+
+```sh
+assembly llm "answer using only these notes: who owns the deploy?" notes/*.md
+```
+
 ## 📚 Documentation
 
 ### In the terminal
diff --git a/aai_cli/commands/llm/__init__.py b/aai_cli/commands/llm/__init__.py
index 8c1d5f15..7652119f 100644
--- a/aai_cli/commands/llm/__init__.py
+++ b/aai_cli/commands/llm/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from pathlib import Path
+
 import typer
 
 from aai_cli import command_registry, help_panels, options
@@ -41,6 +43,10 @@ def _list_models(output_field: choices.TextOrJson | None, json_mode: bool) -> No
                 'assembly llm "summarize the key decisions" --transcript-id 5551234-abcd',
             ),
             ("Pipe any text in", 'echo "meeting notes" | assembly llm "turn into action items"'),
+            (
+                "Read one or more files as context",
+                'assembly llm "answer using only these notes: who owns the deploy?" notes/*.md',
+            ),
             (
                 "Pick a model and add a system prompt",
                 'assembly llm "draft a follow-up email" --model claude-opus-4-7 --system "Be concise."',
@@ -52,6 +58,11 @@ def _list_models(output_field: choices.TextOrJson | None, json_mode: bool) -> No
 def llm(
     ctx: typer.Context,
     prompt: str | None = typer.Argument(None, help="The prompt to send to the model"),
+    files: list[Path] | None = typer.Argument(
+        None,
+        help="Optional input files to read as the prompt's context (each is header-prefixed "
+        "with its name and concatenated; takes priority over piped stdin)",
+    ),
     # Note: text piped on stdin is injected into the prompt (e.g. `cat notes | assembly llm "summarize"`).
     model: str = typer.Option(
         gateway.DEFAULT_MODEL,
@@ -103,6 +114,7 @@ def llm(
 
     opts = llm_exec.LlmOptions(
         prompt=prompt,
+        files=tuple(files or ()),
         model=model,
         transcript_id=transcript_id,
         system=system,
diff --git a/aai_cli/commands/llm/_exec.py b/aai_cli/commands/llm/_exec.py
index 29934dd0..af3dd3da 100644
--- a/aai_cli/commands/llm/_exec.py
+++ b/aai_cli/commands/llm/_exec.py
@@ -10,6 +10,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from pathlib import Path
 
 from rich.markup import escape
 
@@ -44,10 +45,15 @@ class LlmOptions:
     max_tokens: int
     # Raw --config KEY=VALUE pairs; parsed (and validated) once in run_llm.
     config_kv: tuple[str, ...] = ()
+    # Input files read as the prompt's context (header-prefixed, concatenated).
+    files: tuple[Path, ...] = ()
 
 
 def _validate_follow_args(
-    prompt: str | None, output_field: str | None, transcript_id: str | None
+    prompt: str | None,
+    output_field: str | None,
+    transcript_id: str | None,
+    files: tuple[Path, ...],
 ) -> str:
     """Reject flag combinations that don't apply to --follow's live-panel mode.
 
@@ -65,36 +71,84 @@ def _validate_follow_args(
             "--follow runs over live transcript text piped on stdin; it can't be "
             "combined with --transcript-id."
         )
+    if files:
+        raise UsageError(
+            "--follow runs over live transcript text piped on stdin; it can't be "
+            "combined with file arguments."
+        )
     if not stdio.stdin_is_piped():
         raise UsageError(_FOLLOW_STDIN_MESSAGE)
     return prompt
 
 
-def _stdin_transcript_text(
-    state: AppState, transcript_id: str | None, *, json_mode: bool
+def _read_files(files: tuple[Path, ...]) -> str:
+    """Read each file and join them, each prefixed with a ``===== name =====`` header.
+
+    The header names each source (the file's stem) so a multi-file prompt can cite
+    which note an answer came from; it's applied uniformly, even for a single file,
+    so the format the model sees is predictable. A missing, unreadable, or non-UTF-8
+    path is a usage error raised before any auth or network — the same fail-fast
+    ordering as the --transcript-id check.
+    """
+    sections: list[str] = []
+    for path in files:
+        try:
+            text = path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:  # binary/non-UTF-8 input too
+            raise UsageError(
+                f"Couldn't read {path}: {getattr(exc, 'strerror', None) or exc}.",
+                suggestion="Check the path points at a readable file.",
+            ) from exc
+        sections.append(f"===== {path.stem} =====\n{text}")
+    return "\n\n".join(sections)
+
+
+def _input_text(
+    state: AppState, transcript_id: str | None, files: tuple[Path, ...], *, json_mode: bool
 ) -> str | None:
-    """Resolve the inline transcript text for one-shot mode.
+    """Resolve the inline text the prompt operates on for one-shot mode.
 
-    Text piped on stdin becomes the content the prompt operates on, unless an
-    explicit --transcript-id is given — that injects server-side and takes
-    priority, so piped text is ignored with a visible warning (suppressed by
-    --quiet, structured under --json).
+    Three possible sources, in priority order: an explicit --transcript-id (injected
+    server-side, so this returns None), one or more file arguments (read and
+    concatenated), or text piped on stdin. A higher-priority source present alongside
+    a lower one ignores the lower with a visible warning (suppressed by --quiet,
+    structured under --json).
     """
-    if transcript_id is None:
-        return stdio.piped_stdin_text()
-    # Same cheap local id check as `transcripts get`, before auth or network.
-    client.validate_transcript_id(transcript_id)
-    if stdio.stdin_is_piped() and not state.quiet:
-        output.emit_warning(
-            "Ignoring piped stdin; --transcript-id takes priority.", json_mode=json_mode
-        )
-    return None
+    if transcript_id is not None:
+        # Same cheap local id check as `transcripts get`, before auth or network.
+        client.validate_transcript_id(transcript_id)
+        ignored = _ignored_sources(files, stdio.stdin_is_piped())
+        if ignored and not state.quiet:
+            output.emit_warning(
+                f"Ignoring {ignored}; --transcript-id takes priority.", json_mode=json_mode
+            )
+        return None
+    if files:
+        if stdio.stdin_is_piped() and not state.quiet:
+            output.emit_warning(
+                "Ignoring piped stdin; file arguments take priority.", json_mode=json_mode
+            )
+        return _read_files(files)
+    return stdio.piped_stdin_text()
+
+
+def _ignored_sources(files: tuple[Path, ...], stdin_piped: bool) -> str | None:
+    """Name the lower-priority input sources present alongside --transcript-id, for the
+    warning — or None when there's nothing to ignore."""
+    sources: list[str] = []
+    if files:
+        sources.append("file arguments")
+    if stdin_piped:
+        sources.append("piped stdin")
+    return " and ".join(sources) or None
 
 
 def _run_follow(
     opts: LlmOptions, state: AppState, extra: dict[str, object], *, json_mode: bool
 ) -> None:
-    prompt_text = _validate_follow_args(opts.prompt, opts.output_field, opts.transcript_id)
+    prompt_text = _validate_follow_args(
+        opts.prompt, opts.output_field, opts.transcript_id, opts.files
+    )
     api_key = state.resolve_api_key()
 
     def ask(transcript_text: str) -> str:
@@ -131,13 +185,13 @@ def _run_oneshot(
             suggestion="Or pass --list-models to see available models.",
         )
     prompt_text = opts.prompt
-    stdin_text = _stdin_transcript_text(state, opts.transcript_id, json_mode=json_mode)
+    input_text = _input_text(state, opts.transcript_id, opts.files, json_mode=json_mode)
     api_key = state.resolve_api_key()
     messages = gateway.build_messages(
         prompt_text,
         system=opts.system,
         transcript_id=opts.transcript_id,
-        transcript_text=stdin_text,
+        transcript_text=input_text,
     )
     response = gateway.complete(
         api_key,
diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr
index 1d2d4cd6..af616dc2 100644
--- a/tests/__snapshots__/test_snapshots_help_run.ambr
+++ b/tests/__snapshots__/test_snapshots_help_run.ambr
@@ -570,7 +570,7 @@
 # name: test_command_help_matches_snapshot[llm]
   '''
   
-   Usage: assembly llm [OPTIONS] [PROMPT]
+   Usage: assembly llm [OPTIONS] [PROMPT] [FILES]...
   
    Send a prompt to AssemblyAI's LLM Gateway and print the reply
   
@@ -579,7 +579,10 @@
    --transcript-id ID).
   
   ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-  │   prompt      [PROMPT]  The prompt to send to the model                      │
+  │   prompt      [PROMPT]    The prompt to send to the model                    │
+  │   files       [FILES]...  Optional input files to read as the prompt's       │
+  │                           context (each is header-prefixed with its name and │
+  │                           concatenated; takes priority over piped stdin)     │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
   │ --model                  TEXT                  LLM Gateway model             │
@@ -619,6 +622,9 @@
    $ assembly llm "summarize the key decisions" --transcript-id 5551234-abcd
    Pipe any text in
    $ echo "meeting notes" | assembly llm "turn into action items"
+   Read one or more files as context
+   $ assembly llm "answer using only these notes: who owns the deploy?"
+   notes/*.md
    Pick a model and add a system prompt
    $ assembly llm "draft a follow-up email" --model claude-opus-4-7 --system "Be
    concise."
diff --git a/tests/test_llm_command.py b/tests/test_llm_command.py
index 8456c73a..fdfea7ba 100644
--- a/tests/test_llm_command.py
+++ b/tests/test_llm_command.py
@@ -128,6 +128,130 @@ def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, e
     assert seen["transcript_id"] is None
 
 
+def test_llm_reads_file_argument_as_context(monkeypatch, tmp_path):
+    _auth()
+    seen = {}
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        seen["content"] = messages[0]["content"]
+        seen["transcript_id"] = transcript_id
+        return _payload("done")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    note = tmp_path / "alpha.md"
+    note.write_text("bob owns the deploy")
+    result = runner.invoke(app, ["llm", "who owns the deploy?", str(note), "--json"])
+    assert result.exit_code == 0
+    # The file content is injected, under a header naming the file's stem.
+    assert "who owns the deploy?" in seen["content"]
+    assert "bob owns the deploy" in seen["content"]
+    assert "===== alpha =====" in seen["content"]
+    assert seen["transcript_id"] is None
+
+
+def test_llm_concatenates_multiple_files_with_headers_in_order(monkeypatch, tmp_path):
+    _auth()
+    seen = {}
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        seen["content"] = messages[0]["content"]
+        return _payload("done")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    first = tmp_path / "first.md"
+    first.write_text("ship friday")
+    second = tmp_path / "second.md"
+    second.write_text("freeze monday")
+    result = runner.invoke(app, ["llm", "summarize", str(first), str(second), "--json"])
+    assert result.exit_code == 0
+    content = seen["content"]
+    assert "===== first =====" in content
+    assert "===== second =====" in content
+    assert "ship friday" in content
+    assert "freeze monday" in content
+    # Both note bodies appear under their own header, in the order passed.
+    assert content.index("===== first =====") < content.index("===== second =====")
+    assert content.index("ship friday") < content.index("freeze monday")
+
+
+def test_llm_files_take_priority_over_stdin(monkeypatch, tmp_path):
+    _auth()
+    seen = {}
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        seen["content"] = messages[0]["content"]
+        return _payload("done")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    note = tmp_path / "note.md"
+    note.write_text("from the file")
+    result = runner.invoke(
+        app, ["llm", "summarize", str(note)], input="from stdin, should be ignored"
+    )
+    assert result.exit_code == 0
+    assert "from the file" in seen["content"]
+    assert "from stdin, should be ignored" not in seen["content"]
+    assert "Ignoring piped stdin; file arguments take priority." in result.output
+
+
+def test_llm_missing_file_exits_2_without_network(monkeypatch, tmp_path):
+    # A bad path (e.g. an unmatched shell glob passed through literally) is a usage
+    # error raised before auth or the gateway, not a crash.
+    _auth()
+    monkeypatch.setattr(
+        "aai_cli.commands.llm.gateway.complete",
+        lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not call the gateway")),
+    )
+    missing = tmp_path / "nope.md"
+    result = runner.invoke(app, ["llm", "summarize", str(missing)])
+    assert result.exit_code == 2
+    assert "Couldn't read" in result.output
+    # The clean OS reason (errno's strerror) is shown, not the raw exception repr —
+    # so no "[Errno N] …: '/path'" bracket leaks into the message.
+    assert "[Errno" not in result.output
+
+
+def test_llm_files_with_terminal_stdin_emits_no_warning(monkeypatch, tmp_path):
+    # With files given and stdin a terminal (not piped), there's nothing being
+    # ignored, so the "Ignoring piped stdin" warning must not fire.
+    _auth()
+    monkeypatch.setattr("aai_cli.commands.llm._exec.stdio.stdin_is_piped", lambda: False)
+    seen = {}
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        seen["content"] = messages[0]["content"]
+        return _payload("done")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    note = tmp_path / "note.md"
+    note.write_text("only the file")
+    result = runner.invoke(app, ["llm", "summarize", str(note)])
+    assert result.exit_code == 0
+    assert "only the file" in seen["content"]
+    assert "Ignoring piped stdin" not in result.output
+
+
+def test_llm_transcript_id_takes_priority_over_files(monkeypatch, tmp_path):
+    _auth()
+    seen = {}
+    # Pin stdin to a terminal so only the file argument is the ignored source.
+    monkeypatch.setattr("aai_cli.commands.llm._exec.stdio.stdin_is_piped", lambda: False)
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        seen["content"] = messages[0]["content"]
+        seen["transcript_id"] = transcript_id
+        return _payload("s")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    note = tmp_path / "note.md"
+    note.write_text("file content here")
+    result = runner.invoke(app, ["llm", "summarize", str(note), "--transcript-id", "t_9"])
+    assert result.exit_code == 0
+    assert seen["transcript_id"] == "t_9"
+    assert "file content here" not in seen["content"]
+    assert "Ignoring file arguments; --transcript-id takes priority." in result.output
+
+
 def test_llm_transcript_id_takes_priority_over_stdin(monkeypatch):
     _auth()
     seen = {}
@@ -252,83 +376,6 @@ def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, e
     assert "Run the same command again" in result.output
 
 
-def test_llm_follow_summarizes_each_turn(monkeypatch):
-    _auth()
-    calls = []
-
-    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
-        calls.append(messages[-1]["content"])
-        return _payload(f"summary-{len(calls)}")
-
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
-    result = runner.invoke(
-        app,
-        ["llm", "summarize action items", "--follow", "--json"],
-        input="we ship friday\nbob owns the deploy\n",
-    )
-    assert result.exit_code == 0
-    updates = [json.loads(line) for line in result.output.splitlines() if line.strip()]
-    # One update per finalized turn, full transcript accumulating each time.
-    assert len(updates) == 2
-    assert "we ship friday" in calls[0]
-    assert "bob owns the deploy" not in calls[0]
-    assert "we ship friday" in calls[1]
-    assert "bob owns the deploy" in calls[1]
-    assert updates[-1]["output"] == "summary-2"
-    assert updates[-1]["turns"] == 2
-
-
-def test_llm_follow_includes_system_prompt(monkeypatch):
-    _auth()
-    seen = {}
-
-    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
-        seen["roles"] = [m["role"] for m in messages]
-        seen["system"] = messages[0]["content"]
-        return _payload("ok")
-
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
-    result = runner.invoke(
-        app,
-        ["llm", "summarize", "--follow", "--system", "You are a scribe", "--json"],
-        input="one turn\n",
-    )
-    assert result.exit_code == 0
-    assert seen["roles"][0] == "system"
-    assert seen["system"] == "You are a scribe"
-
-
-def test_llm_follow_rejects_transcript_id(monkeypatch):
-    _auth()
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
-    result = runner.invoke(
-        app,
-        ["llm", "summarize", "--follow", "--transcript-id", "t_1", "--json"],
-        input="x\n",
-    )
-    assert result.exit_code == 2
-    assert "transcript-id" in result.output
-
-
-def test_llm_follow_ignores_blank_lines(monkeypatch):
-    _auth()
-    calls = []
-
-    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
-        calls.append(messages[-1]["content"])
-        return _payload("ok")
-
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
-    result = runner.invoke(
-        app,
-        ["llm", "summarize", "--follow", "--json"],
-        input="first\n\n   \nsecond\n",
-    )
-    assert result.exit_code == 0
-    # Blank/whitespace-only lines don't trigger a call.
-    assert len(calls) == 2
-
-
 def test_llm_output_text_prints_raw_answer(monkeypatch):
     _auth()
     monkeypatch.setattr(
@@ -368,92 +415,6 @@ def test_llm_output_invalid_field_exits_2(monkeypatch):
     assert result.exit_code == 2
 
 
-def test_llm_output_with_follow_is_rejected(monkeypatch):
-    _auth()
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
-    result = runner.invoke(app, ["llm", "hi", "-f", "-o", "text"], input="x\n")
-    assert result.exit_code == 2
-    assert "one-shot" in result.output
-
-
-def test_llm_follow_requires_a_prompt(monkeypatch):
-    # --follow re-runs a prompt over each turn; with no prompt there's nothing to run.
-    _auth()
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
-    result = runner.invoke(app, ["llm", "--follow", "--json"], input="x\n")
-    assert result.exit_code == 2
-    assert "prompt" in result.output.lower()
-
-
-def test_llm_follow_requires_piped_stdin(monkeypatch):
-    # Interactively (no pipe) --follow would block forever; reject it with guidance.
-    _auth()
-    monkeypatch.setattr("aai_cli.commands.llm._exec.stdio.stdin_is_piped", lambda: False)
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
-    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"])
-    assert result.exit_code == 2
-    assert "stdin" in result.output.lower()
-
-
-def test_llm_follow_empty_stdin_exits_2(monkeypatch):
-    # `assembly llm -f "…" </dev/null` must not exit 0 silently: an empty pipe means the
-    # prompt never ran, which is a usage error, not a success.
-    _auth()
-    calls = []
-
-    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
-        calls.append(messages)
-        return _payload("ok")
-
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
-    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="")
-    assert result.exit_code == 2
-    assert "--follow needs transcript text piped on stdin" in result.output
-    assert calls == []  # no API call was made
-
-
-def test_llm_follow_interrupt_before_first_turn_still_exits_0(monkeypatch):
-    # Ctrl-C before any turn arrives is the normal "stop watching" signal, not the
-    # empty-stdin usage error.
-    _auth()
-
-    class _InterruptIter:
-        def __iter__(self):
-            return self
-
-        def __next__(self):
-            raise KeyboardInterrupt
-
-    monkeypatch.setattr(
-        "aai_cli.commands.llm._exec.stdio.iter_piped_stdin_lines", lambda: _InterruptIter()
-    )
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
-    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="")
-    assert result.exit_code == 0
-    assert "--follow needs transcript text piped on stdin" not in result.output
-
-
-def test_llm_follow_stops_cleanly_on_interrupt(monkeypatch):
-    _auth()
-    calls = []
-
-    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
-        calls.append(messages[-1]["content"])
-        if len(calls) == 2:
-            raise KeyboardInterrupt  # user hits Ctrl-C mid-meeting
-        return _payload("ok")
-
-    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
-    result = runner.invoke(
-        app, ["llm", "summarize", "--follow", "--json"], input="alpha\nbeta\ngamma\n"
-    )
-    # Ctrl-C is a normal stop, not an error.
-    assert result.exit_code == 0
-    updates = [json.loads(line) for line in result.output.splitlines() if line.strip()]
-    assert len(updates) == 1
-    assert updates[0]["turns"] == 1
-
-
 def test_llm_passes_model_and_max_tokens(monkeypatch):
     _auth()
     seen = {}
diff --git a/tests/test_llm_follow.py b/tests/test_llm_follow.py
new file mode 100644
index 00000000..9755eec8
--- /dev/null
+++ b/tests/test_llm_follow.py
@@ -0,0 +1,197 @@
+import json
+import types
+
+from typer.testing import CliRunner
+
+from aai_cli.core import config
+from aai_cli.main import app
+
+runner = CliRunner()
+
+
+def _auth():
+    config.set_api_key("default", "sk_live")
+
+
+def _payload(content="four"):
+    # Mimics the OpenAI SDK response object the command reads via content_of/usage_of.
+    # `usage` is a CompletionUsage-like model (model_dump), not a raw dict.
+    message = types.SimpleNamespace(role="assistant", content=content)
+    choice = types.SimpleNamespace(message=message, finish_reason="stop")
+    usage = types.SimpleNamespace(model_dump=lambda: {"total_tokens": 3})
+    return types.SimpleNamespace(choices=[choice], usage=usage)
+
+
+def test_llm_follow_summarizes_each_turn(monkeypatch):
+    _auth()
+    calls = []
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        calls.append(messages[-1]["content"])
+        return _payload(f"summary-{len(calls)}")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    result = runner.invoke(
+        app,
+        ["llm", "summarize action items", "--follow", "--json"],
+        input="we ship friday\nbob owns the deploy\n",
+    )
+    assert result.exit_code == 0
+    updates = [json.loads(line) for line in result.output.splitlines() if line.strip()]
+    # One update per finalized turn, full transcript accumulating each time.
+    assert len(updates) == 2
+    assert "we ship friday" in calls[0]
+    assert "bob owns the deploy" not in calls[0]
+    assert "we ship friday" in calls[1]
+    assert "bob owns the deploy" in calls[1]
+    assert updates[-1]["output"] == "summary-2"
+    assert updates[-1]["turns"] == 2
+
+
+def test_llm_follow_includes_system_prompt(monkeypatch):
+    _auth()
+    seen = {}
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        seen["roles"] = [m["role"] for m in messages]
+        seen["system"] = messages[0]["content"]
+        return _payload("ok")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    result = runner.invoke(
+        app,
+        ["llm", "summarize", "--follow", "--system", "You are a scribe", "--json"],
+        input="one turn\n",
+    )
+    assert result.exit_code == 0
+    assert seen["roles"][0] == "system"
+    assert seen["system"] == "You are a scribe"
+
+
+def test_llm_follow_rejects_transcript_id(monkeypatch):
+    _auth()
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
+    result = runner.invoke(
+        app,
+        ["llm", "summarize", "--follow", "--transcript-id", "t_1", "--json"],
+        input="x\n",
+    )
+    assert result.exit_code == 2
+    assert "transcript-id" in result.output
+
+
+def test_llm_follow_rejects_file_arguments(monkeypatch, tmp_path):
+    # --follow runs over live stdin; a file argument has no meaning there, so reject
+    # it rather than silently ignore it.
+    _auth()
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
+    note = tmp_path / "note.md"
+    note.write_text("x")
+    result = runner.invoke(app, ["llm", "summarize", str(note), "--follow", "--json"], input="x\n")
+    assert result.exit_code == 2
+    assert "file arguments" in result.output
+
+
+def test_llm_follow_ignores_blank_lines(monkeypatch):
+    _auth()
+    calls = []
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        calls.append(messages[-1]["content"])
+        return _payload("ok")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    result = runner.invoke(
+        app,
+        ["llm", "summarize", "--follow", "--json"],
+        input="first\n\n   \nsecond\n",
+    )
+    assert result.exit_code == 0
+    # Blank/whitespace-only lines don't trigger a call.
+    assert len(calls) == 2
+
+
+def test_llm_output_with_follow_is_rejected(monkeypatch):
+    _auth()
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
+    result = runner.invoke(app, ["llm", "hi", "-f", "-o", "text"], input="x\n")
+    assert result.exit_code == 2
+    assert "one-shot" in result.output
+
+
+def test_llm_follow_requires_a_prompt(monkeypatch):
+    # --follow re-runs a prompt over each turn; with no prompt there's nothing to run.
+    _auth()
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
+    result = runner.invoke(app, ["llm", "--follow", "--json"], input="x\n")
+    assert result.exit_code == 2
+    assert "prompt" in result.output.lower()
+
+
+def test_llm_follow_requires_piped_stdin(monkeypatch):
+    # Interactively (no pipe) --follow would block forever; reject it with guidance.
+    _auth()
+    monkeypatch.setattr("aai_cli.commands.llm._exec.stdio.stdin_is_piped", lambda: False)
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
+    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"])
+    assert result.exit_code == 2
+    assert "stdin" in result.output.lower()
+
+
+def test_llm_follow_empty_stdin_exits_2(monkeypatch):
+    # `assembly llm -f "…" </dev/null` must not exit 0 silently: an empty pipe means the
+    # prompt never ran, which is a usage error, not a success.
+    _auth()
+    calls = []
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        calls.append(messages)
+        return _payload("ok")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="")
+    assert result.exit_code == 2
+    assert "--follow needs transcript text piped on stdin" in result.output
+    assert calls == []  # no API call was made
+
+
+def test_llm_follow_interrupt_before_first_turn_still_exits_0(monkeypatch):
+    # Ctrl-C before any turn arrives is the normal "stop watching" signal, not the
+    # empty-stdin usage error.
+    _auth()
+
+    class _InterruptIter:
+        def __iter__(self):
+            return self
+
+        def __next__(self):
+            raise KeyboardInterrupt
+
+    monkeypatch.setattr(
+        "aai_cli.commands.llm._exec.stdio.iter_piped_stdin_lines", lambda: _InterruptIter()
+    )
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
+    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="")
+    assert result.exit_code == 0
+    assert "--follow needs transcript text piped on stdin" not in result.output
+
+
+def test_llm_follow_stops_cleanly_on_interrupt(monkeypatch):
+    _auth()
+    calls = []
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None):
+        calls.append(messages[-1]["content"])
+        if len(calls) == 2:
+            raise KeyboardInterrupt  # user hits Ctrl-C mid-meeting
+        return _payload("ok")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    result = runner.invoke(
+        app, ["llm", "summarize", "--follow", "--json"], input="alpha\nbeta\ngamma\n"
+    )
+    # Ctrl-C is a normal stop, not an error.
+    assert result.exit_code == 0
+    updates = [json.loads(line) for line in result.output.splitlines() if line.strip()]
+    assert len(updates) == 1
+    assert updates[0]["turns"] == 1