From b628ccb868adef7cfd316fb8d6e0c17e20dd3ea3 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 20:57:28 +0000 Subject: [PATCH] Let `assembly llm` read file arguments as the prompt's context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accept file paths after the prompt; each is read, prefixed with a `===== name =====` header, and concatenated as the prompt's context, then sent in a single gateway call. This collapses the common shell-glue pattern of `cat`-ing notes into a pipe — e.g. `assembly llm "... Question: ..." notes/*.md` instead of building a file array and piping it in. Precedence for one-shot input is now --transcript-id > files > stdin; a higher-priority source present alongside a lower one ignores the lower with a visible warning (suppressed by --quiet, structured under --json). A missing/unreadable path is a usage error raised before auth or network. File arguments are rejected in --follow mode, which runs over live stdin. Split the --follow live-mode tests out of test_llm_command.py into test_llm_follow.py to keep both files under the 500-line gate. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01WMZ8fCo2D2giFUfkwc4jUa --- README.md | 8 +- aai_cli/commands/llm/__init__.py | 12 + aai_cli/commands/llm/_exec.py | 94 ++++-- .../test_snapshots_help_run.ambr | 10 +- tests/test_llm_command.py | 287 ++++++++---------- tests/test_llm_follow.py | 197 ++++++++++++ 6 files changed, 422 insertions(+), 186 deletions(-) create mode 100644 tests/test_llm_follow.py diff --git a/README.md b/README.md index 242008a8..d9fa3e78 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ That's it. 
Run `assembly onboard` for a guided tour, or see [Installation](#-ins | `assembly agent` | Full-duplex spoken conversation with a voice agent, right in your terminal | | `assembly agent-cascade` | Same live conversation, but wired client-side from Streaming STT + the LLM Gateway + streaming TTS, like the `agent-cascade` starter (sandbox-only) | | `assembly speak` | Synthesize text to speech over the streaming-TTS WebSocket (sandbox-only) | -| `assembly llm` | Prompt the LLM Gateway over a transcript, stdin, or a live stream | +| `assembly llm` | Prompt the LLM Gateway over a transcript, files, stdin, or a live stream | | `assembly clip` | Cut audio/video with ffmpeg by diarized speaker, text match, LLM pick, or time range (`--video` keeps the picture for URL sources) — clip boundaries snap into nearby silence | | `assembly dub` | Re-voice an audio/video file or URL in another language: transcription, LLM translation, per-speaker TTS, ffmpeg track-swap (sandbox-only) | | `assembly caption` | Burn always-visible captions into a video: transcribe (or reuse a transcript), fetch SRT, ffmpeg burns it in — audio untouched | @@ -300,6 +300,12 @@ ffmpeg -i talk.mp4 -f wav - | assembly transcribe - git log --oneline -30 | assembly llm "write release notes grouped by feature/fix" ``` +Pass files straight to `llm` instead of building the pipeline yourself — each is read, prefixed with a `===== name =====` header, and concatenated as the prompt's context (so the answer can cite which note it came from): + +```sh +assembly llm "answer using only these notes: who owns the deploy?" 
notes/*.md +``` + ## 📚 Documentation ### In the terminal diff --git a/aai_cli/commands/llm/__init__.py b/aai_cli/commands/llm/__init__.py index 8c1d5f15..7652119f 100644 --- a/aai_cli/commands/llm/__init__.py +++ b/aai_cli/commands/llm/__init__.py @@ -1,5 +1,7 @@ from __future__ import annotations +from pathlib import Path + import typer from aai_cli import command_registry, help_panels, options @@ -41,6 +43,10 @@ def _list_models(output_field: choices.TextOrJson | None, json_mode: bool) -> No 'assembly llm "summarize the key decisions" --transcript-id 5551234-abcd', ), ("Pipe any text in", 'echo "meeting notes" | assembly llm "turn into action items"'), + ( + "Read one or more files as context", + 'assembly llm "answer using only these notes: who owns the deploy?" notes/*.md', + ), ( "Pick a model and add a system prompt", 'assembly llm "draft a follow-up email" --model claude-opus-4-7 --system "Be concise."', @@ -52,6 +58,11 @@ def _list_models(output_field: choices.TextOrJson | None, json_mode: bool) -> No def llm( ctx: typer.Context, prompt: str | None = typer.Argument(None, help="The prompt to send to the model"), + files: list[Path] | None = typer.Argument( + None, + help="Optional input files to read as the prompt's context (each is header-prefixed " + "with its name and concatenated; takes priority over piped stdin)", + ), # Note: text piped on stdin is injected into the prompt (e.g. `cat notes | assembly llm "summarize"`). 
model: str = typer.Option( gateway.DEFAULT_MODEL, @@ -103,6 +114,7 @@ def llm( opts = llm_exec.LlmOptions( prompt=prompt, + files=tuple(files or ()), model=model, transcript_id=transcript_id, system=system, diff --git a/aai_cli/commands/llm/_exec.py b/aai_cli/commands/llm/_exec.py index 29934dd0..af3dd3da 100644 --- a/aai_cli/commands/llm/_exec.py +++ b/aai_cli/commands/llm/_exec.py @@ -10,6 +10,7 @@ from __future__ import annotations from dataclasses import dataclass +from pathlib import Path from rich.markup import escape @@ -44,10 +45,15 @@ class LlmOptions: max_tokens: int # Raw --config KEY=VALUE pairs; parsed (and validated) once in run_llm. config_kv: tuple[str, ...] = () + # Input files read as the prompt's context (header-prefixed, concatenated). + files: tuple[Path, ...] = () def _validate_follow_args( - prompt: str | None, output_field: str | None, transcript_id: str | None + prompt: str | None, + output_field: str | None, + transcript_id: str | None, + files: tuple[Path, ...], ) -> str: """Reject flag combinations that don't apply to --follow's live-panel mode. @@ -65,36 +71,84 @@ def _validate_follow_args( "--follow runs over live transcript text piped on stdin; it can't be " "combined with --transcript-id." ) + if files: + raise UsageError( + "--follow runs over live transcript text piped on stdin; it can't be " + "combined with file arguments." + ) if not stdio.stdin_is_piped(): raise UsageError(_FOLLOW_STDIN_MESSAGE) return prompt -def _stdin_transcript_text( - state: AppState, transcript_id: str | None, *, json_mode: bool +def _read_files(files: tuple[Path, ...]) -> str: + """Read each file and join them, each prefixed with a ``===== name =====`` header. + + The header names each source (the file's stem) so a multi-file prompt can cite + which note an answer came from; it's applied uniformly, even for a single file, + so the format the model sees is predictable. 
A missing or unreadable path is a + usage error raised before any auth or network — the same fail-fast ordering as + the --transcript-id check. + """ + sections: list[str] = [] + for path in files: + try: + text = path.read_text(encoding="utf-8") + except OSError as exc: + raise UsageError( + f"Couldn't read {path}: {exc.strerror or exc}.", + suggestion="Check the path points at a readable file.", + ) from exc + sections.append(f"===== {path.stem} =====\n{text}") + return "\n\n".join(sections) + + +def _input_text( + state: AppState, transcript_id: str | None, files: tuple[Path, ...], *, json_mode: bool ) -> str | None: - """Resolve the inline transcript text for one-shot mode. + """Resolve the inline text the prompt operates on for one-shot mode. - Text piped on stdin becomes the content the prompt operates on, unless an - explicit --transcript-id is given — that injects server-side and takes - priority, so piped text is ignored with a visible warning (suppressed by - --quiet, structured under --json). + Three possible sources, in priority order: an explicit --transcript-id (injected + server-side, so this returns None), one or more file arguments (read and + concatenated), or text piped on stdin. A higher-priority source present alongside + a lower one ignores the lower with a visible warning (suppressed by --quiet, + structured under --json). """ - if transcript_id is None: - return stdio.piped_stdin_text() - # Same cheap local id check as `transcripts get`, before auth or network. - client.validate_transcript_id(transcript_id) - if stdio.stdin_is_piped() and not state.quiet: - output.emit_warning( - "Ignoring piped stdin; --transcript-id takes priority.", json_mode=json_mode - ) - return None + if transcript_id is not None: + # Same cheap local id check as `transcripts get`, before auth or network. 
+ client.validate_transcript_id(transcript_id) + ignored = _ignored_sources(files, stdio.stdin_is_piped()) + if ignored and not state.quiet: + output.emit_warning( + f"Ignoring {ignored}; --transcript-id takes priority.", json_mode=json_mode + ) + return None + if files: + if stdio.stdin_is_piped() and not state.quiet: + output.emit_warning( + "Ignoring piped stdin; file arguments take priority.", json_mode=json_mode + ) + return _read_files(files) + return stdio.piped_stdin_text() + + +def _ignored_sources(files: tuple[Path, ...], stdin_piped: bool) -> str | None: + """Name the lower-priority input sources present alongside --transcript-id, for the + warning — or None when there's nothing to ignore.""" + sources: list[str] = [] + if files: + sources.append("file arguments") + if stdin_piped: + sources.append("piped stdin") + return " and ".join(sources) or None def _run_follow( opts: LlmOptions, state: AppState, extra: dict[str, object], *, json_mode: bool ) -> None: - prompt_text = _validate_follow_args(opts.prompt, opts.output_field, opts.transcript_id) + prompt_text = _validate_follow_args( + opts.prompt, opts.output_field, opts.transcript_id, opts.files + ) api_key = state.resolve_api_key() def ask(transcript_text: str) -> str: @@ -131,13 +185,13 @@ def _run_oneshot( suggestion="Or pass --list-models to see available models.", ) prompt_text = opts.prompt - stdin_text = _stdin_transcript_text(state, opts.transcript_id, json_mode=json_mode) + input_text = _input_text(state, opts.transcript_id, opts.files, json_mode=json_mode) api_key = state.resolve_api_key() messages = gateway.build_messages( prompt_text, system=opts.system, transcript_id=opts.transcript_id, - transcript_text=stdin_text, + transcript_text=input_text, ) response = gateway.complete( api_key, diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr index 1d2d4cd6..af616dc2 100644 --- a/tests/__snapshots__/test_snapshots_help_run.ambr +++ 
b/tests/__snapshots__/test_snapshots_help_run.ambr @@ -570,7 +570,7 @@ # name: test_command_help_matches_snapshot[llm] ''' - Usage: assembly llm [OPTIONS] [PROMPT] + Usage: assembly llm [OPTIONS] [PROMPT] [FILES]... Send a prompt to AssemblyAI's LLM Gateway and print the reply @@ -579,7 +579,10 @@ --transcript-id ID). ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ - │ prompt [PROMPT] The prompt to send to the model │ + │ prompt [PROMPT] The prompt to send to the model │ + │ files [FILES]... Optional input files to read as the prompt's │ + │ context (each is header-prefixed with its name and │ + │ concatenated; takes priority over piped stdin) │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --model TEXT LLM Gateway model │ @@ -619,6 +622,9 @@ $ assembly llm "summarize the key decisions" --transcript-id 5551234-abcd Pipe any text in $ echo "meeting notes" | assembly llm "turn into action items" + Read one or more files as context + $ assembly llm "answer using only these notes: who owns the deploy?" + notes/*.md Pick a model and add a system prompt $ assembly llm "draft a follow-up email" --model claude-opus-4-7 --system "Be concise." 
diff --git a/tests/test_llm_command.py b/tests/test_llm_command.py index 8456c73a..fdfea7ba 100644 --- a/tests/test_llm_command.py +++ b/tests/test_llm_command.py @@ -128,6 +128,130 @@ def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, e assert seen["transcript_id"] is None +def test_llm_reads_file_argument_as_context(monkeypatch, tmp_path): + _auth() + seen = {} + + def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): + seen["content"] = messages[0]["content"] + seen["transcript_id"] = transcript_id + return _payload("done") + + monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) + note = tmp_path / "alpha.md" + note.write_text("bob owns the deploy") + result = runner.invoke(app, ["llm", "who owns the deploy?", str(note), "--json"]) + assert result.exit_code == 0 + # The file content is injected, under a header naming the file's stem. + assert "who owns the deploy?" in seen["content"] + assert "bob owns the deploy" in seen["content"] + assert "===== alpha =====" in seen["content"] + assert seen["transcript_id"] is None + + +def test_llm_concatenates_multiple_files_with_headers_in_order(monkeypatch, tmp_path): + _auth() + seen = {} + + def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): + seen["content"] = messages[0]["content"] + return _payload("done") + + monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) + first = tmp_path / "first.md" + first.write_text("ship friday") + second = tmp_path / "second.md" + second.write_text("freeze monday") + result = runner.invoke(app, ["llm", "summarize", str(first), str(second), "--json"]) + assert result.exit_code == 0 + content = seen["content"] + assert "===== first =====" in content + assert "===== second =====" in content + assert "ship friday" in content + assert "freeze monday" in content + # Both note bodies appear under their own header, in the order passed. 
+ assert content.index("===== first =====") < content.index("===== second =====") + assert content.index("ship friday") < content.index("freeze monday") + + +def test_llm_files_take_priority_over_stdin(monkeypatch, tmp_path): + _auth() + seen = {} + + def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): + seen["content"] = messages[0]["content"] + return _payload("done") + + monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) + note = tmp_path / "note.md" + note.write_text("from the file") + result = runner.invoke( + app, ["llm", "summarize", str(note)], input="from stdin, should be ignored" + ) + assert result.exit_code == 0 + assert "from the file" in seen["content"] + assert "from stdin, should be ignored" not in seen["content"] + assert "Ignoring piped stdin; file arguments take priority." in result.output + + +def test_llm_missing_file_exits_2_without_network(monkeypatch, tmp_path): + # A bad path (e.g. an unmatched shell glob passed through literally) is a usage + # error raised before auth or the gateway, not a crash. + _auth() + monkeypatch.setattr( + "aai_cli.commands.llm.gateway.complete", + lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not call the gateway")), + ) + missing = tmp_path / "nope.md" + result = runner.invoke(app, ["llm", "summarize", str(missing)]) + assert result.exit_code == 2 + assert "Couldn't read" in result.output + # The clean OS reason (errno's strerror) is shown, not the raw exception repr — + # so no "[Errno N] …: '/path'" bracket leaks into the message. + assert "[Errno" not in result.output + + +def test_llm_files_with_terminal_stdin_emits_no_warning(monkeypatch, tmp_path): + # With files given and stdin a terminal (not piped), there's nothing being + # ignored, so the "Ignoring piped stdin" warning must not fire. 
+ _auth() + monkeypatch.setattr("aai_cli.commands.llm._exec.stdio.stdin_is_piped", lambda: False) + seen = {} + + def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): + seen["content"] = messages[0]["content"] + return _payload("done") + + monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) + note = tmp_path / "note.md" + note.write_text("only the file") + result = runner.invoke(app, ["llm", "summarize", str(note)]) + assert result.exit_code == 0 + assert "only the file" in seen["content"] + assert "Ignoring piped stdin" not in result.output + + +def test_llm_transcript_id_takes_priority_over_files(monkeypatch, tmp_path): + _auth() + seen = {} + # Pin stdin to a terminal so only the file argument is the ignored source. + monkeypatch.setattr("aai_cli.commands.llm._exec.stdio.stdin_is_piped", lambda: False) + + def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): + seen["content"] = messages[0]["content"] + seen["transcript_id"] = transcript_id + return _payload("s") + + monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) + note = tmp_path / "note.md" + note.write_text("file content here") + result = runner.invoke(app, ["llm", "summarize", str(note), "--transcript-id", "t_9"]) + assert result.exit_code == 0 + assert seen["transcript_id"] == "t_9" + assert "file content here" not in seen["content"] + assert "Ignoring file arguments; --transcript-id takes priority." 
in result.output + + def test_llm_transcript_id_takes_priority_over_stdin(monkeypatch): _auth() seen = {} @@ -252,83 +376,6 @@ def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, e assert "Run the same command again" in result.output -def test_llm_follow_summarizes_each_turn(monkeypatch): - _auth() - calls = [] - - def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): - calls.append(messages[-1]["content"]) - return _payload(f"summary-{len(calls)}") - - monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) - result = runner.invoke( - app, - ["llm", "summarize action items", "--follow", "--json"], - input="we ship friday\nbob owns the deploy\n", - ) - assert result.exit_code == 0 - updates = [json.loads(line) for line in result.output.splitlines() if line.strip()] - # One update per finalized turn, full transcript accumulating each time. - assert len(updates) == 2 - assert "we ship friday" in calls[0] - assert "bob owns the deploy" not in calls[0] - assert "we ship friday" in calls[1] - assert "bob owns the deploy" in calls[1] - assert updates[-1]["output"] == "summary-2" - assert updates[-1]["turns"] == 2 - - -def test_llm_follow_includes_system_prompt(monkeypatch): - _auth() - seen = {} - - def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): - seen["roles"] = [m["role"] for m in messages] - seen["system"] = messages[0]["content"] - return _payload("ok") - - monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) - result = runner.invoke( - app, - ["llm", "summarize", "--follow", "--system", "You are a scribe", "--json"], - input="one turn\n", - ) - assert result.exit_code == 0 - assert seen["roles"][0] == "system" - assert seen["system"] == "You are a scribe" - - -def test_llm_follow_rejects_transcript_id(monkeypatch): - _auth() - monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload()) - 
result = runner.invoke( - app, - ["llm", "summarize", "--follow", "--transcript-id", "t_1", "--json"], - input="x\n", - ) - assert result.exit_code == 2 - assert "transcript-id" in result.output - - -def test_llm_follow_ignores_blank_lines(monkeypatch): - _auth() - calls = [] - - def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None, extra=None): - calls.append(messages[-1]["content"]) - return _payload("ok") - - monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete) - result = runner.invoke( - app, - ["llm", "summarize", "--follow", "--json"], - input="first\n\n \nsecond\n", - ) - assert result.exit_code == 0 - # Blank/whitespace-only lines don't trigger a call. - assert len(calls) == 2 - - def test_llm_output_text_prints_raw_answer(monkeypatch): _auth() monkeypatch.setattr( @@ -368,92 +415,6 @@ def test_llm_output_invalid_field_exits_2(monkeypatch): assert result.exit_code == 2 -def test_llm_output_with_follow_is_rejected(monkeypatch): - _auth() - monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload()) - result = runner.invoke(app, ["llm", "hi", "-f", "-o", "text"], input="x\n") - assert result.exit_code == 2 - assert "one-shot" in result.output - - -def test_llm_follow_requires_a_prompt(monkeypatch): - # --follow re-runs a prompt over each turn; with no prompt there's nothing to run. - _auth() - monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload()) - result = runner.invoke(app, ["llm", "--follow", "--json"], input="x\n") - assert result.exit_code == 2 - assert "prompt" in result.output.lower() - - -def test_llm_follow_requires_piped_stdin(monkeypatch): - # Interactively (no pipe) --follow would block forever; reject it with guidance. 
- _auth() - monkeypatch.setattr("aai_cli.commands.llm._exec.stdio.stdin_is_piped", lambda: False) - monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload()) - result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"]) - assert result.exit_code == 2 - assert "stdin" in result.output.lower() - - -def test_llm_follow_empty_stdin_exits_2(monkeypatch): - # `assembly llm -f "…"