diff --git a/aai_cli/core/llm.py b/aai_cli/core/llm.py index c27e0aac..b147e89e 100644 --- a/aai_cli/core/llm.py +++ b/aai_cli/core/llm.py @@ -14,7 +14,10 @@ # The LLM Gateway is OpenAI-compatible, so we talk to it through the OpenAI SDK # pointed at the active environment's gateway base (see _client / code_gen). DEFAULT_MODEL = "claude-haiku-4-5-20251001" -DEFAULT_MAX_TOKENS = 1000 +# Generous ceiling so long reduces/summaries aren't clipped mid-sentence; the +# gateway only bills tokens actually generated, so a high cap costs nothing on +# short replies. Override per-call with --max-tokens. +DEFAULT_MAX_TOKENS = 8192 # Exact tag the gateway substitutes with a transcript's text when `transcript_id` # is supplied. Must be exactly "{{ transcript }}" (spaces included). diff --git a/tests/__snapshots__/test_snapshots_help_history.ambr b/tests/__snapshots__/test_snapshots_help_history.ambr index 10d8ae9c..0d114403 100644 --- a/tests/__snapshots__/test_snapshots_help_history.ambr +++ b/tests/__snapshots__/test_snapshots_help_history.ambr @@ -102,7 +102,7 @@ │ transcript. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr index b9af0c88..d358e113 100644 --- a/tests/__snapshots__/test_snapshots_help_run.ambr +++ b/tests/__snapshots__/test_snapshots_help_run.ambr @@ -67,7 +67,7 @@ │ [default: │ │ claude-haiku-4-5-20251001] │ │ --max-tokens INTEGER RANGE [x>=1] Max tokens per reply │ - │ [default: 1000] │ + │ [default: 8192] │ │ --llm-config TEXT Set any LLM Gateway request field │ │ as KEY=VALUE (repeatable) │ ╰──────────────────────────────────────────────────────────────────────────────╯ @@ -314,7 +314,7 @@ │ --model TEXT LLM Gateway model for --llm │ │ [default: claude-haiku-4-5-20251001] │ │ --max-tokens INTEGER Max tokens for the --llm selection reply │ - │ [default: 1000] │ + │ [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -461,7 +461,7 @@ │ utterances │ │ [default: claude-haiku-4-5-20251001] │ │ --max-tokens INTEGER Max tokens per utterance translation │ - │ [default: 1000] │ + │ [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -563,7 +563,7 @@ │ previous one's output. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -621,7 +621,7 @@ │ answer, pipe-friendly) or │ │ json │ │ --max-tokens INTEGER RANGE [x>=1] Max tokens to generate │ - │ [default: 1000] │ + │ [default: 8192] │ │ --config TEXT Set any extra gateway request │ │ field: KEY=VALUE, repeatable │ │ (e.g. --config │ @@ -856,7 +856,7 @@ │ --model TEXT LLM Gateway model │ │ [default: │ │ claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER RANGE [x>=1] Max tokens [default: 1000] │ + │ --max-tokens INTEGER RANGE [x>=1] Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Advanced ───────────────────────────────────────────────────────────────────╮ │ --config KEY=VALUE Set any StreamingParameters field as │ @@ -1066,7 +1066,7 @@ │ transcript. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -1251,7 +1251,7 @@ │ transcript. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples