From 184d43e29e0a0afe8ae56fddefd5f4c2f6f9a9fe Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 23:48:16 +0000 Subject: [PATCH] Raise default LLM max_tokens from 1000 to 8192 Long --llm-reduce summaries and other LLM-Gateway replies were being clipped at ~1000 tokens. Bump the shared DEFAULT_MAX_TOKENS ceiling so multi-source reduces and summaries finish instead of cutting off mid-sentence. The gateway only bills tokens actually generated, so a higher cap is free on short replies, and --max-tokens still overrides per call. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Y8Qzjnepp1yyViyopgeVYq --- aai_cli/core/llm.py | 5 ++++- .../test_snapshots_help_history.ambr | 2 +- tests/__snapshots__/test_snapshots_help_run.ambr | 16 ++++++++-------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/aai_cli/core/llm.py b/aai_cli/core/llm.py index c27e0aac..b147e89e 100644 --- a/aai_cli/core/llm.py +++ b/aai_cli/core/llm.py @@ -14,7 +14,10 @@ # The LLM Gateway is OpenAI-compatible, so we talk to it through the OpenAI SDK # pointed at the active environment's gateway base (see _client / code_gen). DEFAULT_MODEL = "claude-haiku-4-5-20251001" -DEFAULT_MAX_TOKENS = 1000 +# Generous ceiling so long reduces/summaries aren't clipped mid-sentence; the +# gateway only bills tokens actually generated, so a high cap costs nothing on +# short replies. Override per-call with --max-tokens. +DEFAULT_MAX_TOKENS = 8192 # Exact tag the gateway substitutes with a transcript's text when `transcript_id` # is supplied. Must be exactly "{{ transcript }}" (spaces included). diff --git a/tests/__snapshots__/test_snapshots_help_history.ambr b/tests/__snapshots__/test_snapshots_help_history.ambr index 10d8ae9c..0d114403 100644 --- a/tests/__snapshots__/test_snapshots_help_history.ambr +++ b/tests/__snapshots__/test_snapshots_help_history.ambr @@ -102,7 +102,7 @@ │ transcript. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr index b9af0c88..d358e113 100644 --- a/tests/__snapshots__/test_snapshots_help_run.ambr +++ b/tests/__snapshots__/test_snapshots_help_run.ambr @@ -67,7 +67,7 @@ │ [default: │ │ claude-haiku-4-5-20251001] │ │ --max-tokens INTEGER RANGE [x>=1] Max tokens per reply │ - │ [default: 1000] │ + │ [default: 8192] │ │ --llm-config TEXT Set any LLM Gateway request field │ │ as KEY=VALUE (repeatable) │ ╰──────────────────────────────────────────────────────────────────────────────╯ @@ -314,7 +314,7 @@ │ --model TEXT LLM Gateway model for --llm │ │ [default: claude-haiku-4-5-20251001] │ │ --max-tokens INTEGER Max tokens for the --llm selection reply │ - │ [default: 1000] │ + │ [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -461,7 +461,7 @@ │ utterances │ │ [default: claude-haiku-4-5-20251001] │ │ --max-tokens INTEGER Max tokens per utterance translation │ - │ [default: 1000] │ + │ [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -563,7 +563,7 @@ │ previous one's output. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -621,7 +621,7 @@ │ answer, pipe-friendly) or │ │ json │ │ --max-tokens INTEGER RANGE [x>=1] Max tokens to generate │ - │ [default: 1000] │ + │ [default: 8192] │ │ --config TEXT Set any extra gateway request │ │ field: KEY=VALUE, repeatable │ │ (e.g. --config │ @@ -856,7 +856,7 @@ │ --model TEXT LLM Gateway model │ │ [default: │ │ claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER RANGE [x>=1] Max tokens [default: 1000] │ + │ --max-tokens INTEGER RANGE [x>=1] Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Advanced ───────────────────────────────────────────────────────────────────╮ │ --config KEY=VALUE Set any StreamingParameters field as │ @@ -1066,7 +1066,7 @@ │ transcript. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -1251,7 +1251,7 @@ │ transcript. │ │ --model TEXT LLM Gateway model │ │ [default: claude-haiku-4-5-20251001] │ - │ --max-tokens INTEGER Max tokens [default: 1000] │ + │ --max-tokens INTEGER Max tokens [default: 8192] │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples