From 184d43e29e0a0afe8ae56fddefd5f4c2f6f9a9fe Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 16 Jun 2026 23:48:16 +0000
Subject: [PATCH] Raise default LLM max_tokens from 1000 to 8192

Long --llm-reduce summaries and other LLM Gateway replies were being
clipped at the old 1000-token default. Raise the shared
DEFAULT_MAX_TOKENS ceiling to 8192 so multi-source reduces and
summaries finish instead of cutting off mid-sentence. The gateway only
bills tokens actually generated, so a higher cap costs nothing on short
replies, and --max-tokens still overrides the default per call.

Update the --help snapshots to show the new default.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Y8Qzjnepp1yyViyopgeVYq
---
 aai_cli/core/llm.py                              |  5 ++++-
 .../test_snapshots_help_history.ambr             |  2 +-
 tests/__snapshots__/test_snapshots_help_run.ambr | 16 ++++++++--------
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/aai_cli/core/llm.py b/aai_cli/core/llm.py
index c27e0aac..b147e89e 100644
--- a/aai_cli/core/llm.py
+++ b/aai_cli/core/llm.py
@@ -14,7 +14,10 @@
 # The LLM Gateway is OpenAI-compatible, so we talk to it through the OpenAI SDK
 # pointed at the active environment's gateway base (see _client / code_gen).
 DEFAULT_MODEL = "claude-haiku-4-5-20251001"
-DEFAULT_MAX_TOKENS = 1000
+# Generous ceiling so long reduces/summaries aren't clipped mid-sentence; the
+# gateway only bills tokens actually generated, so a high cap costs nothing on
+# short replies. Override per-call with --max-tokens.
+DEFAULT_MAX_TOKENS = 8192
 
 # Exact tag the gateway substitutes with a transcript's text when `transcript_id`
 # is supplied. Must be exactly "{{ transcript }}" (spaces included).
diff --git a/tests/__snapshots__/test_snapshots_help_history.ambr b/tests/__snapshots__/test_snapshots_help_history.ambr
index 10d8ae9c..0d114403 100644
--- a/tests/__snapshots__/test_snapshots_help_history.ambr
+++ b/tests/__snapshots__/test_snapshots_help_history.ambr
@@ -102,7 +102,7 @@
   │                              transcript.                                     │
   │ --model             TEXT     LLM Gateway model                               │
   │                              [default: claude-haiku-4-5-20251001]            │
-  │ --max-tokens        INTEGER  Max tokens [default: 1000]                      │
+  │ --max-tokens        INTEGER  Max tokens [default: 8192]                      │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples
diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr
index b9af0c88..d358e113 100644
--- a/tests/__snapshots__/test_snapshots_help_run.ambr
+++ b/tests/__snapshots__/test_snapshots_help_run.ambr
@@ -67,7 +67,7 @@
   │                                           [default:                          │
   │                                           claude-haiku-4-5-20251001]         │
   │ --max-tokens        INTEGER RANGE [x>=1]  Max tokens per reply               │
-  │                                           [default: 1000]                    │
+  │                                           [default: 8192]                    │
   │ --llm-config        TEXT                  Set any LLM Gateway request field  │
   │                                           as KEY=VALUE (repeatable)          │
   ╰──────────────────────────────────────────────────────────────────────────────╯
@@ -314,7 +314,7 @@
   │ --model             TEXT     LLM Gateway model for --llm                     │
   │                              [default: claude-haiku-4-5-20251001]            │
   │ --max-tokens        INTEGER  Max tokens for the --llm selection reply        │
-  │                              [default: 1000]                                 │
+  │                              [default: 8192]                                 │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples
@@ -461,7 +461,7 @@
   │                              utterances                                      │
   │                              [default: claude-haiku-4-5-20251001]            │
   │ --max-tokens        INTEGER  Max tokens per utterance translation            │
-  │                              [default: 1000]                                 │
+  │                              [default: 8192]                                 │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples
@@ -563,7 +563,7 @@
   │                              previous one's output.                          │
   │ --model             TEXT     LLM Gateway model                               │
   │                              [default: claude-haiku-4-5-20251001]            │
-  │ --max-tokens        INTEGER  Max tokens [default: 1000]                      │
+  │ --max-tokens        INTEGER  Max tokens [default: 8192]                      │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples
@@ -621,7 +621,7 @@
   │                                                answer, pipe-friendly) or     │
   │                                                json                          │
   │ --max-tokens             INTEGER RANGE [x>=1]  Max tokens to generate        │
-  │                                                [default: 1000]               │
+  │                                                [default: 8192]               │
   │ --config                 TEXT                  Set any extra gateway request │
   │                                                field: KEY=VALUE, repeatable  │
   │                                                (e.g. --config                │
@@ -856,7 +856,7 @@
   │ --model               TEXT                  LLM Gateway model                │
   │                                             [default:                        │
   │                                             claude-haiku-4-5-20251001]       │
-  │ --max-tokens          INTEGER RANGE [x>=1]  Max tokens [default: 1000]       │
+  │ --max-tokens          INTEGER RANGE [x>=1]  Max tokens [default: 8192]       │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   ╭─ Advanced ───────────────────────────────────────────────────────────────────╮
   │ --config             KEY=VALUE  Set any StreamingParameters field as         │
@@ -1066,7 +1066,7 @@
   │                              transcript.                                     │
   │ --model             TEXT     LLM Gateway model                               │
   │                              [default: claude-haiku-4-5-20251001]            │
-  │ --max-tokens        INTEGER  Max tokens [default: 1000]                      │
+  │ --max-tokens        INTEGER  Max tokens [default: 8192]                      │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples
@@ -1251,7 +1251,7 @@
   │                              transcript.                                     │
   │ --model             TEXT     LLM Gateway model                               │
   │                              [default: claude-haiku-4-5-20251001]            │
-  │ --max-tokens        INTEGER  Max tokens [default: 1000]                      │
+  │ --max-tokens        INTEGER  Max tokens [default: 8192]                      │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples