diff --git a/.agents/tmux-cli.ts b/.agents/tmux-cli.ts
new file mode 100644
index 0000000000..10c0ecdeab
--- /dev/null
+++ b/.agents/tmux-cli.ts
@@ -0,0 +1,651 @@
+import type { AgentDefinition } from './types/agent-definition'
+
+/**
+ * JSON schema for the structured result this agent reports via `set_output`.
+ * Only `overallStatus`, `summary`, `sessionName`, `scriptIssues` and
+ * `captures` are required; `results` and `lessons` are optional.
+ */
+const outputSchema = {
+  type: 'object' as const,
+  properties: {
+    overallStatus: {
+      type: 'string' as const,
+      enum: ['success', 'failure', 'partial'],
+      description: '"success" when all tasks completed, "failure" when the primary task could not be done, "partial" when some subtasks succeeded but others failed',
+    },
+    summary: {
+      type: 'string' as const,
+      description: 'Brief summary of the CLI interaction: what was done, key outputs observed, and the outcome',
+    },
+    sessionName: {
+      type: 'string' as const,
+      description: 'The tmux session name used for this run (needed for cleanup if the session lingers)',
+    },
+    results: {
+      type: 'array' as const,
+      items: {
+        type: 'object' as const,
+        properties: {
+          name: { type: 'string' as const, description: 'Short name of the task or interaction step' },
+          passed: { type: 'boolean' as const, description: 'Whether this step succeeded' },
+          details: { type: 'string' as const, description: 'What happened during this step' },
+          capturedOutput: { type: 'string' as const, description: 'Relevant CLI output observed (keep concise — full output is in capture files)' },
+        },
+        required: ['name', 'passed'],
+      },
+      description: 'Ordered list of interaction steps and their outcomes',
+    },
+    scriptIssues: {
+      type: 'array' as const,
+      items: {
+        type: 'object' as const,
+        properties: {
+          script: { type: 'string' as const, description: 'Which helper command had the issue (e.g., "send", "capture", "wait-for")' },
+          issue: { type: 'string' as const, description: 'What went wrong when using the helper script' },
+          errorOutput: { type: 'string' as const, description: 'The actual error message or unexpected output' },
+          suggestedFix: { type: 'string' as const, description: 'Suggested fix for the parent agent to implement' },
+        },
+        required: ['script', 'issue', 'suggestedFix'],
+      },
+      description: 'Problems encountered with the helper script that the parent agent should address',
+    },
+    captures: {
+      type: 'array' as const,
+      items: {
+        type: 'object' as const,
+        properties: {
+          path: { type: 'string' as const, description: 'Absolute path to the capture file in /tmp/tmux-captures-{session}/' },
+          label: { type: 'string' as const, description: 'Descriptive label for what this capture shows (e.g., "after-login", "error-state", "final")' },
+          timestamp: { type: 'string' as const, description: 'ISO 8601 timestamp of when the capture was taken' },
+        },
+        required: ['path', 'label'],
+      },
+      description: 'Saved terminal captures the parent agent can read to verify results',
+    },
+    lessons: {
+      type: 'array' as const,
+      items: {
+        type: 'string' as const,
+      },
+      description: 'Advice for future runs: timing adjustments needed, unexpected CLI behavior, workarounds discovered, input quirks',
+    },
+  },
+  required: ['overallStatus', 'summary', 'sessionName', 'scriptIssues', 'captures'],
+}
+
+/**
+ * Agent that drives a CLI application inside a tmux session.
+ *
+ * `handleSteps` writes a bash helper script to /tmp, starts a detached tmux
+ * session running bash, sends `params.command` to it, captures the initial
+ * pane and announces the session to the model before yielding 'STEP_ALL'.
+ */
+const definition: AgentDefinition = {
+  id: 'tmux-cli',
+  displayName: 'Tmux CLI Agent',
+  model: 'minimax/minimax-m2.5',
+  // Provider options are tightly coupled to the model choice above.
+  // If you change the model, update these accordingly.
+  providerOptions: {
+    only: ['inceptron/fp8'],
+    order: ['inceptron/fp8'],
+    allow_fallbacks: false,
+    data_collection: 'deny',
+  },
+
+  spawnerPrompt: `General-purpose agent that uses tmux to interact with and test CLI applications.
+
+**Your responsibilities as the parent agent:**
+1. If \`scriptIssues\` is not empty, check the error details and re-run the agent
+2. Use \`read_files\` on the capture paths to see what the CLI displayed
+3. Re-run the agent after fixing any issues
+4. Check the \`lessons\` array for advice on how to improve future runs
+
+**Note:** Capture files are saved to \`/tmp/\`. Use \`run_terminal_command\` with \`cat\` to read them if \`read_files\` doesn't support absolute paths.
+
+**When spawning this agent**, provide as much advice as possible in the prompt about how to test the CLI, including lessons from any previous runs of tmux-cli (e.g., timing adjustments, commands that didn't work, expected output patterns). This helps the agent avoid repeating mistakes.
+
+**Orphaned session cleanup:** If the agent fails or times out, the tmux session may linger. Run \`tmux kill-session -t <sessionName>\` to clean up. The session name is in the agent's output.`,
+
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'What to do with the CLI application (e.g., "run /help and verify output", "send a prompt and capture the response")',
+    },
+    params: {
+      type: 'object',
+      properties: {
+        command: {
+          type: 'string',
+          description: 'The CLI command to start in the tmux session (e.g., "python app.py", "node server.js", "my-cli --interactive")',
+        },
+      },
+    },
+  },
+
+  outputMode: 'structured_output',
+  outputSchema,
+  includeMessageHistory: false,
+
+  toolNames: ['run_terminal_command', 'read_files', 'set_output', 'add_message'],
+
+  systemPrompt: `You are an expert at interacting with CLI applications via tmux. You start a CLI process in a tmux session and use a helper script to send input and capture output.
+
+## Session Management
+
+A tmux session is started for you automatically. The session name and helper script path will be announced in a setup message. Do NOT start a new session — use the one provided.
+
+The session runs \`bash\` and your command is sent to it automatically. This means the session stays alive even if the command exits.
+
+## Helper Script Reference
+
+The examples below use \`$HELPER\` and \`$SESSION\` as shorthand. The **actual paths** will be provided in the setup message when the session starts. Always use those real paths in your commands.
+
+### Sending Input
+
+\`\`\`bash
+# Send input (presses Enter automatically)
+$HELPER send "$SESSION" "your input here"
+
+# Send without pressing Enter
+$HELPER send "$SESSION" "partial text" --no-enter
+
+# Send with bracketed paste mode (for TUI apps: vim, fzf, Ink-based CLIs)
+$HELPER send "$SESSION" "pasted content" --paste
+
+# Send and wait for output to stabilize (for streaming CLIs)
+$HELPER send "$SESSION" "command" --wait-idle 3
+
+# Send special keys (Enter, Escape, C-c, C-u, Up, Down, Tab, etc.)
+$HELPER key "$SESSION" Escape
+$HELPER key "$SESSION" C-c
+
+# Pass arguments directly to tmux send-keys (escape hatch)
+$HELPER raw "$SESSION" "some text" Enter
+\`\`\`
+
+Input is sent as **plain text** by default (works for \`input()\`, readline, most CLIs). For TUI apps that need paste events, add \`--paste\`.
+
+### Capturing Output
+
+\`\`\`bash
+# Capture visible pane (~30 lines). Default wait: 1 second.
+$HELPER capture "$SESSION"
+
+# Capture with a descriptive label (used in the filename)
+$HELPER capture "$SESSION" --label "after-login"
+
+# Capture with custom wait time
+$HELPER capture "$SESSION" --wait 3
+
+# Capture full scrollback (use for final capture)
+$HELPER capture "$SESSION" --full --label "final"
+
+# Capture with ANSI color codes stripped (cleaner for parsing)
+$HELPER capture "$SESSION" --strip-ansi --label "clean-output"
+
+# Instant capture (no wait)
+$HELPER capture "$SESSION" --wait 0
+\`\`\`
+
+Captures show the **visible pane** by default. Add \`--full\` for the entire scrollback buffer. Each capture is saved to a file in \`/tmp/tmux-captures-{session}/\` and the path + content are printed. A timestamp is included in the output.
+
+### Waiting
+
+\`\`\`bash
+# Wait until a pattern appears in the visible pane (regex, default timeout: 30s)
+$HELPER wait-for "$SESSION" "Your guess:"
+$HELPER wait-for "$SESSION" "\\$" --timeout 10
+$HELPER wait-for "$SESSION" "ready" --timeout 60
+
+# Wait until output is stable for N seconds (max 120s)
+$HELPER wait-idle "$SESSION" 3
+\`\`\`
+
+### Session Control
+
+\`\`\`bash
+# Check if session is alive
+$HELPER status "$SESSION"
+
+# Stop the session
+$HELPER stop "$SESSION"
+\`\`\`
+
+## File Creation
+
+Do NOT send file content through the tmux session. Use \`run_terminal_command\` with heredocs or scripting to create/edit files. The tmux session is for interacting with the CLI being tested.
+
+## Error Recovery
+
+If the CLI appears hung, try \`$HELPER key "$SESSION" C-c\` to interrupt. If it's still unresponsive, check session status with \`$HELPER status "$SESSION"\`. If the session is dead, report the failure. Always capture before stopping so the parent agent can diagnose issues.
+
+## Operating Heuristics
+
+- Use the provided tmux session as the single source of truth. Do not start a second session.
+- **Capture discipline:** Aim for 3-8 captures per run. Capture at key milestones: startup, after important interactions, on errors, and final state. Do NOT capture after every single input.
+- **Use \`--full\` on the final capture** to get complete scrollback history. Regular captures only show the visible pane (~30 lines), keeping them small and focused.
+- **Use \`wait-for\` before sending input** when you need to wait for a prompt or specific output to appear. This is more reliable than guessing wait times.
+- **Wait guidance:** Most CLIs need 1-2 seconds to process input. Use \`--wait-idle 2\` on send or \`--wait 2\` on capture. For streaming CLIs, use \`--wait-idle 3\` or higher.
+- Use \`--label\` on captures to make filenames descriptive.
+- If the CLI already shows enough evidence in the current viewport, do not keep recapturing.`,
+
+  instructionsPrompt: `Instructions:
+
+## Workflow
+
+A tmux session has been started for you. A setup message will announce the session name, helper script path, and the initial terminal output. Your command has already been sent to the session.
+
+1. **Check the initial output** provided in the setup message. If you see errors like "command not found" or "No such file", report failure immediately.
+2. **Interact with the CLI** using the helper commands documented in the system prompt (send, key, capture, wait-for, etc.).
+3. **Capture output** at key milestones. Use \`wait-for\` to wait for expected prompts before sending input.
+4. **Final capture** with full scrollback before stopping: \`$HELPER capture "$SESSION" --full --label "final"\`
+5. **Stop the session**: \`$HELPER stop "$SESSION"\`
+
+## Output
+
+Report results using set_output with:
+- \`overallStatus\`: "success" (all tasks completed), "failure" (primary task couldn't be done), or "partial" (some subtasks succeeded but others failed)
+- \`summary\`: Brief description of what was done
+- \`sessionName\`: The tmux session name (REQUIRED)
+- \`results\`: Array of task outcomes
+- \`scriptIssues\`: Array of any problems with the helper script
+- \`captures\`: Array of capture paths with labels. Use the file paths printed by the capture command (MUST have at least one)
+- \`lessons\`: Array of strings describing issues encountered and advice for future runs (e.g., "Need longer --wait for this CLI", "CLI requires pressing Enter twice", "Command X produced unexpected output")
+
+Always include captures so the parent agent can verify results. Always include lessons so future invocations can be improved.`,
+
+  handleSteps: function* ({ params, logger }) {
+    // Self-contained tmux helper script written to /tmp at startup.
+    // Must be defined inside handleSteps because the function is serialized.
+    const helperScript = `#!/usr/bin/env bash
+set -e
+
+usage() {
+  echo "Usage: $0 <command> [args]"
+  echo "Commands: start, send, capture, stop, key, raw, wait-for, wait-idle, status"
+  exit 1
+}
+
+[[ $# -lt 1 ]] && usage
+CMD="$1"; shift
+
+case "$CMD" in
+  start)
+    SESSION="$1"
+    [[ -z "$SESSION" ]] && { echo "Usage: start <session>" >&2; exit 1; }
+    tmux new-session -d -s "$SESSION" -x 120 -y 30 bash 2>/dev/null || true
+    if ! tmux has-session -t "$SESSION" 2>/dev/null; then
+      echo "Failed to create session $SESSION" >&2; exit 1
+    fi
+    mkdir -p "/tmp/tmux-captures-$SESSION"
+    echo "$SESSION"
+    ;;
+
+  send)
+    # send <session> <text> [--no-enter] [--paste] [--wait-idle N]
+    SESSION="$1"; shift
+    TEXT=""; AUTO_ENTER=true; PASTE_MODE=false; WAIT_IDLE=0
+    while [[ $# -gt 0 ]]; do
+      case $1 in
+        --no-enter) AUTO_ENTER=false; shift ;;
+        --paste) PASTE_MODE=true; shift ;;
+        --wait-idle) WAIT_IDLE="$2"; shift 2 ;;
+        *) TEXT="$1"; shift ;;
+      esac
+    done
+    [[ -z "$SESSION" || -z "$TEXT" ]] && { echo "Usage: send <session> <text> [--no-enter] [--paste] [--wait-idle N]" >&2; exit 1; }
+    tmux send-keys -t "$SESSION" C-u
+    sleep 0.05
+    # -l sends the text literally. Without it tmux looks the argument up as a
+    # key name, so input such as "Enter" or "Up" is pressed instead of typed.
+    if [[ "$PASTE_MODE" == true ]]; then
+      tmux send-keys -t "$SESSION" -l -- $'\\x1b[200~'"$TEXT"$'\\x1b[201~'
+    else
+      tmux send-keys -t "$SESSION" -l -- "$TEXT"
+    fi
+    if [[ "$AUTO_ENTER" == true ]]; then
+      sleep 0.05
+      tmux send-keys -t "$SESSION" Enter
+      sleep 0.5
+    fi
+    if [[ "$WAIT_IDLE" -gt 0 ]]; then
+      LAST_OUTPUT=""
+      STABLE_START=$(date +%s)
+      MAX_END=$(( $(date +%s) + 120 ))
+      while true; do
+        CURRENT_OUTPUT=$(tmux capture-pane -t "$SESSION" -S - -p 2>/dev/null || echo "")
+        NOW=$(date +%s)
+        if [[ "$CURRENT_OUTPUT" != "$LAST_OUTPUT" ]]; then
+          LAST_OUTPUT="$CURRENT_OUTPUT"
+          STABLE_START=$NOW
+        fi
+        if (( NOW - STABLE_START >= WAIT_IDLE )); then break; fi
+        if (( NOW >= MAX_END )); then echo "wait-idle timed out after 120s" >&2; break; fi
+        sleep 0.25
+      done
+    fi
+    ;;
+
+  key)
+    SESSION="$1"; KEY="$2"
+    [[ -z "$SESSION" || -z "$KEY" ]] && { echo "Usage: key <session> <key>" >&2; exit 1; }
+    tmux send-keys -t "$SESSION" "$KEY"
+    ;;
+
+  raw)
+    SESSION="$1"; shift
+    [[ -z "$SESSION" ]] && { echo "Usage: raw <session> [tmux send-keys args...]" >&2; exit 1; }
+    tmux send-keys -t "$SESSION" "$@"
+    ;;
+
+  capture)
+    # capture <session> [--wait N] [--label LABEL] [--full] [--strip-ansi]
+    SESSION="$1"; shift
+    WAIT=1; LABEL=""; FULL=false; STRIP_ANSI=false
+    while [[ $# -gt 0 ]]; do
+      case $1 in
+        --wait) WAIT="$2"; shift 2 ;;
+        --label) LABEL="$2"; shift 2 ;;
+        --full) FULL=true; shift ;;
+        --strip-ansi) STRIP_ANSI=true; shift ;;
+        *) shift ;;
+      esac
+    done
+    [[ -z "$SESSION" ]] && { echo "Usage: capture <session> [--wait N] [--label LABEL] [--full] [--strip-ansi]" >&2; exit 1; }
+    [[ "$WAIT" -gt 0 ]] && sleep "$WAIT"
+    # Keep the label to filename-safe characters so it cannot add path segments.
+    LABEL="\${LABEL//[^A-Za-z0-9._-]/_}"
+    CAPTURE_DIR="/tmp/tmux-captures-$SESSION"
+    mkdir -p "$CAPTURE_DIR"
+    SEQ_FILE="$CAPTURE_DIR/.seq"
+    if [[ -f "$SEQ_FILE" ]]; then SEQ=$(cat "$SEQ_FILE"); else SEQ=0; fi
+    SEQ=$((SEQ + 1))
+    echo "$SEQ" > "$SEQ_FILE"
+    SEQ_PAD=$(printf "%03d" "$SEQ")
+    if [[ -n "$LABEL" ]]; then
+      CAPTURE_FILE="$CAPTURE_DIR/capture-\${SEQ_PAD}-\${LABEL}.txt"
+    else
+      CAPTURE_FILE="$CAPTURE_DIR/capture-\${SEQ_PAD}.txt"
+    fi
+    if [[ "$FULL" == true ]]; then
+      tmux capture-pane -t "$SESSION" -S - -p > "$CAPTURE_FILE"
+    else
+      tmux capture-pane -t "$SESSION" -p > "$CAPTURE_FILE"
+    fi
+    if [[ "$STRIP_ANSI" == true ]]; then
+      perl -pe 's/\\e\\[[\\d;]*[a-zA-Z]//g' "$CAPTURE_FILE" > "$CAPTURE_FILE.tmp" && mv "$CAPTURE_FILE.tmp" "$CAPTURE_FILE"
+    fi
+    TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+    echo "[Saved: $CAPTURE_FILE] [$TIMESTAMP]"
+    cat "$CAPTURE_FILE"
+    ;;
+
+  wait-for)
+    # wait-for <session> <pattern> [--timeout N]
+    # Polls visible pane until grep matches the pattern (default timeout: 30s)
+    SESSION="$1"; shift
+    PATTERN=""; TIMEOUT=30
+    while [[ $# -gt 0 ]]; do
+      case $1 in
+        --timeout) TIMEOUT="$2"; shift 2 ;;
+        *) PATTERN="$1"; shift ;;
+      esac
+    done
+    [[ -z "$SESSION" || -z "$PATTERN" ]] && { echo "Usage: wait-for <session> <pattern> [--timeout N]" >&2; exit 1; }
+    MAX_END=$(( $(date +%s) + TIMEOUT ))
+    while true; do
+      # -e keeps a pattern that starts with "-" from being parsed as a grep option.
+      if tmux capture-pane -t "$SESSION" -p 2>/dev/null | grep -q -e "$PATTERN"; then
+        echo "Found: $PATTERN"
+        break
+      fi
+      NOW=$(date +%s)
+      if (( NOW >= MAX_END )); then
+        echo "Timed out after \${TIMEOUT}s waiting for: $PATTERN" >&2
+        exit 1
+      fi
+      sleep 0.25
+    done
+    ;;
+
+  wait-idle)
+    # wait-idle <session> [stable-seconds]
+    SESSION="$1"; STABLE_SECS="\${2:-2}"
+    [[ -z "$SESSION" ]] && { echo "Usage: wait-idle <session> [seconds]" >&2; exit 1; }
+    LAST_OUTPUT=""
+    STABLE_START=$(date +%s)
+    MAX_END=$(( $(date +%s) + 120 ))
+    while true; do
+      CURRENT_OUTPUT=$(tmux capture-pane -t "$SESSION" -S - -p 2>/dev/null || echo "")
+      NOW=$(date +%s)
+      if [[ "$CURRENT_OUTPUT" != "$LAST_OUTPUT" ]]; then
+        LAST_OUTPUT="$CURRENT_OUTPUT"
+        STABLE_START=$NOW
+      fi
+      if (( NOW - STABLE_START >= STABLE_SECS )); then echo "Output stable for \${STABLE_SECS}s"; break; fi
+      if (( NOW >= MAX_END )); then echo "Timed out after 120s" >&2; break; fi
+      sleep 0.25
+    done
+    ;;
+
+  status)
+    SESSION="$1"
+    [[ -z "$SESSION" ]] && { echo "Usage: status <session>" >&2; exit 1; }
+    if tmux has-session -t "$SESSION" 2>/dev/null; then
+      echo "alive"
+    else
+      echo "dead"
+    fi
+    ;;
+
+  stop)
+    SESSION="$1"
+    [[ -z "$SESSION" ]] && { echo "Usage: stop <session>" >&2; exit 1; }
+    tmux kill-session -t "$SESSION" 2>/dev/null || true
+    ;;
+
+  *) usage ;;
+esac
+`
+
+    const startCommand = (params && typeof params.command === 'string') ? params.command : ''
+
+    if (!startCommand) {
+      logger.error('No command provided in params.command')
+      yield {
+        toolName: 'set_output',
+        input: {
+          overallStatus: 'failure',
+          summary: 'No command provided. Pass params.command with the CLI command to start.',
+          sessionName: '',
+          scriptIssues: [],
+          captures: [],
+        },
+      }
+      return
+    }
+
+    // Generate a unique session name
+    const sessionName = 'tui-test-' + Date.now() + '-' + Math.random().toString(36).slice(2, 6)
+    const helperPath = '/tmp/tmux-helper-' + sessionName + '.sh'
+
+    logger.info('Writing helper script to ' + helperPath)
+
+    // Write the self-contained helper script to /tmp
+    const { toolResult: writeResult } = yield {
+      toolName: 'run_terminal_command',
+      input: {
+        command: 'cat > ' + helperPath + " << 'TMUX_HELPER_EOF'\n" + helperScript + "TMUX_HELPER_EOF\nchmod +x " + helperPath,
+        timeout_seconds: 10,
+      },
+    }
+
+    const writeOutput = writeResult?.[0]
+    if (writeOutput && writeOutput.type === 'json') {
+      const value = writeOutput.value as Record<string, unknown>
+      const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined
+      if (exitCode !== 0) {
+        const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'unknown error'
+        logger.error('Failed to write helper script: ' + stderr)
+        yield {
+          toolName: 'set_output',
+          input: {
+            overallStatus: 'failure',
+            summary: 'Failed to write helper script to /tmp. ' + stderr,
+            sessionName: '',
+            scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check /tmp is writable' }],
+            captures: [],
+          },
+        }
+        return
+      }
+    }
+
+    logger.info('Starting tmux session (bash)')
+
+    // Start the tmux session with bash (not the user's command directly)
+    const { toolResult } = yield {
+      toolName: 'run_terminal_command',
+      input: {
+        command: helperPath + " start '" + sessionName + "'",
+        timeout_seconds: 30,
+      },
+    }
+
+    let started = false
+    let parseError = ''
+
+    const result = toolResult?.[0]
+    if (result && result.type === 'json') {
+      const value = result.value as Record<string, unknown>
+      const stdout = typeof value?.stdout === 'string' ? value.stdout.trim() : ''
+      const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : ''
+      const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined
+
+      if (exitCode !== 0) {
+        parseError = stderr || 'Helper script failed with no error message'
+      } else if (stdout === sessionName) {
+        started = true
+      } else {
+        parseError = 'Unexpected output: ' + stdout
+      }
+    } else {
+      parseError = 'Unexpected result type from run_terminal_command'
+    }
+
+    if (!started) {
+      const errorMsg = parseError || 'Failed to start session'
+      logger.error({ parseError: errorMsg }, 'Failed to start tmux session')
+      yield {
+        toolName: 'set_output',
+        input: {
+          overallStatus: 'failure',
+          summary: 'Failed to start tmux session. ' + errorMsg,
+          sessionName: '',
+          scriptIssues: [
+            {
+              script: helperPath,
+              issue: errorMsg,
+              errorOutput: JSON.stringify(toolResult),
+              suggestedFix: 'Ensure tmux is installed and the command is valid.',
+            },
+          ],
+          captures: [],
+        },
+      }
+      return
+    }
+
+    logger.info('Successfully started tmux session: ' + sessionName)
+
+    // Send the user's command to the bash session
+    const escapedCommand = startCommand.replace(/'/g, "'\\''")
+    const { toolResult: sendResult } = yield {
+      toolName: 'run_terminal_command',
+      input: {
+        command: helperPath + " send '" + sessionName + "' '" + escapedCommand + "'",
+        timeout_seconds: 15,
+      },
+    }
+
+    const sendOutput = sendResult?.[0]
+    if (sendOutput && sendOutput.type === 'json') {
+      const value = sendOutput.value as Record<string, unknown>
+      const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined
+      if (exitCode !== 0) {
+        const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'send failed'
+        logger.error('Failed to send command: ' + stderr)
+        yield {
+          toolName: 'run_terminal_command',
+          input: { command: helperPath + " stop '" + sessionName + "'", timeout_seconds: 5 },
+        }
+        yield {
+          toolName: 'set_output',
+          input: {
+            overallStatus: 'failure',
+            summary: 'Started session but failed to send command. ' + stderr,
+            sessionName,
+            scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check that the command is valid.' }],
+            captures: [],
+          },
+        }
+        return
+      }
+    }
+
+    logger.info('Sent command to session: ' + startCommand)
+
+    // Wait briefly then capture initial state so the agent starts with context
+    const { toolResult: initCapture } = yield {
+      toolName: 'run_terminal_command',
+      input: {
+        command: 'sleep 1.5 && ' + helperPath + " capture '" + sessionName + "' --wait 0 --label startup-check",
+        timeout_seconds: 10,
+      },
+    }
+
+    let initialOutput = '(no initial capture available)'
+    const initResult = initCapture?.[0]
+    if (initResult && initResult.type === 'json') {
+      const initValue = initResult.value as Record<string, unknown>
+      if (typeof initValue?.stdout === 'string' && initValue.stdout.trim()) {
+        initialOutput = initValue.stdout.trim()
+      }
+    }
+
+    const captureDir = '/tmp/tmux-captures-' + sessionName
+
+    yield {
+      toolName: 'add_message',
+      input: {
+        role: 'user',
+        content: 'A tmux session has been started and `' + startCommand + '` has been sent to it.\n\n' +
+          '**Session:** `' + sessionName + '`\n' +
+          '**Helper:** `' + helperPath + '`\n' +
+          '**Captures dir:** `' + captureDir + '/`\n\n' +
+          '**Initial terminal output:**\n```\n' + initialOutput + '\n```\n\n' +
+          'Check the initial output above — if you see errors like "command not found" or "No such file", report failure immediately.\n\n' +
+          'Commands:\n' +
+          '- Send input: `' + helperPath + ' send "' + sessionName + '" "..."`\n' +
+          '- Send with paste mode: `' + helperPath + ' send "' + sessionName + '" "..." --paste`\n' +
+          '- Send + wait for output: `' + helperPath + ' send "' + sessionName + '" "..." --wait-idle 3`\n' +
+          '- Send key: `' + helperPath + ' key "' + sessionName + '" C-c`\n' +
+          '- Raw tmux send-keys: `' + helperPath + ' raw "' + sessionName + '" "text" Enter`\n' +
+          '- Wait for pattern: `' + helperPath + ' wait-for "' + sessionName + '" "pattern" --timeout 30`\n' +
+          '- Capture visible pane: `' + helperPath + ' capture "' + sessionName + '" --label "..."`\n' +
+          '- Capture full scrollback: `' + helperPath + ' capture "' + sessionName + '" --full --label "final"`\n' +
+          '- Capture without ANSI colors: `' + helperPath + ' capture "' + sessionName + '" --strip-ansi`\n' +
+          '- Check session status: `' + helperPath + ' status "' + sessionName + '"`\n' +
+          '- Wait for stable output: `' + helperPath + ' wait-idle "' + sessionName + '" 3`\n' +
+          '- Stop session: `' + helperPath + ' stop "' + sessionName + '"`\n\n' +
+          'Captures are saved to `' + captureDir + '/` — use the file paths in your output so the parent agent can verify with `read_files`.',
+      },
+      includeToolCall: false,
+    }
+
+    yield 'STEP_ALL'
+  },
+}
+
+export default definition
diff --git a/agents/__tests__/context-pruner.test.ts b/agents/__tests__/context-pruner.test.ts index dd725a578b..45c61b4b9f 100644 --- a/agents/__tests__/context-pruner.test.ts +++ b/agents/__tests__/context-pruner.test.ts @@ -1087,26 +1087,6 @@ describe('context-pruner spawn_agents with prompt and params', () => { expect(content).toContain('params: {"command":"npm test"}') }) - test('includes both prompt and params for spawn_agent_inline', () => { - const messages = [ - createMessage('user', 'Search code'), - createToolCallMessage('call-1', 'spawn_agent_inline', { - agent_type: 'code-searcher', - prompt: 'Find usages of deprecated API', - params: { searchQueries: [{ pattern: 'oldFunction' }] }, - }), - createToolResultMessage('call-1', 'spawn_agent_inline', { output: {} }), - ] - - const results = runHandleSteps(messages) - const content = results[0].input.messages[0].content[0].text - - 
expect(content).toContain('Spawned agent: code-searcher') - expect(content).toContain('prompt: "Find usages of deprecated API"') - expect(content).toContain('params:') - expect(content).toContain('oldFunction') - }) - test('truncates very long prompts (over 1000 chars)', () => { const longPrompt = 'X'.repeat(1500) const messages = [ diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 31ffa89439..be5ade5a1c 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,11 +30,13 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? { - only: ['fireworks'], - order: ['fireworks'], + only: ['inceptron/fp8'], + order: ['inceptron/fp8'], allow_fallbacks: false, data_collection: 'deny', - } : undefined, + } : { + only: ['amazon-bedrock'], + }, displayName: 'Buffy the Orchestrator', spawnerPrompt: 'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks', @@ -68,16 +70,13 @@ export function createBase2( !noAskUser && 'ask_user', 'skill', 'set_output', - isFree && 'code_search', - isFree && 'list_directory', - isFree && 'glob', + 'code_search', + 'list_directory', + 'glob', ), spawnableAgents: buildArray( !isMax && 'file-picker', isMax && 'file-picker-max', - !isFree && 'code-searcher', - !isFree && 'directory-lister', - !isFree && 'glob-matcher', 'researcher-web', 'researcher-docs', isFree ? 'commander-lite' : 'commander', @@ -89,6 +88,7 @@ export function createBase2( isFree && 'code-reviewer-lite', isDefault && 'code-reviewer', isMax && 'code-reviewer-multi-prompt', + isDefault && 'tmux-cli', 'context-pruner', ), @@ -109,6 +109,7 @@ export function createBase2( } - **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. 
git push, git commit, running any scripts -- especially ones that could alter production environments (!), installing packages globally, etc). Don't run any of these effectful commands unless the user explicitly asks you to. - **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it. +- **Don't use set_output:** The set_output tool is for spawned subagents to report results. Don't use it yourself. # Code Editing Mandates @@ -140,7 +141,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u - **Spawn multiple agents in parallel:** This increases the speed of your response **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response. - **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. ${buildArray( - '- Spawn context-gathering agents (file pickers, code-searcher, directory-lister, glob-matcher, and web/docs researchers) before making edits.', + '- Spawn context-gathering agents (file pickers and web/docs researchers) before making edits. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase.', isFree && '- Spawn the editor-lite agent to implement the changes after you have gathered all the context you need.', isDefault && @@ -196,11 +197,11 @@ ${buildArray( please implement [a complex new feature] -[ You spawn 3 file-pickers, a code-searcher, and a docs researcher in parallel to find relevant files and do research online ] +[ You spawn 3 file-pickers and a docs researcher in parallel to find relevant files and do research online. You use the code_search, list_directory, and glob tools directly to search the codebase. 
] [ You read a few of the relevant files using the read_files tool in two separate tool calls ] -[ You spawn one more code-searcher and file-picker ] +[ You use code_search and glob tools, and spawn another file-picker to find more relevant files ] [ You read a few other relevant files using the read_files tool ]${!noAskUser ? `\n\n[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ]` @@ -299,7 +300,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} } } -const EXPLORE_PROMPT = `- Iteratively spawn file pickers, code-searchers, directory-listers, glob-matchers, commanders, and web/docs researchers to gather context as needed. The file-picker agent in particular is very useful to find relevant files -- try spawning multiple in parallel (say, 2-5) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. Read all the relevant files using the read_files tool.` +const EXPLORE_PROMPT = `- Iteratively spawn file pickers, commanders, and web/docs researchers to gather context as needed. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase. The file-picker agent in particular is very useful to find relevant files -- try spawning multiple in parallel (say, 2-5) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. 
Read all the relevant files using the read_files tool.` function buildImplementationInstructionsPrompt({ isSonnet, diff --git a/agents/context-pruner.ts b/agents/context-pruner.ts index 0f31217402..dbb3c3cc57 100644 --- a/agents/context-pruner.ts +++ b/agents/context-pruner.ts @@ -297,9 +297,6 @@ const definition: AgentDefinition = { /** Agent IDs whose output should be excluded from spawn_agents results */ const SPAWN_AGENTS_OUTPUT_BLACKLIST = [ 'file-picker', - 'code-searcher', - 'directory-lister', - 'glob-matcher', 'researcher-web', 'researcher-docs', 'commander', diff --git a/agents/editor/best-of-n/best-of-n-selector2.ts b/agents/editor/best-of-n/best-of-n-selector2.ts index 852c268783..a0263a42cb 100644 --- a/agents/editor/best-of-n/best-of-n-selector2.ts +++ b/agents/editor/best-of-n/best-of-n-selector2.ts @@ -23,6 +23,11 @@ export const createBestOfNSelector2 = (options: { effort: 'high', }, }), + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: isGpt5 ? 'Best-of-N GPT-5 Diff Selector' : isOpus diff --git a/agents/editor/best-of-n/editor-implementor.ts b/agents/editor/best-of-n/editor-implementor.ts index d22cc77f31..87ec441ba3 100644 --- a/agents/editor/best-of-n/editor-implementor.ts +++ b/agents/editor/best-of-n/editor-implementor.ts @@ -20,6 +20,11 @@ export const createBestOfNImplementor = (options: { : isGemini ? 
'google/gemini-3-pro-preview' : 'openai/gpt-5.1', + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: 'Implementation Generator', spawnerPrompt: 'Generates a complete implementation using propose_* tools that draft changes without applying them', diff --git a/agents/editor/best-of-n/editor-multi-prompt.ts b/agents/editor/best-of-n/editor-multi-prompt.ts index 2d101ea8a6..5c54cf9697 100644 --- a/agents/editor/best-of-n/editor-multi-prompt.ts +++ b/agents/editor/best-of-n/editor-multi-prompt.ts @@ -12,6 +12,9 @@ export function createMultiPromptEditor(): Omit { return { publisher, model: 'anthropic/claude-opus-4.6', + providerOptions: { + only: ['amazon-bedrock'], + }, displayName: 'Multi-Prompt Editor', spawnerPrompt: 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. It also returns further suggested improvements which you should take seriously and act on. Pass as input an array of short prompts specifying different implementation approaches or strategies. Make sure to read any files intended to be edited before spawning this agent.', diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index f765966879..6beb22d221 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -15,6 +15,11 @@ export const createCodeEditor = (options: { : options.model === 'minimax' ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', + ...(options.model === 'opus' && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: 'Code Editor', spawnerPrompt: "Expert code editor that implements code changes based on the user's request. Do not specify an input prompt for this agent; it inherits the context of the entire conversation with the user. 
Make sure to read any files intended to be edited before spawning this agent as it cannot read files on its own.", diff --git a/agents/general-agent/general-agent.ts b/agents/general-agent/general-agent.ts index 37d92beacd..4925e60ab4 100644 --- a/agents/general-agent/general-agent.ts +++ b/agents/general-agent/general-agent.ts @@ -13,6 +13,11 @@ export const createGeneralAgent = (options: { return { publisher, model: isGpt5 ? 'openai/gpt-5.2' : 'anthropic/claude-opus-4.6', + ...(!isGpt5 && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), ...(isGpt5 && { reasoningOptions: { effort: 'high' as const, diff --git a/agents/reviewer/code-reviewer.ts b/agents/reviewer/code-reviewer.ts index c22d2d6c40..9cc840d69f 100644 --- a/agents/reviewer/code-reviewer.ts +++ b/agents/reviewer/code-reviewer.ts @@ -65,6 +65,9 @@ const definition: SecretAgentDefinition = { id: 'code-reviewer', publisher, ...createReviewer('anthropic/claude-opus-4.6'), + providerOptions: { + only: ['amazon-bedrock'], + }, } export default definition diff --git a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts index 134862a57b..a6a380e3ee 100644 --- a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts +++ b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts @@ -15,6 +15,9 @@ export function createCodeReviewerMultiPrompt(): Omit< return { publisher, model: 'anthropic/claude-opus-4.6', + providerOptions: { + only: ['amazon-bedrock'], + }, displayName: 'Multi-Prompt Code Reviewer', spawnerPrompt: 'Reviews code by spawning multiple code-reviewer agents with different focus prompts, then combines all review outputs into a comprehensive review. Make sure to read relevant files before spawning this agent. 
Pass an input array of short prompts specifying several different review focuses or perspectives.', diff --git a/agents/thinker/best-of-n/thinker-best-of-n.ts b/agents/thinker/best-of-n/thinker-best-of-n.ts index 66530a9269..3e1e532c5f 100644 --- a/agents/thinker/best-of-n/thinker-best-of-n.ts +++ b/agents/thinker/best-of-n/thinker-best-of-n.ts @@ -20,6 +20,11 @@ export function createThinkerBestOfN( : isOpus ? 'anthropic/claude-opus-4.6' : 'anthropic/claude-sonnet-4.5', + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: isGpt5 ? 'Best-of-N GPT-5 Thinker' : isOpus diff --git a/agents/thinker/best-of-n/thinker-selector.ts b/agents/thinker/best-of-n/thinker-selector.ts index a5c302bb96..ab10bff69f 100644 --- a/agents/thinker/best-of-n/thinker-selector.ts +++ b/agents/thinker/best-of-n/thinker-selector.ts @@ -11,6 +11,11 @@ export function createThinkerSelector( model: isOpus ? 'anthropic/claude-opus-4.6' : 'anthropic/claude-sonnet-4.5', + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: isOpus ? 'Opus Thinker Output Selector' : 'Thinker Output Selector', diff --git a/agents/thinker/thinker.ts b/agents/thinker/thinker.ts index dfd61db1a0..3dd57d472f 100644 --- a/agents/thinker/thinker.ts +++ b/agents/thinker/thinker.ts @@ -6,6 +6,9 @@ const definition: SecretAgentDefinition = { id: 'thinker', publisher, model: 'anthropic/claude-opus-4.6', + providerOptions: { + only: ['amazon-bedrock'], + }, displayName: 'Theo the Theorizer', spawnerPrompt: 'Does deep thinking given the current conversation history and a specific prompt to focus on. Use this to help you solve a specific problem. 
It is better to gather any relevant context before spawning this agent.', diff --git a/cli/src/components/blocks/agent-block-grid.tsx b/cli/src/components/blocks/agent-block-grid.tsx index b303937fcb..a238510f98 100644 --- a/cli/src/components/blocks/agent-block-grid.tsx +++ b/cli/src/components/blocks/agent-block-grid.tsx @@ -41,7 +41,6 @@ export const AgentBlockGrid = memo( availableWidth={availableWidth} getItemKey={getItemKey} renderItem={renderItem} - marginTop={1} /> ) }, diff --git a/cli/src/components/blocks/agent-branch-item.tsx b/cli/src/components/blocks/agent-branch-item.tsx index 44d082c4ee..7661bd1be9 100644 --- a/cli/src/components/blocks/agent-branch-item.tsx +++ b/cli/src/components/blocks/agent-branch-item.tsx @@ -125,7 +125,7 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { if (React.isValidElement(value)) { if (value.key === null || value.key === undefined) { return ( - + {value} ) @@ -135,7 +135,7 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { if (Array.isArray(value)) { return ( - + {value.map((child, idx) => ( { } return ( - + {value} ) diff --git a/cli/src/components/blocks/agent-branch-wrapper.tsx b/cli/src/components/blocks/agent-branch-wrapper.tsx index 3b336735fa..46bae0bf43 100644 --- a/cli/src/components/blocks/agent-branch-wrapper.tsx +++ b/cli/src/components/blocks/agent-branch-wrapper.tsx @@ -9,7 +9,7 @@ import React, { import { AgentBlockGrid } from './agent-block-grid' import { AgentBranchItem } from './agent-branch-item' -import { trimTrailingNewlines, sanitizePreview } from './block-helpers' +import { trimNewlines, sanitizePreview } from './block-helpers' import { ContentWithMarkdown } from './content-with-markdown' import { ImplementorGroup } from './implementor-row' import { ThinkingBlock } from './thinking-block' @@ -18,7 +18,6 @@ import { useTheme } from '../../hooks/use-theme' import { useChatStore } from '../../state/chat-store' import { isTextBlock } from 
'../../types/chat' import { getAgentStatusInfo } from '../../utils/agent-helpers' -import { extractHtmlBlockMargins } from '../../utils/block-margins' import { processBlocks, type BlockProcessorHandlers, @@ -248,11 +247,12 @@ const AgentBody = memo( const isNestedStreamingText = p.parentIsStreaming || nestedStatus === 'running' const filteredNestedContent = isNestedStreamingText - ? trimTrailingNewlines(textBlock.content) + ? trimNewlines(textBlock.content) : textBlock.content.trim() + if (!filteredNestedContent) { + return null + } const markdownOptionsForLevel = p.getAgentMarkdownOptions(0) - const marginTop = textBlock.marginTop ?? 0 - const marginBottom = textBlock.marginBottom ?? 0 const explicitColor = textBlock.color const nestedTextColor = explicitColor ?? p.theme.foreground @@ -262,8 +262,6 @@ const AgentBody = memo( style={{ wrapMode: 'word', fg: nestedTextColor, - marginTop, - marginBottom, }} > {htmlBlock.render({ @@ -390,7 +384,6 @@ export const AgentBranchWrapper = memo( flexDirection: 'column', gap: 0, width: '100%', - marginTop: 1, }} > diff --git a/cli/src/components/blocks/block-helpers.ts b/cli/src/components/blocks/block-helpers.ts index 193d110d60..681d771fdd 100644 --- a/cli/src/components/blocks/block-helpers.ts +++ b/cli/src/components/blocks/block-helpers.ts @@ -1,6 +1,6 @@ -export function trimTrailingNewlines(str: string): string { - return str.replace(/\n+$/, '') +export function trimNewlines(str: string): string { + return str.replace(/^\n+|\n+$/g, '') } export function sanitizePreview(text: string): string { diff --git a/cli/src/components/blocks/image-block.tsx b/cli/src/components/blocks/image-block.tsx index 761295709f..6aada062ed 100644 --- a/cli/src/components/blocks/image-block.tsx +++ b/cli/src/components/blocks/image-block.tsx @@ -62,7 +62,7 @@ export const ImageBlock = memo(({ block, availableWidth }: ImageBlockProps) => { if (inlineSequence) { // Render inline image using terminal escape sequence return ( - + {/* Image 
caption/metadata */} šŸ“· @@ -84,8 +84,6 @@ export const ImageBlock = memo(({ block, availableWidth }: ImageBlockProps) => { style={{ flexDirection: 'column', gap: 0, - marginTop: 1, - marginBottom: 1, paddingLeft: 1, borderStyle: 'single', borderColor: theme.border, diff --git a/cli/src/components/blocks/single-block.tsx b/cli/src/components/blocks/single-block.tsx index e646e15ed1..021c7c3212 100644 --- a/cli/src/components/blocks/single-block.tsx +++ b/cli/src/components/blocks/single-block.tsx @@ -4,12 +4,11 @@ import React, { memo, type ReactNode } from 'react' import { AgentBranchWrapper } from './agent-branch-wrapper' import { AgentListBranch } from './agent-list-branch' import { AskUserBranch } from './ask-user-branch' -import { trimTrailingNewlines, isReasoningTextBlock } from './block-helpers' +import { trimNewlines, isReasoningTextBlock } from './block-helpers' import { ContentWithMarkdown } from './content-with-markdown' import { ImageBlock } from './image-block' import { UserBlockTextWithInlineCopy } from './user-content-copy' import { useTheme } from '../../hooks/use-theme' -import { extractTextBlockMargins, extractHtmlBlockMargins } from '../../utils/block-margins' import { PlanBox } from '../renderers/plan-box' import type { @@ -68,11 +67,12 @@ export const SingleBlock = memo( const textBlock = block as TextContentBlock const isStreamingText = isLoading || !isComplete const filteredContent = isStreamingText - ? trimTrailingNewlines(textBlock.content) + ? trimNewlines(textBlock.content) : textBlock.content.trim() + if (!filteredContent) { + return null + } const renderKey = `${messageId}-text-${idx}` - const prevBlock = idx > 0 && blocks ? blocks[idx - 1] : null - const { marginTop, marginBottom } = extractTextBlockMargins(textBlock, prevBlock) const explicitColor = textBlock.color const blockTextColor = explicitColor ?? 
textColor @@ -86,8 +86,8 @@ export const SingleBlock = memo( textColor={blockTextColor} codeBlockWidth={codeBlockWidth} palette={markdownPalette} - marginTop={marginTop} - marginBottom={marginBottom} + marginTop={0} + marginBottom={0} /> ) } @@ -98,8 +98,6 @@ export const SingleBlock = memo( style={{ wrapMode: 'word', fg: blockTextColor, - marginTop, - marginBottom, }} attributes={isUser ? TextAttributes.ITALIC : undefined} > @@ -129,15 +127,12 @@ export const SingleBlock = memo( } case 'html': { - const { marginTop, marginBottom } = extractHtmlBlockMargins(block) return ( diff --git a/cli/src/components/blocks/tool-block-group.tsx b/cli/src/components/blocks/tool-block-group.tsx index ec215d6eb1..1da064412d 100644 --- a/cli/src/components/blocks/tool-block-group.tsx +++ b/cli/src/components/blocks/tool-block-group.tsx @@ -9,46 +9,20 @@ interface ToolBlockGroupProps { toolBlocks: Extract[] keyPrefix: string startIndex: number + /** @deprecated No longer used for margin calculation */ nextIndex: number + /** @deprecated No longer used for margin calculation */ siblingBlocks: ContentBlock[] availableWidth: number onToggleCollapsed: (id: string) => void markdownPalette: MarkdownPalette } -const isRenderableTimelineBlock = ( - block: ContentBlock | null | undefined, -): boolean => { - if (!block) { - return false - } - - if (block.type === 'tool') { - return block.toolName !== 'end_turn' - } - - switch (block.type) { - case 'text': - case 'html': - case 'agent': - case 'agent-list': - case 'plan': - case 'mode-divider': - case 'ask-user': - case 'image': - return true - default: - return false - } -} - export const ToolBlockGroup = memo( ({ toolBlocks, keyPrefix, startIndex, - nextIndex, - siblingBlocks, availableWidth, onToggleCollapsed, markdownPalette, @@ -68,24 +42,12 @@ export const ToolBlockGroup = memo( if (groupNodes.length === 0) return null - const hasRenderableBefore = - startIndex > 0 && isRenderableTimelineBlock(siblingBlocks[startIndex - 1]) - let 
hasRenderableAfter = false - for (let i = nextIndex; i < siblingBlocks.length; i++) { - if (isRenderableTimelineBlock(siblingBlocks[i])) { - hasRenderableAfter = true - break - } - } - return ( {groupNodes} diff --git a/cli/src/components/blocks/user-content-copy.tsx b/cli/src/components/blocks/user-content-copy.tsx index e23bc65a38..256b8177f9 100644 --- a/cli/src/components/blocks/user-content-copy.tsx +++ b/cli/src/components/blocks/user-content-copy.tsx @@ -2,7 +2,7 @@ import { TextAttributes } from '@opentui/core' import React, { memo } from 'react' import { CopyButton } from '../copy-button' -import { trimTrailingNewlines } from './block-helpers' +import { trimNewlines } from './block-helpers' import { ContentWithMarkdown } from './content-with-markdown' import type { MarkdownPalette } from '../../utils/markdown-renderer' @@ -33,7 +33,7 @@ export const UserContentWithCopyButton = memo( }: UserContentWithCopyButtonProps) => { const isStreamingMessage = isLoading || !isComplete const normalizedContent = isStreamingMessage - ? trimTrailingNewlines(content) + ? trimNewlines(content) : content.trim() const hasContent = normalizedContent.length > 0 diff --git a/cli/src/components/message-block.tsx b/cli/src/components/message-block.tsx index 7907875868..90fbc89533 100644 --- a/cli/src/components/message-block.tsx +++ b/cli/src/components/message-block.tsx @@ -264,9 +264,8 @@ export const MessageBlock = memo(({ diff --git a/cli/src/components/tools/diff-viewer.tsx b/cli/src/components/tools/diff-viewer.tsx index d528c28054..72ee7361f3 100644 --- a/cli/src/components/tools/diff-viewer.tsx +++ b/cli/src/components/tools/diff-viewer.tsx @@ -50,7 +50,7 @@ const lineColor = ( export const DiffViewer = ({ diffText }: DiffViewerProps) => { const theme = useTheme() - const lines = diffText.split('\n') + const lines = diffText.trim().split('\n') return ( { {todo.completed ? 
( <> - āœ“ + āœ“ { ) : ( <> - ☐ + ☐ {todo.task} )} diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts index c38695fe1f..44e8f4d4e3 100644 --- a/common/src/types/contracts/llm.ts +++ b/common/src/types/contracts/llm.ts @@ -25,6 +25,13 @@ export type StreamChunk = > | { type: 'error'; message: string } +export type CacheDebugUsageData = { + inputTokens: number + outputTokens: number + cachedInputTokens: number + totalTokens: number +} + export type PromptAiSdkStreamFn = ( params: { apiKey: string @@ -45,6 +52,7 @@ export type PromptAiSdkStreamFn = ( rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: OpenRouterProviderRoutingOptions @@ -79,6 +87,7 @@ export type PromptAiSdkFn = ( rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: OpenRouterProviderRoutingOptions @@ -114,6 +123,7 @@ export type PromptAiSdkStructuredInput = { rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: OpenRouterProviderRoutingOptions diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index f0e21a6392..d7ca6de62c 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.5", + "version": "0.0.7", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index e58705f68f..bcef00bf97 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -2,73 +2,29 
@@ import { AnimatePresence, motion } from 'framer-motion' import { - Terminal, - Brain, - Scissors, - Zap, - MessageSquare, - FileText, ChevronDown, } from 'lucide-react' import { useState } from 'react' -import Link from 'next/link' import { BackgroundBeams } from '@/components/background-beams' import { CopyButton } from '@/components/copy-button' import { HeroGrid } from '@/components/hero-grid' -import { TerminalDemo } from '@/components/terminal-demo' -import { Button } from '@/components/ui/button' import { cn } from '@/lib/utils' const INSTALL_COMMAND = 'npm install -g freebuff' -const features = [ - { - icon: Brain, - title: 'Deep Codebase Understanding', - description: - 'Indexes your entire project to generate code that fits your patterns and conventions.', - }, - { - icon: Scissors, - title: 'Surgical Code Edits', - description: - "Makes precise changes across files while respecting your codebase's structure.", - }, - { - icon: Terminal, - title: 'Terminal Integration', - description: - 'Runs commands on your behalf — install packages, run tests, and more.', - }, - { - icon: FileText, - title: 'Knowledge Files', - description: - 'Add knowledge.md to teach Freebuff about your project conventions.', - }, - { - icon: MessageSquare, - title: 'Chat History', - description: - 'Resume past conversations and pick up right where you left off.', - }, - { - icon: Zap, - title: 'Custom Agents', - description: - 'Load custom agents from your .agents/ directory for specialized workflows.', - }, -] - -const headlineWords = ["The", "strongest"] -const greenWords = ["free", "coding", "agent."] +const headlineWords = ["The", "free", "coding", "agent"] const faqs = [ { - question: 'Is it really free?', + question: 'How can it be free?', answer: - 'Yes! Freebuff is completely free to use. 
The service is supported by ads shown in the CLI.', + 'Freebuff is supported by ads shown in the CLI.', + }, + { + question: 'What models do you use?', + answer: + 'MiniMax M2.5 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.', }, { question: 'Are you training on my data?', @@ -80,11 +36,6 @@ const faqs = [ answer: "We don't store your codebase. We only collect minimal logs for debugging purposes.", }, - { - question: 'What model do you use?', - answer: - 'We use multiple models: MiniMax M2.5 as the main coding agent, Gemini 3.1 Flash Lite for finding files, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.', - }, ] function InstallCommand({ className }: { className?: string }) { @@ -156,6 +107,12 @@ function FAQList() { ) } +const PHILOSOPHY_WORDS = [ + { word: 'FAST', description: '3Ɨ the speed of Claude Code' }, + { word: 'SIMPLE', description: 'No modes. No config. Just code.' }, + { word: 'LOADED', description: 'Web research, browser use, and more — built in' }, +] + const wordVariant = { initial: { opacity: 0, y: 30, filter: 'blur(8px)' }, animate: { @@ -184,24 +141,6 @@ export default function HomeClient() { {/* Hero content */}
- {/* Pill badge */} - -
- - - - - - 100% Free - -
-
- {/* Headline with staggered word animation */} - - {headlineWords.map((word, i) => ( - - {word} - - ))} - - {greenWords.map((word, i) => ( + {headlineWords.map((word, i) => ( {word} @@ -244,7 +172,7 @@ export default function HomeClient() { transition={{ duration: 0.6, delay: 0.8 }} className="text-lg md:text-xl text-zinc-400 max-w-2xl mx-auto mb-10 leading-relaxed" > - No subscription. No API key. 5x faster than Claude Code. + No subscription. No configuration. Start in seconds. {/* Install command */} @@ -256,53 +184,30 @@ export default function HomeClient() { > - - {/* Terminal demo */} -
{/* Bottom fade */}
- {/* Divider */} -
- - {/* ─── Features Section ─── */} -
-
- -

- Everything you need. Nothing you don't. -

-

- Freebuff brings the full power of an AI coding agent to your - terminal — completely free. -

-
- -
- {features.map((feature, i) => ( + {/* ─── Philosophy Section ─── */} +
+
+
+ {PHILOSOPHY_WORDS.map((item, i) => ( -
- +
+ {item.word}
-

{feature.title}

-

- {feature.description} +

+ {item.description}

))} @@ -310,6 +215,9 @@ export default function HomeClient() {
+ {/* Divider */} +
+ {/* ─── FAQ Section ─── */}
@@ -328,42 +236,6 @@ export default function HomeClient() {
- - {/* Divider */} -
- - {/* ─── CTA Section ─── */} -
-
-
- -

- Start coding for free -

-

- No credit card. No trial period. Just install and go. -

- - - -

- Want more power?{' '} - - Check out Codebuff - {' '} - for premium models and higher limits. -

-
-
-
) } diff --git a/freebuff/web/src/components/footer.tsx b/freebuff/web/src/components/footer.tsx index 90886eba6a..1b9587dbec 100644 --- a/freebuff/web/src/components/footer.tsx +++ b/freebuff/web/src/components/footer.tsx @@ -20,7 +20,7 @@ export function Footer() {

- The world's strongest free coding agent. + The free coding agent

@@ -28,10 +28,11 @@ export function Footer() {

Links

diff --git a/packages/agent-runtime/src/prompt-agent-stream.ts b/packages/agent-runtime/src/prompt-agent-stream.ts index eaa8e70688..13d0ba2b11 100644 --- a/packages/agent-runtime/src/prompt-agent-stream.ts +++ b/packages/agent-runtime/src/prompt-agent-stream.ts @@ -3,7 +3,7 @@ import { globalStopSequence } from './constants' import type { AgentTemplate } from './templates/types' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { SendActionFn } from '@codebuff/common/types/contracts/client' -import type { PromptAiSdkStreamFn } from '@codebuff/common/types/contracts/llm' +import type { CacheDebugUsageData, PromptAiSdkStreamFn } from '@codebuff/common/types/contracts/llm' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ParamsOf } from '@codebuff/common/types/function-params' import type { Message } from '@codebuff/common/types/messages/codebuff-message' @@ -32,6 +32,7 @@ export const getAgentStreamFromTemplate = (params: { rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void onCostCalculated?: (credits: number) => Promise promptAiSdkStream: PromptAiSdkStreamFn @@ -55,6 +56,7 @@ export const getAgentStreamFromTemplate = (params: { userInputId, cacheDebugCorrelation, onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, sendAction, onCostCalculated, @@ -90,6 +92,7 @@ export const getAgentStreamFromTemplate = (params: { userInputId, cacheDebugCorrelation, onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, onCostCalculated, sendAction, diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts index 8e9eaf946d..b323d5f0f5 100644 --- a/packages/agent-runtime/src/run-agent-step.ts +++ b/packages/agent-runtime/src/run-agent-step.ts @@ -23,6 +23,7 @@ import { getAgentOutput } from './util/agent-output' import { createCacheDebugSnapshot, 
enrichCacheDebugSnapshotWithProviderRequest, + enrichCacheDebugSnapshotWithUsage, } from './util/cache-debug' import { withSystemInstructionTags, @@ -39,7 +40,7 @@ import type { FinishAgentRunFn, StartAgentRunFn, } from '@codebuff/common/types/contracts/database' -import type { PromptAiSdkFn } from '@codebuff/common/types/contracts/llm' +import type { CacheDebugUsageData, PromptAiSdkFn } from '@codebuff/common/types/contracts/llm' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ParamsExcluding, @@ -312,6 +313,17 @@ export const runAgentStep = async ( } : undefined + const onCacheDebugUsageReceived = + cacheDebugCorrelation + ? (usage: CacheDebugUsageData) => { + enrichCacheDebugSnapshotWithUsage({ + correlation: cacheDebugCorrelation, + usage, + logger, + }) + } + : undefined + logger.debug( { iteration: iterationNum, @@ -343,6 +355,7 @@ export const runAgentStep = async ( ? serializeCacheDebugCorrelation(cacheDebugCorrelation) : undefined, onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, }) if (result.aborted) { @@ -399,6 +412,7 @@ export const runAgentStep = async ( includeCacheControl: supportsCacheControl(agentTemplate.model), messages: [systemMessage(system), ...agentState.messageHistory], onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, template: agentTemplate, onCostCalculated, }) diff --git a/packages/agent-runtime/src/util/cache-debug.ts b/packages/agent-runtime/src/util/cache-debug.ts index 826349a789..686dd67d74 100644 --- a/packages/agent-runtime/src/util/cache-debug.ts +++ b/packages/agent-runtime/src/util/cache-debug.ts @@ -5,6 +5,7 @@ import { dirname, join } from 'path' import { type CacheDebugCorrelation, } from '@codebuff/common/util/cache-debug' +import type { CacheDebugUsageData } from '@codebuff/common/types/contracts/llm' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { Message } from '@codebuff/common/types/messages/codebuff-message' import type 
{ ProviderMetadata } from '@codebuff/common/types/messages/provider-metadata' @@ -50,6 +51,7 @@ export type CacheDebugSnapshot = { toolsHash?: string preConversion: CacheDebugPreConversionSnapshot providerRequest?: CacheDebugProviderRequestSnapshot + usage?: CacheDebugUsageData } function getCacheDebugDir(projectRoot: string) { @@ -241,6 +243,42 @@ export function createCacheDebugSnapshot(params: { return { snapshotId, filename, projectRoot } } +export function enrichCacheDebugSnapshotWithUsage(params: { + correlation: CacheDebugCorrelation + usage: CacheDebugUsageData + logger: Logger +}) { + const { correlation, usage, logger } = params + try { + const existing = loadSnapshot({ + projectRoot: correlation.projectRoot, + filename: correlation.filename, + }) + if (!existing) { + logger.warn( + `[Cache Debug] Could not find snapshot ${correlation.filename} to enrich with usage`, + ) + return + } + + if (existing.id !== correlation.snapshotId) { + logger.warn( + `[Cache Debug] Snapshot ID mismatch while enriching ${correlation.filename} with usage`, + ) + return + } + + const updated: CacheDebugSnapshot = { + ...existing, + usage, + } + + writeSnapshot({ snapshot: updated, logger }) + } catch (err) { + logger.warn({ error: err }, '[Cache Debug] Failed to enrich snapshot with usage') + } +} + export function enrichCacheDebugSnapshotWithProviderRequest(params: { correlation: CacheDebugCorrelation provider: string diff --git a/scripts/compare-cache-debug.ts b/scripts/compare-cache-debug.ts index a0d1f72c82..8e8f7f897d 100644 --- a/scripts/compare-cache-debug.ts +++ b/scripts/compare-cache-debug.ts @@ -4,10 +4,14 @@ * Compare sequential cache debug snapshots to find what's causing prompt cache misses. * * Usage: - * bun scripts/compare-cache-debug.ts [directory] [--agent ] + * bun scripts/compare-cache-debug.ts [directory] [--agent ] [--run ] [--cross-run] * * Options: - * --agent Only compare snapshots from this agent type (e.g. 
base2) + * --agent Only compare snapshots from this agent type (e.g. base2) + * --run Only compare snapshots from this specific run + * --cross-run Compare all snapshots sequentially (old behavior, across runs) + * + * Default: groups snapshots by runId and compares consecutive steps within each run. * * Default directory: debug/cache-debug/ * @@ -50,6 +54,12 @@ interface Snapshot { rawBody: unknown normalized: unknown } + usage?: { + inputTokens: number + outputTokens: number + cachedInputTokens: number + totalTokens: number + } } function findFirstDifference( @@ -134,6 +144,20 @@ function printSectionHeader(title: string) { console.log(`${'─'.repeat(80)}`) } +function stripCacheControlFromMessage(msg: unknown): unknown { + if (!msg || typeof msg !== 'object') return msg + const obj = JSON.parse(JSON.stringify(msg)) + delete obj.cache_control + if (Array.isArray(obj.content)) { + for (const part of obj.content) { + if (part && typeof part === 'object') { + delete part.cache_control + } + } + } + return obj +} + function compareProviderRequests( prev: Snapshot['providerRequest'], curr: Snapshot['providerRequest'], @@ -199,12 +223,26 @@ function compareProviderRequests( console.log(` āœ… messages: identical (${prevMsgs.length} messages)`) } else { console.log(` āŒ messages: differ (${prevMsgs.length} → ${currMsgs.length})`) + + // Compare with cache_control stripped to check structural stability const minLen = Math.min(prevMsgs.length, currMsgs.length) + let firstRawDiff = -1 + let firstStructDiff = -1 for (let i = 0; i < minLen; i++) { - if (JSON.stringify(prevMsgs[i]) !== JSON.stringify(currMsgs[i])) { - console.log(` First diff at message index ${i}`) - break + if (firstRawDiff < 0 && JSON.stringify(prevMsgs[i]) !== JSON.stringify(currMsgs[i])) { + firstRawDiff = i } + if (firstStructDiff < 0 && JSON.stringify(stripCacheControlFromMessage(prevMsgs[i])) !== JSON.stringify(stripCacheControlFromMessage(currMsgs[i]))) { + firstStructDiff = i + } + } + if 
(firstRawDiff >= 0) { + console.log(` First raw diff at message index ${firstRawDiff}`) + } + if (firstStructDiff >= 0) { + console.log(` First structural diff (ignoring cache_control) at message index ${firstStructDiff}`) + } else if (prevMsgs.length === currMsgs.length) { + console.log(` āœ… Structurally identical (only cache_control placement differs)`) } if (prevMsgs.length !== currMsgs.length) { console.log(` Message count: ${prevMsgs.length} → ${currMsgs.length}`) @@ -218,7 +256,7 @@ function compareProviderRequests( function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: string) { printSectionHeader( - `Comparing snapshot ${prev.index} → ${curr.index} (${prev.agentType})`, + `Comparing step ${prev.index} → ${curr.index} (${prev.agentType})`, ) console.log(` File A: ${prevFile}`) console.log(` File B: ${currFile}`) @@ -229,8 +267,15 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: if (prev.systemHash || curr.systemHash) { console.log(` Hashes: system=${prev.systemHash ?? '?'}→${curr.systemHash ?? '?'} tools=${prev.toolsHash ?? '?'}→${curr.toolsHash ?? '?'}`) } - if (prev.runId || curr.runId) { - console.log(` RunId: ${prev.runId ?? '?'} → ${curr.runId ?? '?'}`) + for (const snap of [{ label: 'A', data: prev }, { label: 'B', data: curr }]) { + if (snap.data.usage) { + const u = snap.data.usage + const hitRate = u.inputTokens > 0 ? ((u.cachedInputTokens / u.inputTokens) * 100).toFixed(1) : '0.0' + console.log(` Usage ${snap.label}: ${u.inputTokens} in, ${u.outputTokens} out, ${u.cachedInputTokens} cached (${hitRate}% cache hit)`) + } + } + if (prev.runId !== curr.runId) { + console.log(` āš ļø Different runs: ${prev.runId ?? '?'} → ${curr.runId ?? 
'?'}`) } const prevSystem = prev.preConversion.systemPrompt @@ -323,11 +368,6 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: console.log('\n šŸŽÆ Cache Verdict:') const systemIdentical = prevSystem === currSystem const toolsIdentical = prevToolJson === currToolJson - const providerNormIdentical = - prev.providerRequest && curr.providerRequest - ? JSON.stringify(prev.providerRequest.normalized) === - JSON.stringify(curr.providerRequest.normalized) - : undefined if (systemIdentical && toolsIdentical) { console.log( @@ -340,40 +380,54 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: console.log(` āŒ PRE-CONVERSION CACHE MISS expected — ${causes.join(' and ')}`) } - if (providerNormIdentical === true) { - console.log( - ' āœ… Post-conversion (provider) request bodies are IDENTICAL', - ) - } else if (providerNormIdentical === false) { - console.log( - ' āŒ Post-conversion (provider) request bodies DIFFER — conversion layer may be introducing instability', - ) - if (systemIdentical && toolsIdentical) { - console.log( - ' āš ļø Pre-conversion was identical but post-conversion differs — bug is in the conversion layer!', - ) + // Check post-conversion structural stability (ignoring cache_control positions) + if (prev.providerRequest?.normalized && curr.providerRequest?.normalized) { + const prevObj = prev.providerRequest.normalized as Record + const currObj = curr.providerRequest.normalized as Record + if (Array.isArray(prevObj.messages) && Array.isArray(currObj.messages)) { + const prevMsgs = prevObj.messages as unknown[] + const currMsgs = currObj.messages as unknown[] + const minLen = Math.min(prevMsgs.length, currMsgs.length) + let sharedStructural = 0 + for (let i = 0; i < minLen; i++) { + if (JSON.stringify(stripCacheControlFromMessage(prevMsgs[i])) === JSON.stringify(stripCacheControlFromMessage(currMsgs[i]))) { + sharedStructural++ + } else { + break + } + } + console.log(` šŸ“Š 
Post-conversion shared prefix: ${sharedStructural}/${minLen} messages (ignoring cache_control)`) + if (sharedStructural < minLen && systemIdentical && toolsIdentical) { + console.log(` āš ļø Structural content differs in shared prefix — possible conversion issue`) + } } } } -function parseArgs(): { dir: string; agentFilter?: string } { +function parseArgs(): { dir: string; agentFilter?: string; runFilter?: string; crossRun: boolean } { const args = process.argv.slice(2) let dir = join(process.cwd(), 'debug', 'cache-debug') let agentFilter: string | undefined + let runFilter: string | undefined + let crossRun = false for (let i = 0; i < args.length; i++) { if (args[i] === '--agent' && i + 1 < args.length) { agentFilter = args[++i] + } else if (args[i] === '--run' && i + 1 < args.length) { + runFilter = args[++i] + } else if (args[i] === '--cross-run') { + crossRun = true } else if (!args[i].startsWith('--')) { dir = args[i] } } - return { dir, agentFilter } + return { dir, agentFilter, runFilter, crossRun } } function main() { - const { dir, agentFilter } = parseArgs() + const { dir, agentFilter, runFilter, crossRun } = parseArgs() let files: string[] try { @@ -408,46 +462,118 @@ function main() { allSnapshots = allSnapshots.filter( (s) => s.snapshot.agentType === agentFilter, ) - console.log( - `Filtered to ${allSnapshots.length} snapshot(s) for agent type: ${agentFilter}`, + } + + if (runFilter) { + allSnapshots = allSnapshots.filter( + (s) => s.snapshot.runId === runFilter || s.snapshot.runId?.startsWith(runFilter), ) - } else { - console.log(`Found ${allSnapshots.length} snapshot(s) in ${dir}`) - const agentTypes = [...new Set(allSnapshots.map((s) => s.snapshot.agentType))] - if (agentTypes.length > 1) { - console.log( - `\nāš ļø Multiple agent types found: ${agentTypes.join(', ')}`, - ) - console.log( - ' Use --agent to filter (e.g. 
--agent base2)', - ) - } + } + + console.log(`Found ${allSnapshots.length} snapshot(s) in ${dir}`) + if (agentFilter) { + console.log(` Filtered to agent type: ${agentFilter}`) + } + if (runFilter) { + console.log(` Filtered to run: ${runFilter}`) } const withProviderRequest = allSnapshots.filter((s) => s.snapshot.providerRequest !== undefined).length console.log(` Provider request data: ${withProviderRequest}/${allSnapshots.length} snapshots`) - console.log( - '\nFiles:', - allSnapshots.map((s) => ` ${s.filename}`).join('\n'), - ) - if (allSnapshots.length < 2) { console.error('\nNeed at least 2 snapshots to compare. Send another prompt.') process.exit(1) } - for (let i = 1; i < allSnapshots.length; i++) { - comparePair( - allSnapshots[i - 1].snapshot, - allSnapshots[i].snapshot, - allSnapshots[i - 1].filename, - allSnapshots[i].filename, + if (crossRun) { + // Old behavior: compare all snapshots sequentially + console.log('\nMode: cross-run (comparing all snapshots sequentially)') + console.log( + '\nFiles:', + allSnapshots.map((s) => ` ${s.filename}`).join('\n'), ) + + let totalPairs = 0 + for (let i = 1; i < allSnapshots.length; i++) { + comparePair( + allSnapshots[i - 1].snapshot, + allSnapshots[i].snapshot, + allSnapshots[i - 1].filename, + allSnapshots[i].filename, + ) + totalPairs++ + } + + console.log(`\n${'═'.repeat(80)}`) + console.log(` Summary: compared ${totalPairs} consecutive pair(s) across all runs`) + console.log(`${'═'.repeat(80)}\n`) + return + } + + // Default: group by runId and compare within each run + const byRun = new Map>() + const noRunId: Array<{ snapshot: Snapshot; filename: string }> = [] + + for (const s of allSnapshots) { + const runId = s.snapshot.runId + if (!runId) { + noRunId.push(s) + continue + } + if (!byRun.has(runId)) { + byRun.set(runId, []) + } + byRun.get(runId)!.push(s) + } + + // Filter to runs with at least 2 steps + const multiStepRuns = [...byRun.entries()].filter(([, snaps]) => snaps.length >= 2) + const 
singleStepRuns = [...byRun.entries()].filter(([, snaps]) => snaps.length < 2) + + console.log(`\n Runs: ${byRun.size} total, ${multiStepRuns.length} with multiple steps`) + if (singleStepRuns.length > 0) { + console.log(` Skipping ${singleStepRuns.length} single-step run(s)`) + } + if (noRunId.length > 0) { + console.log(` Skipping ${noRunId.length} snapshot(s) without runId`) + } + + let totalPairs = 0 + + for (const [runId, snaps] of multiStepRuns) { + // Sort by index (step number), then by timestamp as tiebreaker + snaps.sort((a, b) => { + if (a.snapshot.index !== b.snapshot.index) { + return a.snapshot.index - b.snapshot.index + } + return a.snapshot.timestamp.localeCompare(b.snapshot.timestamp) + }) + + console.log(`\n${'═'.repeat(80)}`) + console.log(` Run: ${runId} (${snaps.length} steps)`) + console.log(` Agent: ${snaps[0].snapshot.agentType} Model: ${snaps[0].snapshot.model ?? 'unknown'}`) + console.log(`${'═'.repeat(80)}`) + + // Print step overview + for (const s of snaps) { + console.log(` Step ${s.snapshot.index}: ${s.snapshot.preConversion.messages.length} msgs (${s.filename})`) + } + + // Compare consecutive steps + for (let i = 1; i < snaps.length; i++) { + comparePair( + snaps[i - 1].snapshot, + snaps[i].snapshot, + snaps[i - 1].filename, + snaps[i].filename, + ) + totalPairs++ + } } console.log(`\n${'═'.repeat(80)}`) - console.log(` Summary: compared ${allSnapshots.length - 1} consecutive pair(s)`) + console.log(` Summary: compared ${totalPairs} consecutive step pair(s) across ${multiStepRuns.length} run(s)`) console.log(`${'═'.repeat(80)}\n`) } diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 2e6e7624ed..37ed3a13b8 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -216,6 +216,30 @@ function emitCacheDebugProviderRequest(params: { }) } +function emitCacheDebugUsage(params: { + callback?: (usage: { + inputTokens: number + outputTokens: number + cachedInputTokens: number + totalTokens: number + }) => void + usage: { + 
inputTokens?: number + outputTokens?: number + totalTokens?: number + cachedInputTokens?: number + } +}) { + if (!params.callback) return + + params.callback({ + inputTokens: params.usage.inputTokens ?? 0, + outputTokens: params.usage.outputTokens ?? 0, + cachedInputTokens: params.usage.cachedInputTokens ?? 0, + totalTokens: params.usage.totalTokens ?? 0, + }) +} + export async function* promptAiSdkStream( params: ParamsOf & { skipClaudeOAuth?: boolean @@ -587,6 +611,12 @@ export async function* promptAiSdkStream( rawBody: requestMetadata.body, }) + const usageResult = await response.usage + emitCacheDebugUsage({ + callback: params.onCacheDebugUsageReceived, + usage: usageResult, + }) + // Skip cost tracking for Claude OAuth (user is on their own subscription) if (!isClaudeOAuth) { const providerMetadataResult = await response.providerMetadata @@ -654,6 +684,10 @@ export async function promptAiSdk( provider: getModelProvider(aiSDKModel), rawBody: response.request?.body, }) + emitCacheDebugUsage({ + callback: params.onCacheDebugUsageReceived, + usage: response.usage, + }) const content = response.text const providerMetadata = response.providerMetadata ?? {} @@ -719,6 +753,10 @@ export async function promptAiSdkStructured( provider: getModelProvider(aiSDKModel), rawBody: response.request?.body, }) + emitCacheDebugUsage({ + callback: params.onCacheDebugUsageReceived, + usage: response.usage, + }) const content = response.object