From b88db4e7ef493a4c739240a889807e1fb77a8c28 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 17:03:02 -0700 Subject: [PATCH 01/15] cli: Trim new lines before/after assistant message --- cli/src/components/blocks/agent-branch-wrapper.tsx | 4 ++-- cli/src/components/blocks/block-helpers.ts | 4 ++-- cli/src/components/blocks/single-block.tsx | 4 ++-- cli/src/components/blocks/user-content-copy.tsx | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cli/src/components/blocks/agent-branch-wrapper.tsx b/cli/src/components/blocks/agent-branch-wrapper.tsx index 3b336735fa..e788ba5464 100644 --- a/cli/src/components/blocks/agent-branch-wrapper.tsx +++ b/cli/src/components/blocks/agent-branch-wrapper.tsx @@ -9,7 +9,7 @@ import React, { import { AgentBlockGrid } from './agent-block-grid' import { AgentBranchItem } from './agent-branch-item' -import { trimTrailingNewlines, sanitizePreview } from './block-helpers' +import { trimNewlines, sanitizePreview } from './block-helpers' import { ContentWithMarkdown } from './content-with-markdown' import { ImplementorGroup } from './implementor-row' import { ThinkingBlock } from './thinking-block' @@ -248,7 +248,7 @@ const AgentBody = memo( const isNestedStreamingText = p.parentIsStreaming || nestedStatus === 'running' const filteredNestedContent = isNestedStreamingText - ? trimTrailingNewlines(textBlock.content) + ? trimNewlines(textBlock.content) : textBlock.content.trim() const markdownOptionsForLevel = p.getAgentMarkdownOptions(0) const marginTop = textBlock.marginTop ?? 
0 diff --git a/cli/src/components/blocks/block-helpers.ts b/cli/src/components/blocks/block-helpers.ts index 193d110d60..681d771fdd 100644 --- a/cli/src/components/blocks/block-helpers.ts +++ b/cli/src/components/blocks/block-helpers.ts @@ -1,6 +1,6 @@ -export function trimTrailingNewlines(str: string): string { - return str.replace(/\n+$/, '') +export function trimNewlines(str: string): string { + return str.replace(/^\n+|\n+$/g, '') } export function sanitizePreview(text: string): string { diff --git a/cli/src/components/blocks/single-block.tsx b/cli/src/components/blocks/single-block.tsx index e646e15ed1..c00d5d81d3 100644 --- a/cli/src/components/blocks/single-block.tsx +++ b/cli/src/components/blocks/single-block.tsx @@ -4,7 +4,7 @@ import React, { memo, type ReactNode } from 'react' import { AgentBranchWrapper } from './agent-branch-wrapper' import { AgentListBranch } from './agent-list-branch' import { AskUserBranch } from './ask-user-branch' -import { trimTrailingNewlines, isReasoningTextBlock } from './block-helpers' +import { trimNewlines, isReasoningTextBlock } from './block-helpers' import { ContentWithMarkdown } from './content-with-markdown' import { ImageBlock } from './image-block' import { UserBlockTextWithInlineCopy } from './user-content-copy' @@ -68,7 +68,7 @@ export const SingleBlock = memo( const textBlock = block as TextContentBlock const isStreamingText = isLoading || !isComplete const filteredContent = isStreamingText - ? trimTrailingNewlines(textBlock.content) + ? trimNewlines(textBlock.content) : textBlock.content.trim() const renderKey = `${messageId}-text-${idx}` const prevBlock = idx > 0 && blocks ? 
blocks[idx - 1] : null diff --git a/cli/src/components/blocks/user-content-copy.tsx b/cli/src/components/blocks/user-content-copy.tsx index e23bc65a38..256b8177f9 100644 --- a/cli/src/components/blocks/user-content-copy.tsx +++ b/cli/src/components/blocks/user-content-copy.tsx @@ -2,7 +2,7 @@ import { TextAttributes } from '@opentui/core' import React, { memo } from 'react' import { CopyButton } from '../copy-button' -import { trimTrailingNewlines } from './block-helpers' +import { trimNewlines } from './block-helpers' import { ContentWithMarkdown } from './content-with-markdown' import type { MarkdownPalette } from '../../utils/markdown-renderer' @@ -33,7 +33,7 @@ export const UserContentWithCopyButton = memo( }: UserContentWithCopyButtonProps) => { const isStreamingMessage = isLoading || !isComplete const normalizedContent = isStreamingMessage - ? trimTrailingNewlines(content) + ? trimNewlines(content) : content.trim() const hasContent = normalizedContent.length > 0 From c949d77be41e53591f85ffa093c1d94300fe0a04 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 17:06:35 -0700 Subject: [PATCH 02/15] Use normal tools instead of complex agents --- agents/__tests__/context-pruner.test.ts | 20 -------------------- agents/base2/base2.ts | 9 +++------ agents/context-pruner.ts | 3 --- 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/agents/__tests__/context-pruner.test.ts b/agents/__tests__/context-pruner.test.ts index dd725a578b..45c61b4b9f 100644 --- a/agents/__tests__/context-pruner.test.ts +++ b/agents/__tests__/context-pruner.test.ts @@ -1087,26 +1087,6 @@ describe('context-pruner spawn_agents with prompt and params', () => { expect(content).toContain('params: {"command":"npm test"}') }) - test('includes both prompt and params for spawn_agent_inline', () => { - const messages = [ - createMessage('user', 'Search code'), - createToolCallMessage('call-1', 'spawn_agent_inline', { - agent_type: 'code-searcher', - prompt: 'Find usages of 
deprecated API', - params: { searchQueries: [{ pattern: 'oldFunction' }] }, - }), - createToolResultMessage('call-1', 'spawn_agent_inline', { output: {} }), - ] - - const results = runHandleSteps(messages) - const content = results[0].input.messages[0].content[0].text - - expect(content).toContain('Spawned agent: code-searcher') - expect(content).toContain('prompt: "Find usages of deprecated API"') - expect(content).toContain('params:') - expect(content).toContain('oldFunction') - }) - test('truncates very long prompts (over 1000 chars)', () => { const longPrompt = 'X'.repeat(1500) const messages = [ diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 31ffa89439..99062f178e 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -68,16 +68,13 @@ export function createBase2( !noAskUser && 'ask_user', 'skill', 'set_output', - isFree && 'code_search', - isFree && 'list_directory', - isFree && 'glob', + 'code_search', + 'list_directory', + 'glob', ), spawnableAgents: buildArray( !isMax && 'file-picker', isMax && 'file-picker-max', - !isFree && 'code-searcher', - !isFree && 'directory-lister', - !isFree && 'glob-matcher', 'researcher-web', 'researcher-docs', isFree ? 
'commander-lite' : 'commander', diff --git a/agents/context-pruner.ts b/agents/context-pruner.ts index 0f31217402..dbb3c3cc57 100644 --- a/agents/context-pruner.ts +++ b/agents/context-pruner.ts @@ -297,9 +297,6 @@ const definition: AgentDefinition = { /** Agent IDs whose output should be excluded from spawn_agents results */ const SPAWN_AGENTS_OUTPUT_BLACKLIST = [ 'file-picker', - 'code-searcher', - 'directory-lister', - 'glob-matcher', 'researcher-web', 'researcher-docs', 'commander', From 35d71860ef29ec4f67db17a13e8502ad53cf95e8 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 17:11:48 -0700 Subject: [PATCH 03/15] Update cache debug script --- scripts/compare-cache-debug.ts | 219 +++++++++++++++++++++++++-------- 1 file changed, 166 insertions(+), 53 deletions(-) diff --git a/scripts/compare-cache-debug.ts b/scripts/compare-cache-debug.ts index a0d1f72c82..db9b4b4ac0 100644 --- a/scripts/compare-cache-debug.ts +++ b/scripts/compare-cache-debug.ts @@ -4,10 +4,14 @@ * Compare sequential cache debug snapshots to find what's causing prompt cache misses. * * Usage: - * bun scripts/compare-cache-debug.ts [directory] [--agent ] + * bun scripts/compare-cache-debug.ts [directory] [--agent ] [--run ] [--cross-run] * * Options: - * --agent Only compare snapshots from this agent type (e.g. base2) + * --agent Only compare snapshots from this agent type (e.g. base2) + * --run Only compare snapshots from this specific run + * --cross-run Compare all snapshots sequentially (old behavior, across runs) + * + * Default: groups snapshots by runId and compares consecutive steps within each run. 
* * Default directory: debug/cache-debug/ * @@ -134,6 +138,20 @@ function printSectionHeader(title: string) { console.log(`${'─'.repeat(80)}`) } +function stripCacheControlFromMessage(msg: unknown): unknown { + if (!msg || typeof msg !== 'object') return msg + const obj = JSON.parse(JSON.stringify(msg)) + delete obj.cache_control + if (Array.isArray(obj.content)) { + for (const part of obj.content) { + if (part && typeof part === 'object') { + delete part.cache_control + } + } + } + return obj +} + function compareProviderRequests( prev: Snapshot['providerRequest'], curr: Snapshot['providerRequest'], @@ -199,13 +217,27 @@ function compareProviderRequests( console.log(` ✅ messages: identical (${prevMsgs.length} messages)`) } else { console.log(` ❌ messages: differ (${prevMsgs.length} → ${currMsgs.length})`) + + // Compare with cache_control stripped to check structural stability const minLen = Math.min(prevMsgs.length, currMsgs.length) + let firstRawDiff = -1 + let firstStructDiff = -1 for (let i = 0; i < minLen; i++) { - if (JSON.stringify(prevMsgs[i]) !== JSON.stringify(currMsgs[i])) { - console.log(` First diff at message index ${i}`) - break + if (firstRawDiff < 0 && JSON.stringify(prevMsgs[i]) !== JSON.stringify(currMsgs[i])) { + firstRawDiff = i + } + if (firstStructDiff < 0 && JSON.stringify(stripCacheControlFromMessage(prevMsgs[i])) !== JSON.stringify(stripCacheControlFromMessage(currMsgs[i]))) { + firstStructDiff = i } } + if (firstRawDiff >= 0) { + console.log(` First raw diff at message index ${firstRawDiff}`) + } + if (firstStructDiff >= 0) { + console.log(` First structural diff (ignoring cache_control) at message index ${firstStructDiff}`) + } else if (prevMsgs.length === currMsgs.length) { + console.log(` ✅ Structurally identical (only cache_control placement differs)`) + } if (prevMsgs.length !== currMsgs.length) { console.log(` Message count: ${prevMsgs.length} → ${currMsgs.length}`) } @@ -218,7 +250,7 @@ function compareProviderRequests( function 
comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: string) { printSectionHeader( - `Comparing snapshot ${prev.index} → ${curr.index} (${prev.agentType})`, + `Comparing step ${prev.index} → ${curr.index} (${prev.agentType})`, ) console.log(` File A: ${prevFile}`) console.log(` File B: ${currFile}`) @@ -229,8 +261,8 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: if (prev.systemHash || curr.systemHash) { console.log(` Hashes: system=${prev.systemHash ?? '?'}→${curr.systemHash ?? '?'} tools=${prev.toolsHash ?? '?'}→${curr.toolsHash ?? '?'}`) } - if (prev.runId || curr.runId) { - console.log(` RunId: ${prev.runId ?? '?'} → ${curr.runId ?? '?'}`) + if (prev.runId !== curr.runId) { + console.log(` ⚠️ Different runs: ${prev.runId ?? '?'} → ${curr.runId ?? '?'}`) } const prevSystem = prev.preConversion.systemPrompt @@ -323,11 +355,6 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: console.log('\n 🎯 Cache Verdict:') const systemIdentical = prevSystem === currSystem const toolsIdentical = prevToolJson === currToolJson - const providerNormIdentical = - prev.providerRequest && curr.providerRequest - ? 
JSON.stringify(prev.providerRequest.normalized) === - JSON.stringify(curr.providerRequest.normalized) - : undefined if (systemIdentical && toolsIdentical) { console.log( @@ -340,40 +367,54 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: console.log(` ❌ PRE-CONVERSION CACHE MISS expected — ${causes.join(' and ')}`) } - if (providerNormIdentical === true) { - console.log( - ' ✅ Post-conversion (provider) request bodies are IDENTICAL', - ) - } else if (providerNormIdentical === false) { - console.log( - ' ❌ Post-conversion (provider) request bodies DIFFER — conversion layer may be introducing instability', - ) - if (systemIdentical && toolsIdentical) { - console.log( - ' ⚠️ Pre-conversion was identical but post-conversion differs — bug is in the conversion layer!', - ) + // Check post-conversion structural stability (ignoring cache_control positions) + if (prev.providerRequest?.normalized && curr.providerRequest?.normalized) { + const prevObj = prev.providerRequest.normalized as Record + const currObj = curr.providerRequest.normalized as Record + if (Array.isArray(prevObj.messages) && Array.isArray(currObj.messages)) { + const prevMsgs = prevObj.messages as unknown[] + const currMsgs = currObj.messages as unknown[] + const minLen = Math.min(prevMsgs.length, currMsgs.length) + let sharedStructural = 0 + for (let i = 0; i < minLen; i++) { + if (JSON.stringify(stripCacheControlFromMessage(prevMsgs[i])) === JSON.stringify(stripCacheControlFromMessage(currMsgs[i]))) { + sharedStructural++ + } else { + break + } + } + console.log(` 📊 Post-conversion shared prefix: ${sharedStructural}/${minLen} messages (ignoring cache_control)`) + if (sharedStructural < minLen && systemIdentical && toolsIdentical) { + console.log(` ⚠️ Structural content differs in shared prefix — possible conversion issue`) + } } } } -function parseArgs(): { dir: string; agentFilter?: string } { +function parseArgs(): { dir: string; agentFilter?: string; runFilter?: 
string; crossRun: boolean } { const args = process.argv.slice(2) let dir = join(process.cwd(), 'debug', 'cache-debug') let agentFilter: string | undefined + let runFilter: string | undefined + let crossRun = false for (let i = 0; i < args.length; i++) { if (args[i] === '--agent' && i + 1 < args.length) { agentFilter = args[++i] + } else if (args[i] === '--run' && i + 1 < args.length) { + runFilter = args[++i] + } else if (args[i] === '--cross-run') { + crossRun = true } else if (!args[i].startsWith('--')) { dir = args[i] } } - return { dir, agentFilter } + return { dir, agentFilter, runFilter, crossRun } } function main() { - const { dir, agentFilter } = parseArgs() + const { dir, agentFilter, runFilter, crossRun } = parseArgs() let files: string[] try { @@ -408,46 +449,118 @@ function main() { allSnapshots = allSnapshots.filter( (s) => s.snapshot.agentType === agentFilter, ) - console.log( - `Filtered to ${allSnapshots.length} snapshot(s) for agent type: ${agentFilter}`, + } + + if (runFilter) { + allSnapshots = allSnapshots.filter( + (s) => s.snapshot.runId === runFilter || s.snapshot.runId?.startsWith(runFilter), ) - } else { - console.log(`Found ${allSnapshots.length} snapshot(s) in ${dir}`) - const agentTypes = [...new Set(allSnapshots.map((s) => s.snapshot.agentType))] - if (agentTypes.length > 1) { - console.log( - `\n⚠️ Multiple agent types found: ${agentTypes.join(', ')}`, - ) - console.log( - ' Use --agent to filter (e.g. 
--agent base2)', - ) - } + } + + console.log(`Found ${allSnapshots.length} snapshot(s) in ${dir}`) + if (agentFilter) { + console.log(` Filtered to agent type: ${agentFilter}`) + } + if (runFilter) { + console.log(` Filtered to run: ${runFilter}`) } const withProviderRequest = allSnapshots.filter((s) => s.snapshot.providerRequest !== undefined).length console.log(` Provider request data: ${withProviderRequest}/${allSnapshots.length} snapshots`) - console.log( - '\nFiles:', - allSnapshots.map((s) => ` ${s.filename}`).join('\n'), - ) - if (allSnapshots.length < 2) { console.error('\nNeed at least 2 snapshots to compare. Send another prompt.') process.exit(1) } - for (let i = 1; i < allSnapshots.length; i++) { - comparePair( - allSnapshots[i - 1].snapshot, - allSnapshots[i].snapshot, - allSnapshots[i - 1].filename, - allSnapshots[i].filename, + if (crossRun) { + // Old behavior: compare all snapshots sequentially + console.log('\nMode: cross-run (comparing all snapshots sequentially)') + console.log( + '\nFiles:', + allSnapshots.map((s) => ` ${s.filename}`).join('\n'), ) + + let totalPairs = 0 + for (let i = 1; i < allSnapshots.length; i++) { + comparePair( + allSnapshots[i - 1].snapshot, + allSnapshots[i].snapshot, + allSnapshots[i - 1].filename, + allSnapshots[i].filename, + ) + totalPairs++ + } + + console.log(`\n${'═'.repeat(80)}`) + console.log(` Summary: compared ${totalPairs} consecutive pair(s) across all runs`) + console.log(`${'═'.repeat(80)}\n`) + return + } + + // Default: group by runId and compare within each run + const byRun = new Map>() + const noRunId: Array<{ snapshot: Snapshot; filename: string }> = [] + + for (const s of allSnapshots) { + const runId = s.snapshot.runId + if (!runId) { + noRunId.push(s) + continue + } + if (!byRun.has(runId)) { + byRun.set(runId, []) + } + byRun.get(runId)!.push(s) + } + + // Filter to runs with at least 2 steps + const multiStepRuns = [...byRun.entries()].filter(([, snaps]) => snaps.length >= 2) + const 
singleStepRuns = [...byRun.entries()].filter(([, snaps]) => snaps.length < 2) + + console.log(`\n Runs: ${byRun.size} total, ${multiStepRuns.length} with multiple steps`) + if (singleStepRuns.length > 0) { + console.log(` Skipping ${singleStepRuns.length} single-step run(s)`) + } + if (noRunId.length > 0) { + console.log(` Skipping ${noRunId.length} snapshot(s) without runId`) + } + + let totalPairs = 0 + + for (const [runId, snaps] of multiStepRuns) { + // Sort by index (step number), then by timestamp as tiebreaker + snaps.sort((a, b) => { + if (a.snapshot.index !== b.snapshot.index) { + return a.snapshot.index - b.snapshot.index + } + return a.snapshot.timestamp.localeCompare(b.snapshot.timestamp) + }) + + console.log(`\n${'═'.repeat(80)}`) + console.log(` Run: ${runId} (${snaps.length} steps)`) + console.log(` Agent: ${snaps[0].snapshot.agentType} Model: ${snaps[0].snapshot.model ?? 'unknown'}`) + console.log(`${'═'.repeat(80)}`) + + // Print step overview + for (const s of snaps) { + console.log(` Step ${s.snapshot.index}: ${s.snapshot.preConversion.messages.length} msgs (${s.filename})`) + } + + // Compare consecutive steps + for (let i = 1; i < snaps.length; i++) { + comparePair( + snaps[i - 1].snapshot, + snaps[i].snapshot, + snaps[i - 1].filename, + snaps[i].filename, + ) + totalPairs++ + } } console.log(`\n${'═'.repeat(80)}`) - console.log(` Summary: compared ${allSnapshots.length - 1} consecutive pair(s)`) + console.log(` Summary: compared ${totalPairs} consecutive step pair(s) across ${multiStepRuns.length} run(s)`) console.log(`${'═'.repeat(80)}\n`) } From 8c81553ef50e9b333fbffaf3719ed6aa98f789dd Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 17:58:39 -0700 Subject: [PATCH 04/15] Fix inconsistent spacing! 
--- .../components/blocks/agent-block-grid.tsx | 1 - .../components/blocks/agent-branch-item.tsx | 6 +-- .../blocks/agent-branch-wrapper.tsx | 13 ++---- cli/src/components/blocks/image-block.tsx | 4 +- cli/src/components/blocks/single-block.tsx | 15 +++---- .../components/blocks/tool-block-group.tsx | 42 +------------------ cli/src/components/message-block.tsx | 3 +- cli/src/components/thinking.tsx | 2 - 8 files changed, 15 insertions(+), 71 deletions(-) diff --git a/cli/src/components/blocks/agent-block-grid.tsx b/cli/src/components/blocks/agent-block-grid.tsx index b303937fcb..a238510f98 100644 --- a/cli/src/components/blocks/agent-block-grid.tsx +++ b/cli/src/components/blocks/agent-block-grid.tsx @@ -41,7 +41,6 @@ export const AgentBlockGrid = memo( availableWidth={availableWidth} getItemKey={getItemKey} renderItem={renderItem} - marginTop={1} /> ) }, diff --git a/cli/src/components/blocks/agent-branch-item.tsx b/cli/src/components/blocks/agent-branch-item.tsx index 44d082c4ee..7661bd1be9 100644 --- a/cli/src/components/blocks/agent-branch-item.tsx +++ b/cli/src/components/blocks/agent-branch-item.tsx @@ -125,7 +125,7 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { if (React.isValidElement(value)) { if (value.key === null || value.key === undefined) { return ( - + {value} ) @@ -135,7 +135,7 @@ export const AgentBranchItem = memo((props: AgentBranchItemProps) => { if (Array.isArray(value)) { return ( - + {value.map((child, idx) => ( { } return ( - + {value} ) diff --git a/cli/src/components/blocks/agent-branch-wrapper.tsx b/cli/src/components/blocks/agent-branch-wrapper.tsx index e788ba5464..46bae0bf43 100644 --- a/cli/src/components/blocks/agent-branch-wrapper.tsx +++ b/cli/src/components/blocks/agent-branch-wrapper.tsx @@ -18,7 +18,6 @@ import { useTheme } from '../../hooks/use-theme' import { useChatStore } from '../../state/chat-store' import { isTextBlock } from '../../types/chat' import { getAgentStatusInfo } from 
'../../utils/agent-helpers' -import { extractHtmlBlockMargins } from '../../utils/block-margins' import { processBlocks, type BlockProcessorHandlers, @@ -250,9 +249,10 @@ const AgentBody = memo( const filteredNestedContent = isNestedStreamingText ? trimNewlines(textBlock.content) : textBlock.content.trim() + if (!filteredNestedContent) { + return null + } const markdownOptionsForLevel = p.getAgentMarkdownOptions(0) - const marginTop = textBlock.marginTop ?? 0 - const marginBottom = textBlock.marginBottom ?? 0 const explicitColor = textBlock.color const nestedTextColor = explicitColor ?? p.theme.foreground @@ -262,8 +262,6 @@ const AgentBody = memo( style={{ wrapMode: 'word', fg: nestedTextColor, - marginTop, - marginBottom, }} > {htmlBlock.render({ @@ -390,7 +384,6 @@ export const AgentBranchWrapper = memo( flexDirection: 'column', gap: 0, width: '100%', - marginTop: 1, }} > diff --git a/cli/src/components/blocks/image-block.tsx b/cli/src/components/blocks/image-block.tsx index 761295709f..6aada062ed 100644 --- a/cli/src/components/blocks/image-block.tsx +++ b/cli/src/components/blocks/image-block.tsx @@ -62,7 +62,7 @@ export const ImageBlock = memo(({ block, availableWidth }: ImageBlockProps) => { if (inlineSequence) { // Render inline image using terminal escape sequence return ( - + {/* Image caption/metadata */} 📷 @@ -84,8 +84,6 @@ export const ImageBlock = memo(({ block, availableWidth }: ImageBlockProps) => { style={{ flexDirection: 'column', gap: 0, - marginTop: 1, - marginBottom: 1, paddingLeft: 1, borderStyle: 'single', borderColor: theme.border, diff --git a/cli/src/components/blocks/single-block.tsx b/cli/src/components/blocks/single-block.tsx index c00d5d81d3..021c7c3212 100644 --- a/cli/src/components/blocks/single-block.tsx +++ b/cli/src/components/blocks/single-block.tsx @@ -9,7 +9,6 @@ import { ContentWithMarkdown } from './content-with-markdown' import { ImageBlock } from './image-block' import { UserBlockTextWithInlineCopy } from 
'./user-content-copy' import { useTheme } from '../../hooks/use-theme' -import { extractTextBlockMargins, extractHtmlBlockMargins } from '../../utils/block-margins' import { PlanBox } from '../renderers/plan-box' import type { @@ -70,9 +69,10 @@ export const SingleBlock = memo( const filteredContent = isStreamingText ? trimNewlines(textBlock.content) : textBlock.content.trim() + if (!filteredContent) { + return null + } const renderKey = `${messageId}-text-${idx}` - const prevBlock = idx > 0 && blocks ? blocks[idx - 1] : null - const { marginTop, marginBottom } = extractTextBlockMargins(textBlock, prevBlock) const explicitColor = textBlock.color const blockTextColor = explicitColor ?? textColor @@ -86,8 +86,8 @@ export const SingleBlock = memo( textColor={blockTextColor} codeBlockWidth={codeBlockWidth} palette={markdownPalette} - marginTop={marginTop} - marginBottom={marginBottom} + marginTop={0} + marginBottom={0} /> ) } @@ -98,8 +98,6 @@ export const SingleBlock = memo( style={{ wrapMode: 'word', fg: blockTextColor, - marginTop, - marginBottom, }} attributes={isUser ? 
TextAttributes.ITALIC : undefined} > @@ -129,15 +127,12 @@ export const SingleBlock = memo( } case 'html': { - const { marginTop, marginBottom } = extractHtmlBlockMargins(block) return ( diff --git a/cli/src/components/blocks/tool-block-group.tsx b/cli/src/components/blocks/tool-block-group.tsx index ec215d6eb1..1da064412d 100644 --- a/cli/src/components/blocks/tool-block-group.tsx +++ b/cli/src/components/blocks/tool-block-group.tsx @@ -9,46 +9,20 @@ interface ToolBlockGroupProps { toolBlocks: Extract[] keyPrefix: string startIndex: number + /** @deprecated No longer used for margin calculation */ nextIndex: number + /** @deprecated No longer used for margin calculation */ siblingBlocks: ContentBlock[] availableWidth: number onToggleCollapsed: (id: string) => void markdownPalette: MarkdownPalette } -const isRenderableTimelineBlock = ( - block: ContentBlock | null | undefined, -): boolean => { - if (!block) { - return false - } - - if (block.type === 'tool') { - return block.toolName !== 'end_turn' - } - - switch (block.type) { - case 'text': - case 'html': - case 'agent': - case 'agent-list': - case 'plan': - case 'mode-divider': - case 'ask-user': - case 'image': - return true - default: - return false - } -} - export const ToolBlockGroup = memo( ({ toolBlocks, keyPrefix, startIndex, - nextIndex, - siblingBlocks, availableWidth, onToggleCollapsed, markdownPalette, @@ -68,24 +42,12 @@ export const ToolBlockGroup = memo( if (groupNodes.length === 0) return null - const hasRenderableBefore = - startIndex > 0 && isRenderableTimelineBlock(siblingBlocks[startIndex - 1]) - let hasRenderableAfter = false - for (let i = nextIndex; i < siblingBlocks.length; i++) { - if (isRenderableTimelineBlock(siblingBlocks[i])) { - hasRenderableAfter = true - break - } - } - return ( {groupNodes} diff --git a/cli/src/components/message-block.tsx b/cli/src/components/message-block.tsx index 7907875868..90fbc89533 100644 --- a/cli/src/components/message-block.tsx +++ 
b/cli/src/components/message-block.tsx @@ -264,9 +264,8 @@ export const MessageBlock = memo(({ From d343bc1c7b1fdf01e30c93a7c211c13d9a7e7b54 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 18:03:16 -0700 Subject: [PATCH 05/15] Further cache debugging code to track usage --- common/src/types/contracts/llm.ts | 10 +++++ .../agent-runtime/src/prompt-agent-stream.ts | 5 ++- packages/agent-runtime/src/run-agent-step.ts | 16 +++++++- .../agent-runtime/src/util/cache-debug.ts | 38 +++++++++++++++++++ scripts/compare-cache-debug.ts | 13 +++++++ sdk/src/impl/llm.ts | 38 +++++++++++++++++++ 6 files changed, 118 insertions(+), 2 deletions(-) diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts index c38695fe1f..44e8f4d4e3 100644 --- a/common/src/types/contracts/llm.ts +++ b/common/src/types/contracts/llm.ts @@ -25,6 +25,13 @@ export type StreamChunk = > | { type: 'error'; message: string } +export type CacheDebugUsageData = { + inputTokens: number + outputTokens: number + cachedInputTokens: number + totalTokens: number +} + export type PromptAiSdkStreamFn = ( params: { apiKey: string @@ -45,6 +52,7 @@ export type PromptAiSdkStreamFn = ( rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: OpenRouterProviderRoutingOptions @@ -79,6 +87,7 @@ export type PromptAiSdkFn = ( rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: OpenRouterProviderRoutingOptions @@ -114,6 +123,7 @@ export type PromptAiSdkStructuredInput = { rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void includeCacheControl?: boolean cacheDebugCorrelation?: string agentProviderOptions?: 
OpenRouterProviderRoutingOptions diff --git a/packages/agent-runtime/src/prompt-agent-stream.ts b/packages/agent-runtime/src/prompt-agent-stream.ts index eaa8e70688..13d0ba2b11 100644 --- a/packages/agent-runtime/src/prompt-agent-stream.ts +++ b/packages/agent-runtime/src/prompt-agent-stream.ts @@ -3,7 +3,7 @@ import { globalStopSequence } from './constants' import type { AgentTemplate } from './templates/types' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { SendActionFn } from '@codebuff/common/types/contracts/client' -import type { PromptAiSdkStreamFn } from '@codebuff/common/types/contracts/llm' +import type { CacheDebugUsageData, PromptAiSdkStreamFn } from '@codebuff/common/types/contracts/llm' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ParamsOf } from '@codebuff/common/types/function-params' import type { Message } from '@codebuff/common/types/messages/codebuff-message' @@ -32,6 +32,7 @@ export const getAgentStreamFromTemplate = (params: { rawBody: unknown normalizedBody?: unknown }) => void + onCacheDebugUsageReceived?: (usage: CacheDebugUsageData) => void onCostCalculated?: (credits: number) => Promise promptAiSdkStream: PromptAiSdkStreamFn @@ -55,6 +56,7 @@ export const getAgentStreamFromTemplate = (params: { userInputId, cacheDebugCorrelation, onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, sendAction, onCostCalculated, @@ -90,6 +92,7 @@ export const getAgentStreamFromTemplate = (params: { userInputId, cacheDebugCorrelation, onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, onCostCalculated, sendAction, diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts index 8e9eaf946d..b323d5f0f5 100644 --- a/packages/agent-runtime/src/run-agent-step.ts +++ b/packages/agent-runtime/src/run-agent-step.ts @@ -23,6 +23,7 @@ import { getAgentOutput } from './util/agent-output' import { 
createCacheDebugSnapshot, enrichCacheDebugSnapshotWithProviderRequest, + enrichCacheDebugSnapshotWithUsage, } from './util/cache-debug' import { withSystemInstructionTags, @@ -39,7 +40,7 @@ import type { FinishAgentRunFn, StartAgentRunFn, } from '@codebuff/common/types/contracts/database' -import type { PromptAiSdkFn } from '@codebuff/common/types/contracts/llm' +import type { CacheDebugUsageData, PromptAiSdkFn } from '@codebuff/common/types/contracts/llm' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ParamsExcluding, @@ -312,6 +313,17 @@ export const runAgentStep = async ( } : undefined + const onCacheDebugUsageReceived = + cacheDebugCorrelation + ? (usage: CacheDebugUsageData) => { + enrichCacheDebugSnapshotWithUsage({ + correlation: cacheDebugCorrelation, + usage, + logger, + }) + } + : undefined + logger.debug( { iteration: iterationNum, @@ -343,6 +355,7 @@ export const runAgentStep = async ( ? serializeCacheDebugCorrelation(cacheDebugCorrelation) : undefined, onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, }) if (result.aborted) { @@ -399,6 +412,7 @@ export const runAgentStep = async ( includeCacheControl: supportsCacheControl(agentTemplate.model), messages: [systemMessage(system), ...agentState.messageHistory], onCacheDebugProviderRequestBuilt, + onCacheDebugUsageReceived, template: agentTemplate, onCostCalculated, }) diff --git a/packages/agent-runtime/src/util/cache-debug.ts b/packages/agent-runtime/src/util/cache-debug.ts index 826349a789..686dd67d74 100644 --- a/packages/agent-runtime/src/util/cache-debug.ts +++ b/packages/agent-runtime/src/util/cache-debug.ts @@ -5,6 +5,7 @@ import { dirname, join } from 'path' import { type CacheDebugCorrelation, } from '@codebuff/common/util/cache-debug' +import type { CacheDebugUsageData } from '@codebuff/common/types/contracts/llm' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { Message } from 
'@codebuff/common/types/messages/codebuff-message' import type { ProviderMetadata } from '@codebuff/common/types/messages/provider-metadata' @@ -50,6 +51,7 @@ export type CacheDebugSnapshot = { toolsHash?: string preConversion: CacheDebugPreConversionSnapshot providerRequest?: CacheDebugProviderRequestSnapshot + usage?: CacheDebugUsageData } function getCacheDebugDir(projectRoot: string) { @@ -241,6 +243,42 @@ export function createCacheDebugSnapshot(params: { return { snapshotId, filename, projectRoot } } +export function enrichCacheDebugSnapshotWithUsage(params: { + correlation: CacheDebugCorrelation + usage: CacheDebugUsageData + logger: Logger +}) { + const { correlation, usage, logger } = params + try { + const existing = loadSnapshot({ + projectRoot: correlation.projectRoot, + filename: correlation.filename, + }) + if (!existing) { + logger.warn( + `[Cache Debug] Could not find snapshot ${correlation.filename} to enrich with usage`, + ) + return + } + + if (existing.id !== correlation.snapshotId) { + logger.warn( + `[Cache Debug] Snapshot ID mismatch while enriching ${correlation.filename} with usage`, + ) + return + } + + const updated: CacheDebugSnapshot = { + ...existing, + usage, + } + + writeSnapshot({ snapshot: updated, logger }) + } catch (err) { + logger.warn({ error: err }, '[Cache Debug] Failed to enrich snapshot with usage') + } +} + export function enrichCacheDebugSnapshotWithProviderRequest(params: { correlation: CacheDebugCorrelation provider: string diff --git a/scripts/compare-cache-debug.ts b/scripts/compare-cache-debug.ts index db9b4b4ac0..8e8f7f897d 100644 --- a/scripts/compare-cache-debug.ts +++ b/scripts/compare-cache-debug.ts @@ -54,6 +54,12 @@ interface Snapshot { rawBody: unknown normalized: unknown } + usage?: { + inputTokens: number + outputTokens: number + cachedInputTokens: number + totalTokens: number + } } function findFirstDifference( @@ -261,6 +267,13 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, 
currFile: if (prev.systemHash || curr.systemHash) { console.log(` Hashes: system=${prev.systemHash ?? '?'}→${curr.systemHash ?? '?'} tools=${prev.toolsHash ?? '?'}→${curr.toolsHash ?? '?'}`) } + for (const snap of [{ label: 'A', data: prev }, { label: 'B', data: curr }]) { + if (snap.data.usage) { + const u = snap.data.usage + const hitRate = u.inputTokens > 0 ? ((u.cachedInputTokens / u.inputTokens) * 100).toFixed(1) : '0.0' + console.log(` Usage ${snap.label}: ${u.inputTokens} in, ${u.outputTokens} out, ${u.cachedInputTokens} cached (${hitRate}% cache hit)`) + } + } if (prev.runId !== curr.runId) { console.log(` ⚠️ Different runs: ${prev.runId ?? '?'} → ${curr.runId ?? '?'}`) } diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 2e6e7624ed..37ed3a13b8 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -216,6 +216,30 @@ function emitCacheDebugProviderRequest(params: { }) } +function emitCacheDebugUsage(params: { + callback?: (usage: { + inputTokens: number + outputTokens: number + cachedInputTokens: number + totalTokens: number + }) => void + usage: { + inputTokens?: number + outputTokens?: number + totalTokens?: number + cachedInputTokens?: number + } +}) { + if (!params.callback) return + + params.callback({ + inputTokens: params.usage.inputTokens ?? 0, + outputTokens: params.usage.outputTokens ?? 0, + cachedInputTokens: params.usage.cachedInputTokens ?? 0, + totalTokens: params.usage.totalTokens ?? 
0, + }) +} + export async function* promptAiSdkStream( params: ParamsOf & { skipClaudeOAuth?: boolean @@ -587,6 +611,12 @@ export async function* promptAiSdkStream( rawBody: requestMetadata.body, }) + const usageResult = await response.usage + emitCacheDebugUsage({ + callback: params.onCacheDebugUsageReceived, + usage: usageResult, + }) + // Skip cost tracking for Claude OAuth (user is on their own subscription) if (!isClaudeOAuth) { const providerMetadataResult = await response.providerMetadata @@ -654,6 +684,10 @@ export async function promptAiSdk( provider: getModelProvider(aiSDKModel), rawBody: response.request?.body, }) + emitCacheDebugUsage({ + callback: params.onCacheDebugUsageReceived, + usage: response.usage, + }) const content = response.text const providerMetadata = response.providerMetadata ?? {} @@ -719,6 +753,10 @@ export async function promptAiSdkStructured( provider: getModelProvider(aiSDKModel), rawBody: response.request?.body, }) + emitCacheDebugUsage({ + callback: params.onCacheDebugUsageReceived, + usage: response.usage, + }) const content = response.object From 4f243da1ad334561187e776185e96fea881e6aa1 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 18:41:24 -0700 Subject: [PATCH 06/15] Switch to inceptron provider for minimax --- agents/base2/base2.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 99062f178e..238fcf7152 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -30,8 +30,8 @@ export function createBase2( publisher, model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? 
{ - only: ['fireworks'], - order: ['fireworks'], + only: ['inceptron/fp8'], + order: ['inceptron/fp8'], allow_fallbacks: false, data_collection: 'deny', } : undefined, From 033e594c53926ced34fada2ab330d73f7241b001 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 19:00:15 -0700 Subject: [PATCH 07/15] Fix todo rendering --- cli/src/components/tools/write-todos.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/components/tools/write-todos.tsx b/cli/src/components/tools/write-todos.tsx index 74b00303cf..4f1fffc487 100644 --- a/cli/src/components/tools/write-todos.tsx +++ b/cli/src/components/tools/write-todos.tsx @@ -41,7 +41,7 @@ const WriteTodosItem = ({ todos }: WriteTodosItemProps) => { {todo.completed ? ( <> - + { ) : ( <> - + {todo.task} )} From dc49e0e26582451d20048ef9186902b4b3c4cbe5 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 19:30:23 -0700 Subject: [PATCH 08/15] Add new tmux-cli agent to codebuff base2! --- .agents/tmux-cli.ts | 634 ++++++++++++++++++++++++++++++++++++++++++ agents/base2/base2.ts | 2 + 2 files changed, 636 insertions(+) create mode 100644 .agents/tmux-cli.ts diff --git a/.agents/tmux-cli.ts b/.agents/tmux-cli.ts new file mode 100644 index 0000000000..10c0ecdeab --- /dev/null +++ b/.agents/tmux-cli.ts @@ -0,0 +1,634 @@ +import type { AgentDefinition } from './types/agent-definition' + +const outputSchema = { + type: 'object' as const, + properties: { + overallStatus: { + type: 'string' as const, + enum: ['success', 'failure', 'partial'], + description: '"success" when all tasks completed, "failure" when the primary task could not be done, "partial" when some subtasks succeeded but others failed', + }, + summary: { + type: 'string' as const, + description: 'Brief summary of the CLI interaction: what was done, key outputs observed, and the outcome', + }, + sessionName: { + type: 'string' as const, + description: 'The tmux session name used for this run (needed for cleanup 
if the session lingers)', + }, + results: { + type: 'array' as const, + items: { + type: 'object' as const, + properties: { + name: { type: 'string' as const, description: 'Short name of the task or interaction step' }, + passed: { type: 'boolean' as const, description: 'Whether this step succeeded' }, + details: { type: 'string' as const, description: 'What happened during this step' }, + capturedOutput: { type: 'string' as const, description: 'Relevant CLI output observed (keep concise — full output is in capture files)' }, + }, + required: ['name', 'passed'], + }, + description: 'Ordered list of interaction steps and their outcomes', + }, + scriptIssues: { + type: 'array' as const, + items: { + type: 'object' as const, + properties: { + script: { type: 'string' as const, description: 'Which helper command had the issue (e.g., "send", "capture", "wait-for")' }, + issue: { type: 'string' as const, description: 'What went wrong when using the helper script' }, + errorOutput: { type: 'string' as const, description: 'The actual error message or unexpected output' }, + suggestedFix: { type: 'string' as const, description: 'Suggested fix for the parent agent to implement' }, + }, + required: ['script', 'issue', 'suggestedFix'], + }, + description: 'Problems encountered with the helper script that the parent agent should address', + }, + captures: { + type: 'array' as const, + items: { + type: 'object' as const, + properties: { + path: { type: 'string' as const, description: 'Absolute path to the capture file in /tmp/tmux-captures-{session}/' }, + label: { type: 'string' as const, description: 'Descriptive label for what this capture shows (e.g., "after-login", "error-state", "final")' }, + timestamp: { type: 'string' as const, description: 'ISO 8601 timestamp of when the capture was taken' }, + }, + required: ['path', 'label'], + }, + description: 'Saved terminal captures the parent agent can read to verify results', + }, + lessons: { + type: 'array' as const, + items: 
{ + type: 'string' as const, + }, + description: 'Advice for future runs: timing adjustments needed, unexpected CLI behavior, workarounds discovered, input quirks', + }, + }, + required: ['overallStatus', 'summary', 'sessionName', 'scriptIssues', 'captures'], +} + +const definition: AgentDefinition = { + id: 'tmux-cli', + displayName: 'Tmux CLI Agent', + model: 'minimax/minimax-m2.5', + // Provider options are tightly coupled to the model choice above. + // If you change the model, update these accordingly. + providerOptions: { + only: ['inceptron/fp8'], + order: ['inceptron/fp8'], + allow_fallbacks: false, + data_collection: 'deny', + }, + + spawnerPrompt: `General-purpose agent that uses tmux to interact with and test CLI applications. + +**Your responsibilities as the parent agent:** +1. If \`scriptIssues\` is not empty, check the error details and re-run the agent +2. Use \`read_files\` on the capture paths to see what the CLI displayed +3. Re-run the agent after fixing any issues +4. Check the \`lessons\` array for advice on how to improve future runs + +**Note:** Capture files are saved to \`/tmp/\`. Use \`run_terminal_command\` with \`cat\` to read them if \`read_files\` doesn't support absolute paths. + +**When spawning this agent**, provide as much advice as possible in the prompt about how to test the CLI, including lessons from any previous runs of tmux-cli (e.g., timing adjustments, commands that didn't work, expected output patterns). This helps the agent avoid repeating mistakes. + +**Orphaned session cleanup:** If the agent fails or times out, the tmux session may linger. Run \`tmux kill-session -t \` to clean up. 
The session name is in the agent's output.`, + + inputSchema: { + prompt: { + type: 'string', + description: 'What to do with the CLI application (e.g., "run /help and verify output", "send a prompt and capture the response")', + }, + params: { + type: 'object', + properties: { + command: { + type: 'string', + description: 'The CLI command to start in the tmux session (e.g., "python app.py", "node server.js", "my-cli --interactive")', + }, + }, + }, + }, + + outputMode: 'structured_output', + outputSchema, + includeMessageHistory: false, + + toolNames: ['run_terminal_command', 'read_files', 'set_output', 'add_message'], + + systemPrompt: `You are an expert at interacting with CLI applications via tmux. You start a CLI process in a tmux session and use a helper script to send input and capture output. + +## Session Management + +A tmux session is started for you automatically. The session name and helper script path will be announced in a setup message. Do NOT start a new session — use the one provided. + +The session runs \`bash\` and your command is sent to it automatically. This means the session stays alive even if the command exits. + +## Helper Script Reference + +The examples below use \`$HELPER\` and \`$SESSION\` as shorthand. The **actual paths** will be provided in the setup message when the session starts. Always use those real paths in your commands. + +### Sending Input + +\`\`\`bash +# Send input (presses Enter automatically) +$HELPER send "$SESSION" "your input here" + +# Send without pressing Enter +$HELPER send "$SESSION" "partial text" --no-enter + +# Send with bracketed paste mode (for TUI apps: vim, fzf, Ink-based CLIs) +$HELPER send "$SESSION" "pasted content" --paste + +# Send and wait for output to stabilize (for streaming CLIs) +$HELPER send "$SESSION" "command" --wait-idle 3 + +# Send special keys (Enter, Escape, C-c, C-u, Up, Down, Tab, etc.) 
+$HELPER key "$SESSION" Escape +$HELPER key "$SESSION" C-c + +# Pass arguments directly to tmux send-keys (escape hatch) +$HELPER raw "$SESSION" "some text" Enter +\`\`\` + +Input is sent as **plain text** by default (works for \`input()\`, readline, most CLIs). For TUI apps that need paste events, add \`--paste\`. + +### Capturing Output + +\`\`\`bash +# Capture visible pane (~30 lines). Default wait: 1 second. +$HELPER capture "$SESSION" + +# Capture with a descriptive label (used in the filename) +$HELPER capture "$SESSION" --label "after-login" + +# Capture with custom wait time +$HELPER capture "$SESSION" --wait 3 + +# Capture full scrollback (use for final capture) +$HELPER capture "$SESSION" --full --label "final" + +# Capture with ANSI color codes stripped (cleaner for parsing) +$HELPER capture "$SESSION" --strip-ansi --label "clean-output" + +# Instant capture (no wait) +$HELPER capture "$SESSION" --wait 0 +\`\`\` + +Captures show the **visible pane** by default. Add \`--full\` for the entire scrollback buffer. Each capture is saved to a file in \`/tmp/tmux-captures-{session}/\` and the path + content are printed. A timestamp is included in the output. + +### Waiting + +\`\`\`bash +# Wait until a pattern appears in the visible pane (regex, default timeout: 30s) +$HELPER wait-for "$SESSION" "Your guess:" +$HELPER wait-for "$SESSION" "\\$" --timeout 10 +$HELPER wait-for "$SESSION" "ready" --timeout 60 + +# Wait until output is stable for N seconds (max 120s) +$HELPER wait-idle "$SESSION" 3 +\`\`\` + +### Session Control + +\`\`\`bash +# Check if session is alive +$HELPER status "$SESSION" + +# Stop the session +$HELPER stop "$SESSION" +\`\`\` + +## File Creation + +Do NOT send file content through the tmux session. Use \`run_terminal_command\` with heredocs or scripting to create/edit files. The tmux session is for interacting with the CLI being tested. + +## Error Recovery + +If the CLI appears hung, try \`$HELPER key "$SESSION" C-c\` to interrupt. 
If it's still unresponsive, check session status with \`$HELPER status "$SESSION"\`. If the session is dead, report the failure. Always capture before stopping so the parent agent can diagnose issues. + +## Operating Heuristics + +- Use the provided tmux session as the single source of truth. Do not start a second session. +- **Capture discipline:** Aim for 3-8 captures per run. Capture at key milestones: startup, after important interactions, on errors, and final state. Do NOT capture after every single input. +- **Use \`--full\` on the final capture** to get complete scrollback history. Regular captures only show the visible pane (~30 lines), keeping them small and focused. +- **Use \`wait-for\` before sending input** when you need to wait for a prompt or specific output to appear. This is more reliable than guessing wait times. +- **Wait guidance:** Most CLIs need 1-2 seconds to process input. Use \`--wait-idle 2\` on send or \`--wait 2\` on capture. For streaming CLIs, use \`--wait-idle 3\` or higher. +- Use \`--label\` on captures to make filenames descriptive. +- If the CLI already shows enough evidence in the current viewport, do not keep recapturing.`, + + instructionsPrompt: `Instructions: + +## Workflow + +A tmux session has been started for you. A setup message will announce the session name, helper script path, and the initial terminal output. Your command has already been sent to the session. + +1. **Check the initial output** provided in the setup message. If you see errors like "command not found" or "No such file", report failure immediately. +2. **Interact with the CLI** using the helper commands documented in the system prompt (send, key, capture, wait-for, etc.). +3. **Capture output** at key milestones. Use \`wait-for\` to wait for expected prompts before sending input. +4. **Final capture** with full scrollback before stopping: \`$HELPER capture "$SESSION" --full --label "final"\` +5. 
**Stop the session**: \`$HELPER stop "$SESSION"\` + +## Output + +Report results using set_output with: +- \`overallStatus\`: "success" (all tasks completed), "failure" (primary task couldn't be done), or "partial" (some subtasks succeeded but others failed) +- \`summary\`: Brief description of what was done +- \`sessionName\`: The tmux session name (REQUIRED) +- \`results\`: Array of task outcomes +- \`scriptIssues\`: Array of any problems with the helper script +- \`captures\`: Array of capture paths with labels. Use the file paths printed by the capture command (MUST have at least one) +- \`lessons\`: Array of strings describing issues encountered and advice for future runs (e.g., "Need longer --wait for this CLI", "CLI requires pressing Enter twice", "Command X produced unexpected output") + +Always include captures so the parent agent can verify results. Always include lessons so future invocations can be improved.`, + + handleSteps: function* ({ params, logger }) { + // Self-contained tmux helper script written to /tmp at startup. + // Must be defined inside handleSteps because the function is serialized. + const helperScript = `#!/usr/bin/env bash +set -e + +usage() { + echo "Usage: $0 [args]" + echo "Commands: start, send, capture, stop, key, raw, wait-for, wait-idle, status" + exit 1 +} + +[[ $# -lt 1 ]] && usage +CMD="$1"; shift + +case "$CMD" in + start) + SESSION="$1" + [[ -z "$SESSION" ]] && { echo "Usage: start " >&2; exit 1; } + tmux new-session -d -s "$SESSION" -x 120 -y 30 bash 2>/dev/null || true + if ! 
tmux has-session -t "$SESSION" 2>/dev/null; then + echo "Failed to create session $SESSION" >&2; exit 1 + fi + mkdir -p "/tmp/tmux-captures-$SESSION" + echo "$SESSION" + ;; + + send) + # send [--no-enter] [--paste] [--wait-idle N] + SESSION="$1"; shift + TEXT=""; AUTO_ENTER=true; PASTE_MODE=false; WAIT_IDLE=0 + while [[ $# -gt 0 ]]; do + case $1 in + --no-enter) AUTO_ENTER=false; shift ;; + --paste) PASTE_MODE=true; shift ;; + --wait-idle) WAIT_IDLE="$2"; shift 2 ;; + *) TEXT="$1"; shift ;; + esac + done + [[ -z "$SESSION" || -z "$TEXT" ]] && { echo "Usage: send [--no-enter] [--paste] [--wait-idle N]" >&2; exit 1; } + tmux send-keys -t "$SESSION" C-u + sleep 0.05 + if [[ "$PASTE_MODE" == true ]]; then + tmux send-keys -t "$SESSION" $'\\x1b[200~'"$TEXT"$'\\x1b[201~' + else + tmux send-keys -t "$SESSION" -- "$TEXT" + fi + if [[ "$AUTO_ENTER" == true ]]; then + sleep 0.05 + tmux send-keys -t "$SESSION" Enter + sleep 0.5 + fi + if [[ "$WAIT_IDLE" -gt 0 ]]; then + LAST_OUTPUT="" + STABLE_START=$(date +%s) + MAX_END=$(( $(date +%s) + 120 )) + while true; do + CURRENT_OUTPUT=$(tmux capture-pane -t "$SESSION" -S - -p 2>/dev/null || echo "") + NOW=$(date +%s) + if [[ "$CURRENT_OUTPUT" != "$LAST_OUTPUT" ]]; then + LAST_OUTPUT="$CURRENT_OUTPUT" + STABLE_START=$NOW + fi + if (( NOW - STABLE_START >= WAIT_IDLE )); then break; fi + if (( NOW >= MAX_END )); then echo "wait-idle timed out after 120s" >&2; break; fi + sleep 0.25 + done + fi + ;; + + key) + SESSION="$1"; KEY="$2" + [[ -z "$SESSION" || -z "$KEY" ]] && { echo "Usage: key " >&2; exit 1; } + tmux send-keys -t "$SESSION" "$KEY" + ;; + + raw) + SESSION="$1"; shift + [[ -z "$SESSION" ]] && { echo "Usage: raw [tmux send-keys args...]" >&2; exit 1; } + tmux send-keys -t "$SESSION" "$@" + ;; + + capture) + # capture [--wait N] [--label LABEL] [--full] [--strip-ansi] + SESSION="$1"; shift + WAIT=1; LABEL=""; FULL=false; STRIP_ANSI=false + while [[ $# -gt 0 ]]; do + case $1 in + --wait) WAIT="$2"; shift 2 ;; + --label) 
LABEL="$2"; shift 2 ;; + --full) FULL=true; shift ;; + --strip-ansi) STRIP_ANSI=true; shift ;; + *) shift ;; + esac + done + [[ -z "$SESSION" ]] && { echo "Usage: capture [--wait N] [--label LABEL] [--full] [--strip-ansi]" >&2; exit 1; } + [[ "$WAIT" -gt 0 ]] && sleep "$WAIT" + CAPTURE_DIR="/tmp/tmux-captures-$SESSION" + mkdir -p "$CAPTURE_DIR" + SEQ_FILE="$CAPTURE_DIR/.seq" + if [[ -f "$SEQ_FILE" ]]; then SEQ=$(cat "$SEQ_FILE"); else SEQ=0; fi + SEQ=$((SEQ + 1)) + echo "$SEQ" > "$SEQ_FILE" + SEQ_PAD=$(printf "%03d" "$SEQ") + if [[ -n "$LABEL" ]]; then + CAPTURE_FILE="$CAPTURE_DIR/capture-\${SEQ_PAD}-\${LABEL}.txt" + else + CAPTURE_FILE="$CAPTURE_DIR/capture-\${SEQ_PAD}.txt" + fi + if [[ "$FULL" == true ]]; then + tmux capture-pane -t "$SESSION" -S - -p > "$CAPTURE_FILE" + else + tmux capture-pane -t "$SESSION" -p > "$CAPTURE_FILE" + fi + if [[ "$STRIP_ANSI" == true ]]; then + perl -pe 's/\\e\\[[\\d;]*[a-zA-Z]//g' "$CAPTURE_FILE" > "$CAPTURE_FILE.tmp" && mv "$CAPTURE_FILE.tmp" "$CAPTURE_FILE" + fi + TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + echo "[Saved: $CAPTURE_FILE] [$TIMESTAMP]" + cat "$CAPTURE_FILE" + ;; + + wait-for) + # wait-for [--timeout N] + # Polls visible pane until grep matches the pattern (default timeout: 30s) + SESSION="$1"; shift + PATTERN=""; TIMEOUT=30 + while [[ $# -gt 0 ]]; do + case $1 in + --timeout) TIMEOUT="$2"; shift 2 ;; + *) PATTERN="$1"; shift ;; + esac + done + [[ -z "$SESSION" || -z "$PATTERN" ]] && { echo "Usage: wait-for [--timeout N]" >&2; exit 1; } + MAX_END=$(( $(date +%s) + TIMEOUT )) + while true; do + if tmux capture-pane -t "$SESSION" -p 2>/dev/null | grep -q "$PATTERN"; then + echo "Found: $PATTERN" + break + fi + NOW=$(date +%s) + if (( NOW >= MAX_END )); then + echo "Timed out after \${TIMEOUT}s waiting for: $PATTERN" >&2 + exit 1 + fi + sleep 0.25 + done + ;; + + wait-idle) + # wait-idle [stable-seconds] + SESSION="$1"; STABLE_SECS="\${2:-2}" + [[ -z "$SESSION" ]] && { echo "Usage: wait-idle [seconds]" >&2; exit 1; } + 
LAST_OUTPUT="" + STABLE_START=$(date +%s) + MAX_END=$(( $(date +%s) + 120 )) + while true; do + CURRENT_OUTPUT=$(tmux capture-pane -t "$SESSION" -S - -p 2>/dev/null || echo "") + NOW=$(date +%s) + if [[ "$CURRENT_OUTPUT" != "$LAST_OUTPUT" ]]; then + LAST_OUTPUT="$CURRENT_OUTPUT" + STABLE_START=$NOW + fi + if (( NOW - STABLE_START >= STABLE_SECS )); then echo "Output stable for \${STABLE_SECS}s"; break; fi + if (( NOW >= MAX_END )); then echo "Timed out after 120s" >&2; break; fi + sleep 0.25 + done + ;; + + status) + SESSION="$1" + [[ -z "$SESSION" ]] && { echo "Usage: status " >&2; exit 1; } + if tmux has-session -t "$SESSION" 2>/dev/null; then + echo "alive" + else + echo "dead" + fi + ;; + + stop) + SESSION="$1" + [[ -z "$SESSION" ]] && { echo "Usage: stop " >&2; exit 1; } + tmux kill-session -t "$SESSION" 2>/dev/null || true + ;; + + *) usage ;; +esac +` + + const startCommand = (params && typeof params.command === 'string') ? params.command : '' + + if (!startCommand) { + logger.error('No command provided in params.command') + yield { + toolName: 'set_output', + input: { + overallStatus: 'failure', + summary: 'No command provided. 
Pass params.command with the CLI command to start.', + sessionName: '', + scriptIssues: [], + captures: [], + }, + } + return + } + + // Generate a unique session name + const sessionName = 'tui-test-' + Date.now() + '-' + Math.random().toString(36).slice(2, 6) + const helperPath = '/tmp/tmux-helper-' + sessionName + '.sh' + + logger.info('Writing helper script to ' + helperPath) + + // Write the self-contained helper script to /tmp + const { toolResult: writeResult } = yield { + toolName: 'run_terminal_command', + input: { + command: 'cat > ' + helperPath + " << 'TMUX_HELPER_EOF'\n" + helperScript + "TMUX_HELPER_EOF\nchmod +x " + helperPath, + timeout_seconds: 10, + }, + } + + const writeOutput = writeResult?.[0] + if (writeOutput && writeOutput.type === 'json') { + const value = writeOutput.value as Record + const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined + if (exitCode !== 0) { + const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'unknown error' + logger.error('Failed to write helper script: ' + stderr) + yield { + toolName: 'set_output', + input: { + overallStatus: 'failure', + summary: 'Failed to write helper script to /tmp. ' + stderr, + sessionName: '', + scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check /tmp is writable' }], + captures: [], + }, + } + return + } + } + + logger.info('Starting tmux session (bash)') + + // Start the tmux session with bash (not the user's command directly) + const { toolResult } = yield { + toolName: 'run_terminal_command', + input: { + command: helperPath + " start '" + sessionName + "'", + timeout_seconds: 30, + }, + } + + let started = false + let parseError = '' + + const result = toolResult?.[0] + if (result && result.type === 'json') { + const value = result.value as Record + const stdout = typeof value?.stdout === 'string' ? value.stdout.trim() : '' + const stderr = typeof value?.stderr === 'string' ? 
value.stderr.trim() : '' + const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined + + if (exitCode !== 0) { + parseError = stderr || 'Helper script failed with no error message' + } else if (stdout === sessionName) { + started = true + } else { + parseError = 'Unexpected output: ' + stdout + } + } else { + parseError = 'Unexpected result type from run_terminal_command' + } + + if (!started) { + const errorMsg = parseError || 'Failed to start session' + logger.error({ parseError: errorMsg }, 'Failed to start tmux session') + yield { + toolName: 'set_output', + input: { + overallStatus: 'failure', + summary: 'Failed to start tmux session. ' + errorMsg, + sessionName: '', + scriptIssues: [ + { + script: helperPath, + issue: errorMsg, + errorOutput: JSON.stringify(toolResult), + suggestedFix: 'Ensure tmux is installed and the command is valid.', + }, + ], + captures: [], + }, + } + return + } + + logger.info('Successfully started tmux session: ' + sessionName) + + // Send the user's command to the bash session + const escapedCommand = startCommand.replace(/'/g, "'\\''") + const { toolResult: sendResult } = yield { + toolName: 'run_terminal_command', + input: { + command: helperPath + " send '" + sessionName + "' '" + escapedCommand + "'", + timeout_seconds: 15, + }, + } + + const sendOutput = sendResult?.[0] + if (sendOutput && sendOutput.type === 'json') { + const value = sendOutput.value as Record + const exitCode = typeof value?.exitCode === 'number' ? value.exitCode : undefined + if (exitCode !== 0) { + const stderr = typeof value?.stderr === 'string' ? value.stderr.trim() : 'send failed' + logger.error('Failed to send command: ' + stderr) + yield { + toolName: 'run_terminal_command', + input: { command: helperPath + " stop '" + sessionName + "'", timeout_seconds: 5 }, + } + yield { + toolName: 'set_output', + input: { + overallStatus: 'failure', + summary: 'Started session but failed to send command. 
' + stderr, + sessionName, + scriptIssues: [{ script: helperPath, issue: stderr, suggestedFix: 'Check that the command is valid.' }], + captures: [], + }, + } + return + } + } + + logger.info('Sent command to session: ' + startCommand) + + // Wait briefly then capture initial state so the agent starts with context + const { toolResult: initCapture } = yield { + toolName: 'run_terminal_command', + input: { + command: 'sleep 1.5 && ' + helperPath + " capture '" + sessionName + "' --wait 0 --label startup-check", + timeout_seconds: 10, + }, + } + + let initialOutput = '(no initial capture available)' + const initResult = initCapture?.[0] + if (initResult && initResult.type === 'json') { + const initValue = initResult.value as Record + if (typeof initValue?.stdout === 'string' && initValue.stdout.trim()) { + initialOutput = initValue.stdout.trim() + } + } + + const captureDir = '/tmp/tmux-captures-' + sessionName + + yield { + toolName: 'add_message', + input: { + role: 'user', + content: 'A tmux session has been started and `' + startCommand + '` has been sent to it.\n\n' + + '**Session:** `' + sessionName + '`\n' + + '**Helper:** `' + helperPath + '`\n' + + '**Captures dir:** `' + captureDir + '/`\n\n' + + '**Initial terminal output:**\n```\n' + initialOutput + '\n```\n\n' + + 'Check the initial output above — if you see errors like "command not found" or "No such file", report failure immediately.\n\n' + + 'Commands:\n' + + '- Send input: `' + helperPath + ' send "' + sessionName + '" "..."`\n' + + '- Send with paste mode: `' + helperPath + ' send "' + sessionName + '" "..." --paste`\n' + + '- Send + wait for output: `' + helperPath + ' send "' + sessionName + '" "..." 
--wait-idle 3`\n' + + '- Send key: `' + helperPath + ' key "' + sessionName + '" C-c`\n' + + '- Raw tmux send-keys: `' + helperPath + ' raw "' + sessionName + '" "text" Enter`\n' + + '- Wait for pattern: `' + helperPath + ' wait-for "' + sessionName + '" "pattern" --timeout 30`\n' + + '- Capture visible pane: `' + helperPath + ' capture "' + sessionName + '" --label "..."`\n' + + '- Capture full scrollback: `' + helperPath + ' capture "' + sessionName + '" --full --label "final"`\n' + + '- Capture without ANSI colors: `' + helperPath + ' capture "' + sessionName + '" --strip-ansi`\n' + + '- Check session status: `' + helperPath + ' status "' + sessionName + '"`\n' + + '- Wait for stable output: `' + helperPath + ' wait-idle "' + sessionName + '" 3`\n' + + '- Stop session: `' + helperPath + ' stop "' + sessionName + '"`\n\n' + + 'Captures are saved to `' + captureDir + '/` — use the file paths in your output so the parent agent can verify with `read_files`.', + }, + includeToolCall: false, + } + + yield 'STEP_ALL' + }, +} + +export default definition diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 238fcf7152..9fda6bf5ba 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -86,6 +86,7 @@ export function createBase2( isFree && 'code-reviewer-lite', isDefault && 'code-reviewer', isMax && 'code-reviewer-multi-prompt', + isDefault && 'tmux-cli', 'context-pruner', ), @@ -106,6 +107,7 @@ export function createBase2( } - **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, git commit, running any scripts -- especially ones that could alter production environments (!), installing packages globally, etc). Don't run any of these effectful commands unless the user explicitly asks you to. - **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it. 
+- **Don't use set_output:** The set_output tool is for spawned subagents to report results. Don't use it yourself. # Code Editing Mandates From b183bbcfdb6c6abf9ae0908adb5afbd0b4fd3e32 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 10 Mar 2026 02:48:48 +0000 Subject: [PATCH 09/15] Bump Freebuff version to 0.0.6 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index f0e21a6392..8947d21389 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.5", + "version": "0.0.6", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From b98c1cad5717f44ee275d8b785cf31029d9c7097 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 19:38:26 -0700 Subject: [PATCH 10/15] Tweak base2 to not mention agents that were removed --- agents/base2/base2.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 9fda6bf5ba..95bce66748 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -139,7 +139,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u - **Spawn multiple agents in parallel:** This increases the speed of your response **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response. - **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. ${buildArray( - '- Spawn context-gathering agents (file pickers, code-searcher, directory-lister, glob-matcher, and web/docs researchers) before making edits.', + '- Spawn context-gathering agents (file pickers and web/docs researchers) before making edits. 
Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase.', isFree && '- Spawn the editor-lite agent to implement the changes after you have gathered all the context you need.', isDefault && @@ -195,11 +195,11 @@ ${buildArray( please implement [a complex new feature] -[ You spawn 3 file-pickers, a code-searcher, and a docs researcher in parallel to find relevant files and do research online ] +[ You spawn 3 file-pickers and a docs researcher in parallel to find relevant files and do research online. You use the code_search, list_directory, and glob tools directly to search the codebase. ] [ You read a few of the relevant files using the read_files tool in two separate tool calls ] -[ You spawn one more code-searcher and file-picker ] +[ You use code_search and glob tools, and spawn another file-picker to find more relevant files ] [ You read a few other relevant files using the read_files tool ]${!noAskUser ? `\n\n[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ]` @@ -298,7 +298,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} } } -const EXPLORE_PROMPT = `- Iteratively spawn file pickers, code-searchers, directory-listers, glob-matchers, commanders, and web/docs researchers to gather context as needed. The file-picker agent in particular is very useful to find relevant files -- try spawning multiple in parallel (say, 2-5) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. Read all the relevant files using the read_files tool.` +const EXPLORE_PROMPT = `- Iteratively spawn file pickers, commanders, and web/docs researchers to gather context as needed. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase. 
The file-picker agent in particular is very useful to find relevant files -- try spawning multiple in parallel (say, 2-5) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. Read all the relevant files using the read_files tool.` function buildImplementationInstructionsPrompt({ isSonnet, From c536c94fe605b3cd10a418ab91f8e3c0a103bdf7 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 19:45:00 -0700 Subject: [PATCH 11/15] Trim diff viewer of new lines --- cli/src/components/tools/diff-viewer.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/components/tools/diff-viewer.tsx b/cli/src/components/tools/diff-viewer.tsx index d528c28054..72ee7361f3 100644 --- a/cli/src/components/tools/diff-viewer.tsx +++ b/cli/src/components/tools/diff-viewer.tsx @@ -50,7 +50,7 @@ const lineColor = ( export const DiffViewer = ({ diffText }: DiffViewerProps) => { const theme = useTheme() - const lines = diffText.split('\n') + const lines = diffText.trim().split('\n') return ( Date: Mon, 9 Mar 2026 19:49:45 -0700 Subject: [PATCH 12/15] Only use amazon bedrock for our base2 opus, so there are fewer prompt cache misses! 
--- agents/base2/base2.ts | 4 +++- agents/editor/best-of-n/best-of-n-selector2.ts | 5 +++++ agents/editor/best-of-n/editor-implementor.ts | 5 +++++ agents/editor/best-of-n/editor-multi-prompt.ts | 3 +++ agents/editor/editor.ts | 5 +++++ agents/general-agent/general-agent.ts | 5 +++++ agents/reviewer/code-reviewer.ts | 3 +++ agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts | 3 +++ agents/thinker/best-of-n/thinker-best-of-n.ts | 5 +++++ agents/thinker/best-of-n/thinker-selector.ts | 5 +++++ agents/thinker/thinker.ts | 3 +++ 11 files changed, 45 insertions(+), 1 deletion(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 95bce66748..be5ade5a1c 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -34,7 +34,9 @@ export function createBase2( order: ['inceptron/fp8'], allow_fallbacks: false, data_collection: 'deny', - } : undefined, + } : { + only: ['amazon-bedrock'], + }, displayName: 'Buffy the Orchestrator', spawnerPrompt: 'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks', diff --git a/agents/editor/best-of-n/best-of-n-selector2.ts b/agents/editor/best-of-n/best-of-n-selector2.ts index 852c268783..a0263a42cb 100644 --- a/agents/editor/best-of-n/best-of-n-selector2.ts +++ b/agents/editor/best-of-n/best-of-n-selector2.ts @@ -23,6 +23,11 @@ export const createBestOfNSelector2 = (options: { effort: 'high', }, }), + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: isGpt5 ? 'Best-of-N GPT-5 Diff Selector' : isOpus diff --git a/agents/editor/best-of-n/editor-implementor.ts b/agents/editor/best-of-n/editor-implementor.ts index d22cc77f31..87ec441ba3 100644 --- a/agents/editor/best-of-n/editor-implementor.ts +++ b/agents/editor/best-of-n/editor-implementor.ts @@ -20,6 +20,11 @@ export const createBestOfNImplementor = (options: { : isGemini ? 
'google/gemini-3-pro-preview' : 'openai/gpt-5.1', + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: 'Implementation Generator', spawnerPrompt: 'Generates a complete implementation using propose_* tools that draft changes without applying them', diff --git a/agents/editor/best-of-n/editor-multi-prompt.ts b/agents/editor/best-of-n/editor-multi-prompt.ts index 2d101ea8a6..5c54cf9697 100644 --- a/agents/editor/best-of-n/editor-multi-prompt.ts +++ b/agents/editor/best-of-n/editor-multi-prompt.ts @@ -12,6 +12,9 @@ export function createMultiPromptEditor(): Omit { return { publisher, model: 'anthropic/claude-opus-4.6', + providerOptions: { + only: ['amazon-bedrock'], + }, displayName: 'Multi-Prompt Editor', spawnerPrompt: 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. It also returns further suggested improvements which you should take seriously and act on. Pass as input an array of short prompts specifying different implementation approaches or strategies. Make sure to read any files intended to be edited before spawning this agent.', diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index f765966879..6beb22d221 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -15,6 +15,11 @@ export const createCodeEditor = (options: { : options.model === 'minimax' ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', + ...(options.model === 'opus' && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: 'Code Editor', spawnerPrompt: "Expert code editor that implements code changes based on the user's request. Do not specify an input prompt for this agent; it inherits the context of the entire conversation with the user. 
Make sure to read any files intended to be edited before spawning this agent as it cannot read files on its own.", diff --git a/agents/general-agent/general-agent.ts b/agents/general-agent/general-agent.ts index 37d92beacd..4925e60ab4 100644 --- a/agents/general-agent/general-agent.ts +++ b/agents/general-agent/general-agent.ts @@ -13,6 +13,11 @@ export const createGeneralAgent = (options: { return { publisher, model: isGpt5 ? 'openai/gpt-5.2' : 'anthropic/claude-opus-4.6', + ...(!isGpt5 && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), ...(isGpt5 && { reasoningOptions: { effort: 'high' as const, diff --git a/agents/reviewer/code-reviewer.ts b/agents/reviewer/code-reviewer.ts index c22d2d6c40..9cc840d69f 100644 --- a/agents/reviewer/code-reviewer.ts +++ b/agents/reviewer/code-reviewer.ts @@ -65,6 +65,9 @@ const definition: SecretAgentDefinition = { id: 'code-reviewer', publisher, ...createReviewer('anthropic/claude-opus-4.6'), + providerOptions: { + only: ['amazon-bedrock'], + }, } export default definition diff --git a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts index 134862a57b..a6a380e3ee 100644 --- a/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts +++ b/agents/reviewer/multi-prompt/code-reviewer-multi-prompt.ts @@ -15,6 +15,9 @@ export function createCodeReviewerMultiPrompt(): Omit< return { publisher, model: 'anthropic/claude-opus-4.6', + providerOptions: { + only: ['amazon-bedrock'], + }, displayName: 'Multi-Prompt Code Reviewer', spawnerPrompt: 'Reviews code by spawning multiple code-reviewer agents with different focus prompts, then combines all review outputs into a comprehensive review. Make sure to read relevant files before spawning this agent. 
Pass an input array of short prompts specifying several different review focuses or perspectives.', diff --git a/agents/thinker/best-of-n/thinker-best-of-n.ts b/agents/thinker/best-of-n/thinker-best-of-n.ts index 66530a9269..3e1e532c5f 100644 --- a/agents/thinker/best-of-n/thinker-best-of-n.ts +++ b/agents/thinker/best-of-n/thinker-best-of-n.ts @@ -20,6 +20,11 @@ export function createThinkerBestOfN( : isOpus ? 'anthropic/claude-opus-4.6' : 'anthropic/claude-sonnet-4.5', + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: isGpt5 ? 'Best-of-N GPT-5 Thinker' : isOpus diff --git a/agents/thinker/best-of-n/thinker-selector.ts b/agents/thinker/best-of-n/thinker-selector.ts index a5c302bb96..ab10bff69f 100644 --- a/agents/thinker/best-of-n/thinker-selector.ts +++ b/agents/thinker/best-of-n/thinker-selector.ts @@ -11,6 +11,11 @@ export function createThinkerSelector( model: isOpus ? 'anthropic/claude-opus-4.6' : 'anthropic/claude-sonnet-4.5', + ...(isOpus && { + providerOptions: { + only: ['amazon-bedrock'], + }, + }), displayName: isOpus ? 'Opus Thinker Output Selector' : 'Thinker Output Selector', diff --git a/agents/thinker/thinker.ts b/agents/thinker/thinker.ts index dfd61db1a0..3dd57d472f 100644 --- a/agents/thinker/thinker.ts +++ b/agents/thinker/thinker.ts @@ -6,6 +6,9 @@ const definition: SecretAgentDefinition = { id: 'thinker', publisher, model: 'anthropic/claude-opus-4.6', + providerOptions: { + only: ['amazon-bedrock'], + }, displayName: 'Theo the Theorizer', spawnerPrompt: 'Does deep thinking given the current conversation history and a specific prompt to focus on. Use this to help you solve a specific problem. 
It is better to gather any relevant context before spawning this agent.', From e084c255e38928945c7b0903a392b030d6ecb73a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 10 Mar 2026 02:50:23 +0000 Subject: [PATCH 13/15] Bump Freebuff version to 0.0.7 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 8947d21389..d7ca6de62c 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.6", + "version": "0.0.7", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 822a2992e5e2065ac464627e6cd88d0044f16154 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 9 Mar 2026 21:55:11 -0700 Subject: [PATCH 14/15] Simplify Freebuff landing page --- freebuff/web/src/app/home-client.tsx | 181 ++----------------------- freebuff/web/src/components/footer.tsx | 7 +- freebuff/web/src/components/navbar.tsx | 45 +----- 3 files changed, 17 insertions(+), 216 deletions(-) diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index e58705f68f..feb7131d34 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -2,73 +2,29 @@ import { AnimatePresence, motion } from 'framer-motion' import { - Terminal, - Brain, - Scissors, - Zap, - MessageSquare, - FileText, ChevronDown, } from 'lucide-react' import { useState } from 'react' -import Link from 'next/link' import { BackgroundBeams } from '@/components/background-beams' import { CopyButton } from '@/components/copy-button' import { HeroGrid } from '@/components/hero-grid' -import { TerminalDemo } from '@/components/terminal-demo' -import { Button } from '@/components/ui/button' import { cn } from '@/lib/utils' const INSTALL_COMMAND = 'npm install -g freebuff' -const features = [ - { - icon: Brain, - title: 'Deep 
Codebase Understanding', - description: - 'Indexes your entire project to generate code that fits your patterns and conventions.', - }, - { - icon: Scissors, - title: 'Surgical Code Edits', - description: - "Makes precise changes across files while respecting your codebase's structure.", - }, - { - icon: Terminal, - title: 'Terminal Integration', - description: - 'Runs commands on your behalf — install packages, run tests, and more.', - }, - { - icon: FileText, - title: 'Knowledge Files', - description: - 'Add knowledge.md to teach Freebuff about your project conventions.', - }, - { - icon: MessageSquare, - title: 'Chat History', - description: - 'Resume past conversations and pick up right where you left off.', - }, - { - icon: Zap, - title: 'Custom Agents', - description: - 'Load custom agents from your .agents/ directory for specialized workflows.', - }, -] - -const headlineWords = ["The", "strongest"] -const greenWords = ["free", "coding", "agent."] +const headlineWords = ["The", "free", "coding", "agent"] const faqs = [ { - question: 'Is it really free?', + question: 'How can it be free?', + answer: + 'Freebuff is supported by ads shown in the CLI.', + }, + { + question: 'What model do you use?', answer: - 'Yes! Freebuff is completely free to use. The service is supported by ads shown in the CLI.', + 'MiniMax M2.5 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.', }, { question: 'Are you training on my data?', @@ -80,11 +36,6 @@ const faqs = [ answer: "We don't store your codebase. 
We only collect minimal logs for debugging purposes.", }, - { - question: 'What model do you use?', - answer: - 'We use multiple models: MiniMax M2.5 as the main coding agent, Gemini 3.1 Flash Lite for finding files, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.', - }, ] function InstallCommand({ className }: { className?: string }) { @@ -184,24 +135,6 @@ export default function HomeClient() { {/* Hero content */}
- {/* Pill badge */} - -
- - - - - - 100% Free - -
-
- {/* Headline with staggered word animation */} - - {headlineWords.map((word, i) => ( - - {word} - - ))} - - {greenWords.map((word, i) => ( + {headlineWords.map((word, i) => ( {word} @@ -244,7 +166,7 @@ export default function HomeClient() { transition={{ duration: 0.6, delay: 0.8 }} className="text-lg md:text-xl text-zinc-400 max-w-2xl mx-auto mb-10 leading-relaxed" > - No subscription. No API key. 5x faster than Claude Code. + No subscription. No API key. Zero configuration. {/* Install command */} @@ -256,9 +178,6 @@ export default function HomeClient() { > - - {/* Terminal demo */} -
{/* Bottom fade */} @@ -268,48 +187,6 @@ export default function HomeClient() { {/* Divider */}
- {/* ─── Features Section ─── */} -
-
- -

- Everything you need. Nothing you don't. -

-

- Freebuff brings the full power of an AI coding agent to your - terminal — completely free. -

-
- -
- {features.map((feature, i) => ( - -
- -
-

{feature.title}

-

- {feature.description} -

-
- ))} -
-
-
- {/* ─── FAQ Section ─── */}
@@ -328,42 +205,6 @@ export default function HomeClient() {
- - {/* Divider */} -
- - {/* ─── CTA Section ─── */} -
-
-
- -

- Start coding for free -

-

- No credit card. No trial period. Just install and go. -

- - - -

- Want more power?{' '} - - Check out Codebuff - {' '} - for premium models and higher limits. -

-
-
-
) } diff --git a/freebuff/web/src/components/footer.tsx b/freebuff/web/src/components/footer.tsx index 90886eba6a..1b9587dbec 100644 --- a/freebuff/web/src/components/footer.tsx +++ b/freebuff/web/src/components/footer.tsx @@ -20,7 +20,7 @@ export function Footer() {

- The world's strongest free coding agent. + The free coding agent

@@ -28,10 +28,11 @@ export function Footer() {

Links