diff --git a/README.md b/README.md index b5bb34e..e241b73 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,67 @@ codex ## Interactive sessions (e.g. Claude) ### Claude Code / Codex (PTY) - Start a session with command `claude` (or `codex` if installed). + +#### (Recommended) Claude Code status line for accurate budget tracking +Claude Code can run a custom status line command that receives structured JSON about the current session (context window usage, estimated cost, etc.). + +For the most reliable budget tracking in Agents Fleet, configure a **single-line** status line that prints parse-friendly key/value pairs. + +1) Create the script: +```bash +#!/bin/bash +input=$(cat) + +CTX_IN=$(echo "$input" | jq -r '.context_window.total_input_tokens // 0') +CTX_OUT=$(echo "$input" | jq -r '.context_window.total_output_tokens // 0') +CTX_SIZE=$(echo "$input" | jq -r '.context_window.context_window_size // 0') +CTX_PCT=$(echo "$input" | jq -r '.context_window.used_percentage // 0' | cut -d. -f1) + +COST=$(echo "$input" | jq -r '.cost.total_cost_usd // 0') +COST_FMT=$(printf '$%.6f' "$COST") + +# Single-line, parse-friendly output: +# Use a unique prefix + delimiter to make parsing reliable even with TUI redraws. +echo "AF|ctx=${CTX_IN}/${CTX_SIZE}(${CTX_PCT}%)|in=${CTX_IN}|out=${CTX_OUT}|cost=${COST_FMT}" +``` +Save it as `~/.claude/agents_fleet_statusline.sh` and make it executable: +```bash +chmod +x ~/.claude/agents_fleet_statusline.sh +``` + +2) Update `~/.claude/settings.json`: +```json +{ + "statusLine": { + "type": "command", + "command": "~/.claude/agents_fleet_statusline.sh", + "padding": 1, + "refreshInterval": 1 + } +} +``` + +Notes: +- Requires `jq` to be installed (`brew install jq` on macOS). +- `cost.total_cost_usd` is an estimate computed client-side by Claude Code and may differ from your actual bill. +- Type directly into the **Terminal (live)** pane (xterm.js). 
+- Use **Terminal (persisted)** to replay and scroll through the recorded PTY output (xterm.js replay). + +#### (Recommended) Codex status line for accurate budget tracking +Codex can also show session usage in a single-line status line. For Agents Fleet, the simplest reliable setup is to keep Codex’s built-in status line enabled and ensure it includes the usage fields below. + +1) Update `~/.codex/config.toml`: +```toml +[tui] +status_line = ["model-with-reasoning","current-dir","context-remaining","context-used","total-input-tokens","total-output-tokens","weekly-limit","five-hour-limit","run-state","task-progress"] +status_line_use_color = true +``` + +2) Make sure the output stays on one line in the Codex TUI. + +Notes: +- The config above matches the usage fields Agents Fleet can parse for budget tracking. +- If you change the field list, keep it single-line so PTY replay remains parse-friendly. - Type directly into the **Terminal (live)** pane (xterm.js). - Use **Terminal (persisted)** to replay and scroll through the recorded PTY output (xterm.js replay). diff --git a/apps/server/src/processManager.ts b/apps/server/src/processManager.ts index 31897b8..3783945 100644 --- a/apps/server/src/processManager.ts +++ b/apps/server/src/processManager.ts @@ -24,12 +24,261 @@ type RunningSession = { // PTY persistence buffering (avoid DB write per chunk) ptyBuffer: string; ptyFlushTimer: NodeJS.Timeout | null; + + // Best-effort usage parsing for agent CLIs. + // For Codex, usage lines report absolute totals; we overwrite session estimates from these. + lastCodexUsage?: { input: number; output: number }; + codexCleanTail?: string; + + // Best-effort parsing for Claude Code statusLine scripts. + lastClaudeUsage?: { + ctxIn: number; + ctxOut: number; + ctxSize: number; + ctxPct: number; + costUsd: number | null; + }; + claudeCleanTail?: string; + + // Throttle writes from redraw-heavy status parsing. 
+ codexLastPersistAtMs?: number; + claudeLastPersistAtMs?: number; + + // Debug instrumentation (optional). + _lastCodexDebugAtMs?: number; + _lastClaudeDebugAtMs?: number; }; function nowIso() { return new Date().toISOString(); } +function parseCodexUsageTotalsFromText( + cleanText: string, +): { input: number; output: number; source: "summary" | "status" } | null { + // Codex can show usage in two forms: + // 1) Status line (redraw-heavy): "... · 15.7K in · 27 out · Ready" + // 2) Summary line (authoritative): "Token usage: total=... input=... output=..." + + // Prefer the explicit "Token usage:" line when present. + const m = cleanText.match( + /Token usage:\s*total=([0-9,]+)\s+input=([0-9,]+)[^\n]*?\s+output=([0-9,]+)/, + ); + if (m) { + const input = Number(m[2].replace(/,/g, "")); + const output = Number(m[3].replace(/,/g, "")); + if (!Number.isFinite(input) || !Number.isFinite(output)) return null; + if (input < 0 || output < 0) return null; + return { input, output, source: "summary" }; + } + + // Status line parsing (best-effort). Supports K/M suffixes. + // We want the *last* occurrence in the buffer (TUI redraws can leave stale copies). 
+ const re = + /\b([0-9]+(?:\.[0-9]+)?)([KM]?)\s*in\b[\s\S]*?\b([0-9]+(?:\.[0-9]+)?)([KM]?)\s*out\b/gi; + let last: RegExpExecArray | null = null; + for (;;) { + const m2 = re.exec(cleanText); + if (!m2) break; + last = m2; + } + if (!last) return null; + + function parseCompact(num: string, suffix: string): number { + const n = Number(num); + if (!Number.isFinite(n) || n < 0) return NaN; + const s = suffix.toUpperCase(); + if (s === "K") return Math.round(n * 1_000); + if (s === "M") return Math.round(n * 1_000_000); + return Math.round(n); + } + + const input = parseCompact(last[1], last[2]); + const output = parseCompact(last[3], last[4]); + if (!Number.isFinite(input) || !Number.isFinite(output)) return null; + return { input, output, source: "status" }; +} + +function _parseClaudeStatusLineFromText(cleanText: string): { + ctxIn: number; + ctxOut: number; + ctxSize: number; + ctxPct: number; + costUsd: number | null; +} | null { + // Claude Code statusLine output can be redraw-fragmented in PTY output. + // We support a few formats and then pick the best candidate: + // 1) Preferred: AF|ctx=<in>/<size>(<pct>%)|in=<in>|out=<out>|cost=<usd> + // 2) Prefix-dropped redraw artifact: <in>/<size>(<pct>%)|in=<in>|out=<out>|cost=<usd> + // 3) Minimal: in=<in>|out=<out>|cost=<usd> (ctx fields missing) + // 4) Legacy space-delimited: ctx=... in=... out=... cost=... + // cost is optional. + + type Candidate = { + ctxIn: number; + ctxOut: number; + ctxSize: number; + ctxPct: number; + costUsd: number | null; + score: number; + }; + + const cands: Candidate[] = []; + + function pushCand(args: { + ctxIn: number; + ctxOut: number; + ctxSize: number; + ctxPct: number; + in2?: number; + costUsdRaw?: string; + hasCtx: boolean; + hasAf: boolean; + }) { + const { ctxIn, ctxOut, ctxSize, ctxPct } = args; + const in2 = typeof args.in2 === "number" ? 
args.in2 : ctxIn; + if (![ctxIn, ctxOut, ctxSize, ctxPct, in2].every(Number.isFinite)) return; + if (ctxIn < 0 || ctxOut < 0) return; + if (args.hasCtx) { + if (ctxSize <= 0) return; + if (ctxPct < 0 || ctxPct > 100) return; + // If we have both ctxIn and in= repeated, they should match. + if (in2 !== ctxIn) return; + } + + const costUsd = + typeof args.costUsdRaw === "string" && args.costUsdRaw.length > 0 + ? Number(args.costUsdRaw) + : null; + if (costUsd != null && (!Number.isFinite(costUsd) || costUsd < 0)) return; + + // Score: prefer AF format, prefer having ctx, prefer non-zero, prefer higher totals. + // NOTE: treat "all zeros" as very low-quality, since early statusline invocations + // often emit zeros before the first API call completes. + const base = (args.hasAf ? 1000 : 0) + (args.hasCtx ? 100 : 0); + const nonZero = (ctxIn > 0 ? 50 : 0) + (ctxOut > 0 ? 10 : 0); + const magnitude = Math.min(100, Math.floor(Math.log10(ctxIn + 1) * 10)); + const allZeroPenalty = ctxIn === 0 && ctxOut === 0 ? -10_000 : 0; + cands.push({ + ctxIn, + ctxOut, + ctxSize, + ctxPct, + costUsd, + score: base + nonZero + magnitude + allZeroPenalty, + }); + } + + // 1) Preferred AF|ctx=... + { + const re = + /AF\|ctx=(\d+)\/(\d+)\((\d+)%\)\|in=(\d+)\|out=(\d+)(?:\|cost=\$?([0-9]+(?:\.[0-9]+)?))?/g; + for (;;) { + const m = re.exec(cleanText); + if (!m) break; + pushCand({ + ctxIn: Number(m[1]), + ctxSize: Number(m[2]), + ctxPct: Number(m[3]), + in2: Number(m[4]), + ctxOut: Number(m[5]), + costUsdRaw: m[6], + hasCtx: true, + hasAf: true, + }); + } + } + + // 2) Prefix-dropped: <in>/<size>(<pct>%)|in=<in>|out=<out>... 
+ { + const re = + /(\d+)\/(\d+)\((\d+)%\)\|in=(\d+)\|out=(\d+)(?:\|cost=\$?([0-9]+(?:\.[0-9]+)?))?/g; + for (;;) { + const m = re.exec(cleanText); + if (!m) break; + pushCand({ + ctxIn: Number(m[4]), + ctxSize: Number(m[2]), + ctxPct: Number(m[3]), + in2: Number(m[4]), + ctxOut: Number(m[5]), + costUsdRaw: m[6], + hasCtx: true, + hasAf: false, + }); + } + } + + // 3) Minimal: in/out/cost only (pipe-delimited). + { + const re = /\bin=(\d+)\|out=(\d+)(?:\|cost=\$?([0-9]+(?:\.[0-9]+)?))?/g; + for (;;) { + const m = re.exec(cleanText); + if (!m) break; + pushCand({ + ctxIn: Number(m[1]), + ctxOut: Number(m[2]), + ctxSize: 1, + ctxPct: 0, + costUsdRaw: m[3], + hasCtx: false, + hasAf: false, + }); + } + } + + // 3b) Minimal: in/out only (space-delimited fragments). + // TUI redraw artifacts can leave "in=... out=..." but also stray "cost=..." from unrelated text. + // We intentionally DO NOT parse cost here; cost should come from a full AF line. + { + const re = /\bin=(\d+)\s+out=(\d+)/g; + for (;;) { + const m = re.exec(cleanText); + if (!m) break; + pushCand({ + ctxIn: Number(m[1]), + ctxOut: Number(m[2]), + ctxSize: 1, + ctxPct: 0, + costUsdRaw: undefined, + hasCtx: false, + hasAf: false, + }); + } + } + + // 4) Legacy space-delimited. + { + const re = + /ctx=(\d+)\/(\d+)\((\d+)%\)\s*in=(\d+)\s*out=(\d+)(?:\s*cost=\$?([0-9]+(?:\.[0-9]+)?))?/g; + for (;;) { + const m = re.exec(cleanText); + if (!m) break; + pushCand({ + ctxIn: Number(m[1]), + ctxSize: Number(m[2]), + ctxPct: Number(m[3]), + in2: Number(m[4]), + ctxOut: Number(m[5]), + costUsdRaw: m[6], + hasCtx: true, + hasAf: false, + }); + } + } + + if (cands.length === 0) return null; + cands.sort((a, b) => b.score - a.score); + const best = cands[0]; + return { + ctxIn: best.ctxIn, + ctxOut: best.ctxOut, + ctxSize: best.ctxSize, + ctxPct: best.ctxPct, + costUsd: best.costUsd, + }; +} + function shouldCaptureGitOnEnd(): boolean { // Opt-in by default. Set AGENTS_FLEET_CAPTURE_GIT_ON_END=0/false/no to disable. 
const v = process.env.AGENTS_FLEET_CAPTURE_GIT_ON_END; @@ -142,6 +391,54 @@ async function updateSessionFields( export class ProcessManager { private readonly running = new Map(); + + applyUsageTick( + sessionId: string, + tick: { + inputTokens: number; + outputTokens: number; + costUsd: number | null; + source: "client_rendered_statusline"; + }, + ) { + void (async () => { + const session = await getSession(sessionId); + if (!session) return; + + // Only accept updates for Claude PTY sessions for MVP. + if (session.command.trim() !== "claude") return; + + // Trust the client-rendered statusline as authoritative. Take the max for + // each field so transient zero/lower readings don't clobber real values. + const nextCost = + typeof tick.costUsd === "number" + ? Math.max(session.estimated_cost_usd, tick.costUsd) + : session.estimated_cost_usd; + const nextIn = Math.max(session.estimated_input_tokens, tick.inputTokens); + const nextOut = Math.max( + session.estimated_output_tokens, + tick.outputTokens, + ); + + if ( + nextCost === session.estimated_cost_usd && + nextIn === session.estimated_input_tokens && + nextOut === session.estimated_output_tokens + ) { + return; + } + + const updated = await updateSessionFields(sessionId, { + estimated_cost_usd: nextCost, + estimated_input_tokens: nextIn, + estimated_output_tokens: nextOut, + }); + + if (updated) this.hub.broadcastSession(updated); + void this.enforceBudget(sessionId, updated ?? session); + })(); + } + constructor(private readonly hub: SessionWsHub) { // Global idle timeout: stop sessions with no output for a while. setInterval(() => { @@ -256,7 +553,134 @@ export class ProcessManager { } // Budget estimation is best-effort; count from the raw stream. - void handleOutputText(data); + // NOTE: For Codex sessions, we rely on Codex-reported totals (parsed from output). + // For Claude Code, we rely on client-rendered statusline ticks (usage_tick) for accuracy. 
+ const cmd = args.command.trim(); + if (cmd === "codex") { + const cleanChunk = stripAnsi(data); + const r2 = this.running.get(args.sessionId); + if (r2) { + const TAIL_MAX = 16_384; + + if (cmd === "codex") { + const prevTail = r2.codexCleanTail ?? ""; + const nextTailRaw = prevTail + cleanChunk; + r2.codexCleanTail = + nextTailRaw.length > TAIL_MAX + ? nextTailRaw.slice(nextTailRaw.length - TAIL_MAX) + : nextTailRaw; + + const usage = parseCodexUsageTotalsFromText(r2.codexCleanTail); + if (usage) { + // Debug: allow inspecting codex tail + matches when needed. + if (process.env.AGENTS_FLEET_DEBUG_CODEX_USAGE === "1") { + // Avoid spamming: print at most once per second per session. + const now = Date.now(); + const last = r2._lastCodexDebugAtMs; + if (!last || now - last >= 1000) { + r2._lastCodexDebugAtMs = now; + const tail = r2.codexCleanTail.slice(-500); + console.log( + `[codex-usage] session=${args.sessionId} src=${usage.source} in=${usage.input} out=${usage.output} tail=${JSON.stringify(tail)}`, + ); + } + } + const prev = r2.lastCodexUsage; + const prevIn = prev?.input ?? 0; + const prevOut = prev?.output ?? 0; + const nextIn = usage.input; + const nextOut = usage.output; + + const monotonic = nextIn >= prevIn && nextOut >= prevOut; + const maxJumpStatus = 50_000; + const jumpOk = + usage.source === "summary" || + (nextIn - prevIn <= maxJumpStatus && + nextOut - prevOut <= maxJumpStatus); + + if (monotonic && jumpOk) { + if (!prev || prev.input !== nextIn || prev.output !== nextOut) { + r2.lastCodexUsage = { input: nextIn, output: nextOut }; + + const nowMs = Date.now(); + const lastPersist = r2.codexLastPersistAtMs ?? 
0; + const minIntervalMs = 500; + const shouldPersist = + usage.source === "summary" || + nowMs - lastPersist >= minIntervalMs; + + if (shouldPersist) { + r2.codexLastPersistAtMs = nowMs; + void (async () => { + const session = await getSession(args.sessionId); + if (!session) return; + const cost = computeCostUsd(nextIn, nextOut); + const updated = await updateSessionFields( + args.sessionId, + { + estimated_input_tokens: nextIn, + estimated_output_tokens: nextOut, + estimated_cost_usd: cost, + }, + ); + if (updated) this.hub.broadcastSession(updated); + void this.enforceBudget( + args.sessionId, + updated ?? session, + ); + })(); + } + } + } + } + } + } + } else { + if (cmd === "claude") { + // Parse the Agents Fleet status line out of the PTY stream. + // Format: "[AF] in=<in> out=<out> cost=$<usd> [/AF]" + const cleanChunk = stripAnsi(data); + const r2 = this.running.get(args.sessionId); + if (r2) { + const TAIL_MAX = 16_384; + const prevTail = r2.claudeCleanTail ?? ""; + const nextTailRaw = prevTail + cleanChunk; + r2.claudeCleanTail = + nextTailRaw.length > TAIL_MAX + ? nextTailRaw.slice(nextTailRaw.length - TAIL_MAX) + : nextTailRaw; + + // Find the LAST [AF]...[/AF] block in the tail. + const re = + /\[AF\]\s+in=(\d+)\s+out=(\d+)\s+cost=\$?([0-9]+(?:\.[0-9]+)?)\s+\[\/AF\]/g; + let lastMatch: RegExpExecArray | null = null; + for (;;) { + const m = re.exec(r2.claudeCleanTail); + if (!m) break; + lastMatch = m; + } + if (lastMatch) { + const inputTokens = Number(lastMatch[1]); + const outputTokens = Number(lastMatch[2]); + const costUsd = Number(lastMatch[3]); + if ( + Number.isFinite(inputTokens) && + Number.isFinite(outputTokens) && + Number.isFinite(costUsd) + ) { + this.applyUsageTick(args.sessionId, { + inputTokens, + outputTokens, + costUsd, + source: "client_rendered_statusline", + }); + } + } + } + } + // Skip handleOutputText for claude (authoritative AF tick handles it). 
+ if (cmd !== "claude") void handleOutputText(data); + } }); p.onExit(({ exitCode, signal }) => { diff --git a/apps/web/src/TerminalPane.tsx b/apps/web/src/TerminalPane.tsx index 3b18ea0..9cba4df 100644 --- a/apps/web/src/TerminalPane.tsx +++ b/apps/web/src/TerminalPane.tsx @@ -2,6 +2,52 @@ import { useEffect, useMemo, useRef } from "react"; import { Terminal } from "xterm"; import { FitAddon } from "xterm-addon-fit"; +function parseClaudeStatuslineFromRenderedRow(rowText: string): { + inputTokens: number; + outputTokens: number; + costUsd?: number; +} | null { + // Only trust our AF-tagged status line to avoid accidentally matching other output. + // Supported formats: + // 1) API-usage oriented (recommended): + // "[AF] cost=0.019656 last_in=6 last_out=13 cache_r=38601 cache_w=0 [/AF]" + // 2) Legacy context-oriented: + // "[AF] ctx=... in=... out=... cost=... [/AF]" + + // Prefer the API-usage oriented format. + // Supported examples: + // - "[AF] cost=0.019656 last_in=6 last_out=13 cache_r=38601 cache_w=0 [/AF]" + // - "[AF] cost=0.019656 in=38607 out=15 [/AF]" (some builds don't print last_*) + const m1 = rowText.match( + /\[AF\][\s\S]*?\bcost=\$?([0-9]+(?:\.[0-9]+)?)\b[\s\S]*?(?:\blast_in=(\d+)\b[\s\S]*?\blast_out=(\d+)\b|\bin=(\d+)\b[\s\S]*?\bout=(\d+)\b)[\s\S]*?\[\/AF\]/i, + ); + if (m1) { + const costUsd = Number(m1[1]); + const inputTokens = Number(m1[2] ?? m1[4]); + const outputTokens = Number(m1[3] ?? m1[5]); + if (!Number.isFinite(costUsd) || costUsd < 0) return null; + if (!Number.isFinite(inputTokens) || inputTokens < 0) return null; + if (!Number.isFinite(outputTokens) || outputTokens < 0) return null; + return { inputTokens, outputTokens, costUsd }; + } + + // Fall back to the legacy format. 
+ const m2 = rowText.match( + /\[AF\][\s\S]*?\bin=(\d+)\b[\s\S]*?\bout=(\d+)\b(?:[\s\S]*?\bcost=\$?([0-9]+(?:\.[0-9]+)?))?[\s\S]*?\[\/AF\]/i, + ); + if (!m2) return null; + + const inputTokens = Number(m2[1]); + const outputTokens = Number(m2[2]); + const costUsd = m2[3] ? Number(m2[3]) : undefined; + if (!Number.isFinite(inputTokens) || inputTokens < 0) return null; + if (!Number.isFinite(outputTokens) || outputTokens < 0) return null; + if (costUsd !== undefined && (!Number.isFinite(costUsd) || costUsd < 0)) + return null; + + return { inputTokens, outputTokens, costUsd }; +} + type Props = { sessionId: string; ws: WebSocket | null; @@ -122,6 +168,86 @@ export default function TerminalPane({ sessionId, ws, active }: Props) { window.removeEventListener("agents_fleet:pty", handler as EventListener); }, [sessionId]); + // MVP: for Claude Code sessions, read the rendered bottom row from xterm and send + // usage ticks to the server. This avoids brittle parsing of redraw-heavy PTY output. + useEffect(() => { + if (!active) return; + + let lastSent: { inTok: number; outTok: number; cost?: number } | null = + null; + + const interval = window.setInterval(() => { + const t = termRef.current; + const s = wsRef.current; + if (!t || !s || s.readyState !== WebSocket.OPEN) return; + + // Read a small window of bottom rows; the status line may not be the very last row + // depending on terminal layout / prompts. NOTE: buffer.getLine takes an absolute + // line index into the scrollback+viewport buffer, NOT a viewport row index. The + // last visible line is at `buf.length - 1`. + const buf = t.buffer.active; + const end = buf.length - 1; + const start = Math.max(0, end - 60); + + let best: { + inputTokens: number; + outputTokens: number; + costUsd?: number; + } | null = null; + for (let row = end; row >= start; row--) { + const line = buf.getLine(row); + const text = line ? 
line.translateToString(true) : ""; + const p = parseClaudeStatuslineFromRenderedRow(text); + if (!p) continue; + if (!best) { + best = p; + } else { + // Prefer the line with higher tokens (more likely the latest statusline). + // If tokens are equal, prefer the one with a defined (non-null) cost. + const pTotal = p.inputTokens + p.outputTokens; + const bTotal = best.inputTokens + best.outputTokens; + + const pHasCost = typeof p.costUsd === "number"; + const bHasCost = typeof best.costUsd === "number"; + + if (pTotal > bTotal || (pTotal === bTotal && pHasCost && !bHasCost)) { + best = p; + } + } + } + if (!best) return; + + const parsed = best; + + // Only send if it changed. + if ( + lastSent && + lastSent.inTok === parsed.inputTokens && + lastSent.outTok === parsed.outputTokens && + lastSent.cost === parsed.costUsd + ) { + return; + } + lastSent = { + inTok: parsed.inputTokens, + outTok: parsed.outputTokens, + cost: parsed.costUsd, + }; + + s.send( + JSON.stringify({ + type: "usage_tick", + sessionId, + inputTokens: parsed.inputTokens, + outputTokens: parsed.outputTokens, + costUsd: parsed.costUsd, + }), + ); + }, 500); + + return () => window.clearInterval(interval); + }, [active, sessionId, ws]); + const helper = useMemo(() => { if (canUseWs) return null; return ( diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index 615b39a..d63cfee 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -53,6 +53,15 @@ export type WsClientMessage = | { type: "subscribe"; sessionId: string } | { type: "input"; sessionId: string; data: string } | { type: "resize"; sessionId: string; cols: number; rows: number } + | { + type: "usage_tick"; + sessionId: string; + inputTokens: number; + outputTokens: number; + costUsd?: number; + ctxSize?: number; + ctxUsedPct?: number; + } | { type: "claude_sdk_send"; sessionId: string; text: string } | { type: "claude_sdk_tool_decision";