From 172d94c9006353c4f9bf9b8a95e0469138f14741 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 08:48:23 +0200 Subject: [PATCH 1/9] feat: add AI usage tracking and reporting functionality - Implement Claude and Codex log readers for usage data - Create output formatter for displaying aggregated usage - Add pricing configuration and usage aggregation logic - Introduce CLI commands for usage reporting --- package.json | 1 + src/cli/ai-usage/ai-usage.pricing.json | 44 +++ src/cli/ai-usage/clients/claude-log-reader.ts | 158 +++++++++++ src/cli/ai-usage/clients/codex-log-reader.ts | 256 ++++++++++++++++++ src/cli/ai-usage/clients/output-formatter.ts | 188 +++++++++++++ src/cli/ai-usage/clients/usage-aggregator.ts | 187 +++++++++++++ src/cli/ai-usage/constants.ts | 21 ++ src/cli/ai-usage/main.ts | 162 +++++++++++ src/cli/ai-usage/types/schemas.ts | 143 ++++++++++ 9 files changed, 1160 insertions(+) create mode 100644 src/cli/ai-usage/ai-usage.pricing.json create mode 100644 src/cli/ai-usage/clients/claude-log-reader.ts create mode 100644 src/cli/ai-usage/clients/codex-log-reader.ts create mode 100644 src/cli/ai-usage/clients/output-formatter.ts create mode 100644 src/cli/ai-usage/clients/usage-aggregator.ts create mode 100644 src/cli/ai-usage/constants.ts create mode 100644 src/cli/ai-usage/main.ts create mode 100644 src/cli/ai-usage/types/schemas.ts diff --git a/package.json b/package.json index 50803be..f770184 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "run:scrape-publications": "tsx src/cli/scrape-publications/main.ts", "run:etf-backtest": "tsx src/cli/etf-backtest/main.ts", "run:agent-evals": "tsx src/cli/agent-evals/main.ts", + "ai:usage": "tsx src/cli/ai-usage/main.ts", "scaffold:cli": "tsx scripts/scaffold-cli.ts", "node:tsx": "node --disable-warning=ExperimentalWarning --import tsx", "typecheck": "tsc --noEmit", diff --git a/src/cli/ai-usage/ai-usage.pricing.json 
b/src/cli/ai-usage/ai-usage.pricing.json new file mode 100644 index 0000000..72e48e3 --- /dev/null +++ b/src/cli/ai-usage/ai-usage.pricing.json @@ -0,0 +1,44 @@ +{ + "unit": "per_1m_tokens", + "models": { + "claude-opus-4-5-20251101": { + "input": 5.0, + "output": 25.0, + "cacheRead": 0.5, + "cacheWrite": 10.0, + "cacheWrite_5m": 6.25, + "cacheWrite_1h": 10.0 + }, + "claude-sonnet-4": { + "input": 3.0, + "output": 15.0, + "cacheRead": 0.3, + "cacheWrite": 6.0, + "cacheWrite_5m": 3.75, + "cacheWrite_1h": 6.0 + }, + "claude-haiku-4-5": { + "input": 1.0, + "output": 5.0, + "cacheRead": 0.1, + "cacheWrite": 2.0, + "cacheWrite_5m": 1.25, + "cacheWrite_1h": 2.0 + }, + "gpt-5.2-codex": { + "input": 1.75, + "cacheRead": 0.175, + "output": 14.0 + }, + "gpt-5-mini": { + "input": 0.25, + "cacheRead": 0.025, + "output": 2.0 + }, + "gpt-5.1-codex-mini": { + "input": 0.25, + "output": 2.0, + "cacheRead": 0.025 + } + } +} diff --git a/src/cli/ai-usage/clients/claude-log-reader.ts b/src/cli/ai-usage/clients/claude-log-reader.ts new file mode 100644 index 0000000..0d51f57 --- /dev/null +++ b/src/cli/ai-usage/clients/claude-log-reader.ts @@ -0,0 +1,158 @@ +import { createReadStream } from "fs"; +import { readdir, stat } from "fs/promises"; +import { join } from "path"; +import { createInterface } from "readline"; + +import type { Logger } from "~clients/logger"; + +import { CLAUDE_PROJECTS_PATH, PROVIDER_CLAUDE } from "../constants"; +import type { ClaudeLogEntry, UsageRecord } from "../types/schemas"; +import { ClaudeLogEntrySchema } from "../types/schemas"; + +type ClaudeLogReaderOptions = { + logger: Logger; + basePath?: string; + debug?: boolean; +}; + +type GetUsageOptions = { + since: Date; + repoPath: string; +}; + +/** + * Encodes a repo path to the Claude projects directory format. 
+ * /home/juha/code/foo -> -home-juha-code-foo + */ +const encodeRepoPath = (repoPath: string): string => { + return repoPath.replace(/\//g, "-"); +}; + +export class ClaudeLogReader { + private logger: Logger; + private basePath: string; + private debug: boolean; + + constructor(options: ClaudeLogReaderOptions) { + this.logger = options.logger; + this.basePath = options.basePath ?? CLAUDE_PROJECTS_PATH; + this.debug = options.debug ?? false; + } + + /** + * Find all JSONL log files for a given repo path. + */ + async findLogFiles(repoPath: string): Promise { + const encodedPath = encodeRepoPath(repoPath); + const projectDir = join(this.basePath, encodedPath); + + try { + await stat(projectDir); + } catch { + if (this.debug) { + this.logger.debug("Claude project dir not found", { projectDir }); + } + return []; + } + + const files: string[] = []; + const entries = await readdir(projectDir, { withFileTypes: true }); + + for (const entry of entries) { + if (entry.isFile() && entry.name.endsWith(".jsonl")) { + files.push(join(projectDir, entry.name)); + } + } + + if (this.debug) { + this.logger.debug("Found Claude log files", { count: files.length }); + } + + return files; + } + + /** + * Parse a single JSONL file and yield usage entries. + */ + async *parseFile(filePath: string): AsyncGenerator { + const fileStream = createReadStream(filePath); + const rl = createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); + + for await (const line of rl) { + if (!line.trim()) { + continue; + } + + try { + const parsed: unknown = JSON.parse(line); + const result = ClaudeLogEntrySchema.safeParse(parsed); + + if (result.success) { + yield result.data; + } + } catch { + // Skip malformed lines + } + } + } + + /** + * Get usage records filtered by time and repo. 
+ */ + async getUsage(options: GetUsageOptions): Promise { + const { since, repoPath } = options; + const files = await this.findLogFiles(repoPath); + const records: UsageRecord[] = []; + + for (const file of files) { + for await (const entry of this.parseFile(file)) { + // Only process assistant entries with usage data + if (entry.type !== "assistant") { + continue; + } + if (!entry.message?.usage) { + continue; + } + if (!entry.message.model) { + continue; + } + + const timestamp = new Date(entry.timestamp); + if (timestamp < since) { + continue; + } + + // Match repo path if cwd is present + if (entry.cwd && !entry.cwd.startsWith(repoPath)) { + if (this.debug) { + this.logger.debug("Skipping entry: cwd mismatch", { + cwd: entry.cwd, + repoPath, + }); + } + continue; + } + + const usage = entry.message.usage; + records.push({ + provider: PROVIDER_CLAUDE, + model: entry.message.model, + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + cacheReadTokens: usage.cache_read_input_tokens, + cacheWriteTokens: usage.cache_creation_input_tokens, + timestamp, + }); + } + } + + if (this.debug) { + this.logger.debug("Found Claude usage records", { count: records.length }); + } + + return records; + } +} diff --git a/src/cli/ai-usage/clients/codex-log-reader.ts b/src/cli/ai-usage/clients/codex-log-reader.ts new file mode 100644 index 0000000..ca25b26 --- /dev/null +++ b/src/cli/ai-usage/clients/codex-log-reader.ts @@ -0,0 +1,256 @@ +import { createReadStream } from "fs"; +import { readdir, stat } from "fs/promises"; +import { join } from "path"; +import { createInterface } from "readline"; + +import type { Logger } from "~clients/logger"; + +import { CODEX_SESSIONS_PATH, PROVIDER_CODEX } from "../constants"; +import type { UsageRecord } from "../types/schemas"; +import { + CodexEventMsgPayloadSchema, + CodexLogEntrySchema, + CodexSessionMetaPayloadSchema, + CodexTurnContextPayloadSchema, +} from "../types/schemas"; + +type CodexLogReaderOptions = { + 
logger: Logger; + basePath?: string; + debug?: boolean; +}; + +type GetUsageOptions = { + since: Date; + repoPath: string; +}; + +type SessionData = { + cwd?: string; + model?: string; +}; + +export class CodexLogReader { + private logger: Logger; + private basePath: string; + private debug: boolean; + + constructor(options: CodexLogReaderOptions) { + this.logger = options.logger; + this.basePath = options.basePath ?? CODEX_SESSIONS_PATH; + this.debug = options.debug ?? false; + } + + /** + * Find all JSONL log files within the date range. + * Codex stores logs in YYYY/MM/DD directories. + */ + async findLogFiles(since: Date): Promise { + const files: string[] = []; + + try { + await stat(this.basePath); + } catch { + if (this.debug) { + this.logger.debug("Codex sessions dir not found", { + basePath: this.basePath, + }); + } + return []; + } + + // Walk through year/month/day directories + const years = await this.safeReaddir(this.basePath); + + for (const year of years) { + const yearPath = join(this.basePath, year); + const yearStat = await this.safeStat(yearPath); + if (!yearStat?.isDirectory()) { + continue; + } + + const months = await this.safeReaddir(yearPath); + + for (const month of months) { + const monthPath = join(yearPath, month); + const monthStat = await this.safeStat(monthPath); + if (!monthStat?.isDirectory()) { + continue; + } + + const days = await this.safeReaddir(monthPath); + + for (const day of days) { + const dayPath = join(monthPath, day); + const dayStat = await this.safeStat(dayPath); + if (!dayStat?.isDirectory()) { + continue; + } + + // Check if this date is within range + const dirDate = new Date(`${year}-${month}-${day}`); + if (dirDate < since) { + continue; + } + + const dayFiles = await this.safeReaddir(dayPath); + + for (const file of dayFiles) { + if (file.endsWith(".jsonl")) { + files.push(join(dayPath, file)); + } + } + } + } + } + + if (this.debug) { + this.logger.debug("Found Codex log files", { count: files.length }); + } + 
+ return files; + } + + private async safeReaddir(path: string): Promise { + try { + return await readdir(path); + } catch { + return []; + } + } + + private async safeStat(path: string) { + try { + return await stat(path); + } catch { + return null; + } + } + + /** + * Parse a session file and extract usage records. + * Need to track session metadata and model across entries. + */ + async parseSession( + filePath: string, + repoPath: string, + since: Date + ): Promise { + const records: UsageRecord[] = []; + const sessionData: SessionData = {}; + + const fileStream = createReadStream(filePath); + const rl = createInterface({ + input: fileStream, + crlfDelay: Infinity, + }); + + for await (const line of rl) { + if (!line.trim()) { + continue; + } + + try { + const parsed: unknown = JSON.parse(line); + const baseResult = CodexLogEntrySchema.safeParse(parsed); + if (!baseResult.success) { + continue; + } + + const entry = baseResult.data; + const timestamp = new Date(entry.timestamp); + + // Extract session metadata (cwd) + if (entry.type === "session_meta") { + const metaResult = CodexSessionMetaPayloadSchema.safeParse( + entry.payload + ); + if (metaResult.success) { + sessionData.cwd = metaResult.data.cwd; + } + } + + // Extract model from turn_context + if (entry.type === "turn_context") { + const contextResult = CodexTurnContextPayloadSchema.safeParse( + entry.payload + ); + if (contextResult.success && contextResult.data.model) { + sessionData.model = contextResult.data.model; + } + } + + // Extract token usage from event_msg + if (entry.type === "event_msg") { + const eventResult = CodexEventMsgPayloadSchema.safeParse( + entry.payload + ); + if (!eventResult.success) { + continue; + } + + const payload = eventResult.data; + if (payload.type !== "token_count") { + continue; + } + if (!payload.info?.total_token_usage) { + continue; + } + + // Skip if before since date + if (timestamp < since) { + continue; + } + + // Skip if cwd doesn't match repo + if 
(sessionData.cwd && !sessionData.cwd.startsWith(repoPath)) { + if (this.debug) { + this.logger.debug("Skipping Codex entry: cwd mismatch", { + cwd: sessionData.cwd, + repoPath, + }); + } + continue; + } + + const usage = payload.info.total_token_usage; + records.push({ + provider: PROVIDER_CODEX, + model: sessionData.model ?? "unknown", + inputTokens: usage.input_tokens, + outputTokens: usage.output_tokens, + cacheReadTokens: usage.cached_input_tokens, + cacheWriteTokens: 0, // Codex doesn't track cache writes separately + timestamp, + }); + } + } catch { + // Skip malformed lines + } + } + + return records; + } + + /** + * Get usage records filtered by time and repo. + */ + async getUsage(options: GetUsageOptions): Promise { + const { since, repoPath } = options; + const files = await this.findLogFiles(since); + const allRecords: UsageRecord[] = []; + + for (const file of files) { + const records = await this.parseSession(file, repoPath, since); + allRecords.push(...records); + } + + if (this.debug) { + this.logger.debug("Found Codex usage records", { + count: allRecords.length, + }); + } + + return allRecords; + } +} diff --git a/src/cli/ai-usage/clients/output-formatter.ts b/src/cli/ai-usage/clients/output-formatter.ts new file mode 100644 index 0000000..e430c99 --- /dev/null +++ b/src/cli/ai-usage/clients/output-formatter.ts @@ -0,0 +1,188 @@ +import type { AggregatedUsage } from "../types/schemas"; + +type OutputFormatterOptions = { + sinceLabel: string; +}; + +/** + * Formats aggregated usage data for console output. + */ +export class OutputFormatter { + private sinceLabel: string; + + constructor(options: OutputFormatterOptions) { + this.sinceLabel = options.sinceLabel; + } + + /** + * Format a number with thousand separators. + */ + private formatNumber(n: number): string { + return n.toLocaleString("en-US"); + } + + /** + * Format a cost in USD. 
+ */ + private formatCost(cost: number): string { + return `$${cost.toFixed(2)}`; + } + + /** + * Pad a string to a given width (right-aligned for numbers). + */ + private padRight(s: string, width: number): string { + return s.padEnd(width); + } + + private padLeft(s: string, width: number): string { + return s.padStart(width); + } + + /** + * Print warning about models missing from pricing config. + */ + printUnknownModelsWarning(models: string[]): void { + if (models.length === 0) { + return; + } + + console.log("\nWarning: The following models have no pricing data:"); + for (const model of models) { + console.log(` - ${model}`); + } + console.log("Add pricing info to ai-usage.pricing.json for accurate cost estimates."); + } + + /** + * Print the summary section (totals per provider and model). + */ + printSummary(usage: AggregatedUsage): void { + console.log(`\nAI Usage Summary (Last ${this.sinceLabel})`); + console.log(`Repo: ${usage.repo}`); + this.printUnknownModelsWarning(usage.unknownModels); + console.log(""); + + // By provider + console.log("By Provider:"); + for (const [provider, summary] of Object.entries(usage.byProvider)) { + const tokens = this.formatNumber(summary.tokens); + const cost = this.formatCost(summary.cost); + console.log(` ${provider}: ${tokens} tokens (${cost})`); + } + + // By model + console.log("\nBy Model:"); + for (const summary of usage.byModel) { + const tokens = this.formatNumber(summary.tokens); + const cost = this.formatCost(summary.cost); + console.log(` ${summary.model}: ${tokens} tokens (${cost})`); + } + + console.log(""); + } + + /** + * Print the markdown table. 
+ */ + printTable(usage: AggregatedUsage): void { + // Column widths + const cols = { + provider: 8, + model: 28, + input: 12, + output: 12, + cacheR: 12, + cacheW: 12, + total: 12, + cost: 12, + }; + + // Header + const header = [ + this.padRight("Provider", cols.provider), + this.padRight("Model", cols.model), + this.padLeft("Input", cols.input), + this.padLeft("Output", cols.output), + this.padLeft("Cache R", cols.cacheR), + this.padLeft("Cache W", cols.cacheW), + this.padLeft("Total", cols.total), + this.padLeft("Est. Cost", cols.cost), + ].join(" | "); + + const separator = [ + "-".repeat(cols.provider), + "-".repeat(cols.model), + "-".repeat(cols.input), + "-".repeat(cols.output), + "-".repeat(cols.cacheR), + "-".repeat(cols.cacheW), + "-".repeat(cols.total), + "-".repeat(cols.cost), + ].join("-|-"); + + console.log(`| ${header} |`); + console.log(`|-${separator}-|`); + + // Rows + for (const row of usage.rows) { + const line = [ + this.padRight(row.provider, cols.provider), + this.padRight(row.model, cols.model), + this.padLeft(this.formatNumber(row.inputTokens), cols.input), + this.padLeft(this.formatNumber(row.outputTokens), cols.output), + this.padLeft(this.formatNumber(row.cacheReadTokens), cols.cacheR), + this.padLeft(this.formatNumber(row.cacheWriteTokens), cols.cacheW), + this.padLeft(this.formatNumber(row.totalTokens), cols.total), + this.padLeft(this.formatCost(row.cost), cols.cost), + ].join(" | "); + console.log(`| ${line} |`); + } + + // Separator before totals + console.log(`|-${separator}-|`); + + // Totals row + const totalsLine = [ + this.padRight("TOTAL", cols.provider), + this.padRight("", cols.model), + this.padLeft(this.formatNumber(usage.totals.inputTokens), cols.input), + this.padLeft(this.formatNumber(usage.totals.outputTokens), cols.output), + this.padLeft(this.formatNumber(usage.totals.cacheReadTokens), cols.cacheR), + this.padLeft( + this.formatNumber(usage.totals.cacheWriteTokens), + cols.cacheW + ), + 
this.padLeft(this.formatNumber(usage.totals.totalTokens), cols.total), + this.padLeft(this.formatCost(usage.totals.cost), cols.cost), + ].join(" | "); + console.log(`| ${totalsLine} |`); + } + + /** + * Print JSON output. + */ + printJson(usage: AggregatedUsage): void { + const output = { + period: { + since: usage.period.since.toISOString(), + until: usage.period.until.toISOString(), + }, + repo: usage.repo, + byProvider: usage.byProvider, + byModel: usage.byModel, + rows: usage.rows, + totals: usage.totals, + unknownModels: usage.unknownModels, + }; + console.log(JSON.stringify(output, null, 2)); + } + + /** + * Print a message when no data is found. + */ + printNoData(repo: string): void { + console.log(`\nNo usage data found for repo: ${repo}`); + console.log(`Time period: Last ${this.sinceLabel}\n`); + } +} diff --git a/src/cli/ai-usage/clients/usage-aggregator.ts b/src/cli/ai-usage/clients/usage-aggregator.ts new file mode 100644 index 0000000..e39e3ee --- /dev/null +++ b/src/cli/ai-usage/clients/usage-aggregator.ts @@ -0,0 +1,187 @@ +import type { + AggregatedRow, + AggregatedUsage, + ModelSummary, + PricingConfig, + ProviderSummary, + UsageRecord, +} from "../types/schemas"; + +type UsageAggregatorOptions = { + pricing: PricingConfig; +}; + +type AggregateOptions = { + records: UsageRecord[]; + since: Date; + until: Date; + repo: string; +}; + +export class UsageAggregator { + private pricing: PricingConfig; + private unknownModels = new Set(); + + constructor(options: UsageAggregatorOptions) { + this.pricing = options.pricing; + } + + /** + * Calculate cost for a usage record based on pricing config. + * Tracks models that have no pricing data configured. 
+ */ + calculateCost(record: UsageRecord): number { + const modelPricing = this.pricing.models[record.model]; + + if (!modelPricing) { + this.unknownModels.add(record.model); + return 0; + } + + // Prices are per 1M tokens + const inputCost = (record.inputTokens / 1_000_000) * modelPricing.input; + const outputCost = (record.outputTokens / 1_000_000) * modelPricing.output; + const cacheReadCost = + (record.cacheReadTokens / 1_000_000) * (modelPricing.cacheRead ?? 0); + const cacheWriteCost = + (record.cacheWriteTokens / 1_000_000) * (modelPricing.cacheWrite ?? 0); + + return inputCost + outputCost + cacheReadCost + cacheWriteCost; + } + + /** + * Aggregate usage records by provider and model. + */ + aggregate(options: AggregateOptions): AggregatedUsage { + const { records, since, until, repo } = options; + + // Group by provider+model + const groups = new Map< + string, + { + provider: "claude" | "codex"; + model: string; + inputTokens: number; + outputTokens: number; + cacheReadTokens: number; + cacheWriteTokens: number; + cost: number; + } + >(); + + for (const record of records) { + const key = `${record.provider}:${record.model}`; + const existing = groups.get(key); + const cost = this.calculateCost(record); + + if (existing) { + existing.inputTokens += record.inputTokens; + existing.outputTokens += record.outputTokens; + existing.cacheReadTokens += record.cacheReadTokens; + existing.cacheWriteTokens += record.cacheWriteTokens; + existing.cost += cost; + } else { + groups.set(key, { + provider: record.provider, + model: record.model, + inputTokens: record.inputTokens, + outputTokens: record.outputTokens, + cacheReadTokens: record.cacheReadTokens, + cacheWriteTokens: record.cacheWriteTokens, + cost, + }); + } + } + + // Build rows + const rows: AggregatedRow[] = Array.from(groups.values()).map((g) => ({ + provider: g.provider, + model: g.model, + inputTokens: g.inputTokens, + outputTokens: g.outputTokens, + cacheReadTokens: g.cacheReadTokens, + cacheWriteTokens: 
g.cacheWriteTokens, + totalTokens: + g.inputTokens + + g.outputTokens + + g.cacheReadTokens + + g.cacheWriteTokens, + cost: g.cost, + })); + + // Sort by provider, then by total tokens descending + rows.sort((a, b) => { + if (a.provider !== b.provider) { + return a.provider.localeCompare(b.provider); + } + return b.totalTokens - a.totalTokens; + }); + + // Aggregate by provider + const byProvider: Record = {}; + for (const row of rows) { + const existing = byProvider[row.provider]; + if (existing) { + existing.tokens += row.totalTokens; + existing.cost += row.cost; + } else { + byProvider[row.provider] = { + tokens: row.totalTokens, + cost: row.cost, + }; + } + } + + // Aggregate by model + const modelMap = new Map(); + for (const row of rows) { + const existing = modelMap.get(row.model); + if (existing) { + existing.tokens += row.totalTokens; + existing.cost += row.cost; + } else { + modelMap.set(row.model, { + tokens: row.totalTokens, + cost: row.cost, + }); + } + } + + const byModel: ModelSummary[] = Array.from(modelMap.entries()) + .map(([model, data]) => ({ + model, + tokens: data.tokens, + cost: data.cost, + })) + .sort((a, b) => b.tokens - a.tokens); + + // Calculate totals + const totals = { + inputTokens: 0, + outputTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0, + totalTokens: 0, + cost: 0, + }; + + for (const row of rows) { + totals.inputTokens += row.inputTokens; + totals.outputTokens += row.outputTokens; + totals.cacheReadTokens += row.cacheReadTokens; + totals.cacheWriteTokens += row.cacheWriteTokens; + totals.totalTokens += row.totalTokens; + totals.cost += row.cost; + } + + return { + period: { since, until }, + repo, + byProvider, + byModel, + rows, + totals, + unknownModels: Array.from(this.unknownModels).sort(), + }; + } +} diff --git a/src/cli/ai-usage/constants.ts b/src/cli/ai-usage/constants.ts new file mode 100644 index 0000000..9287be0 --- /dev/null +++ b/src/cli/ai-usage/constants.ts @@ -0,0 +1,21 @@ +import { homedir } from "os"; 
+import { join } from "path"; + +// Default log paths +export const CLAUDE_PROJECTS_PATH = join(homedir(), ".claude", "projects"); +export const CODEX_SESSIONS_PATH = join( + process.env.CODEX_HOME ?? join(homedir(), ".codex"), + "sessions" +); + +// Duration mappings (string to milliseconds) +export const DURATION_MS: Record = { + "1h": 60 * 60 * 1000, + "24h": 24 * 60 * 60 * 1000, + "7d": 7 * 24 * 60 * 60 * 1000, + "30d": 30 * 24 * 60 * 60 * 1000, +}; + +// Provider names +export const PROVIDER_CLAUDE = "claude" as const; +export const PROVIDER_CODEX = "codex" as const; diff --git a/src/cli/ai-usage/main.ts b/src/cli/ai-usage/main.ts new file mode 100644 index 0000000..1edd335 --- /dev/null +++ b/src/cli/ai-usage/main.ts @@ -0,0 +1,162 @@ +// pnpm ai:usage +// pnpm ai:usage --since 24h +// pnpm ai:usage --since 30d +// pnpm ai:usage --repo /path/to/repo +// pnpm ai:usage --json +// pnpm ai:usage --debug + +import { execSync } from "child_process"; +import { readFileSync } from "fs"; +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; + +import { Logger } from "~clients/logger"; +import { parseArgs } from "~utils/parse-args"; + +import { ClaudeLogReader } from "./clients/claude-log-reader"; +import { CodexLogReader } from "./clients/codex-log-reader"; +import { OutputFormatter } from "./clients/output-formatter"; +import { UsageAggregator } from "./clients/usage-aggregator"; +import { DURATION_MS } from "./constants"; +import type { PricingConfig } from "./types/schemas"; +import { CliArgsSchema, PricingConfigSchema } from "./types/schemas"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +/** + * Resolve repo path, defaulting to git root of current directory. 
+ */ +const resolveRepoPath = (repoArg?: string): string => { + if (repoArg) { + return repoArg; + } + + try { + const result = execSync("git rev-parse --show-toplevel", { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + }); + return result.trim(); + } catch { + // Not in a git repo, use cwd + return process.cwd(); + } +}; + +/** + * Parse since duration string to a Date. + */ +const parseSinceDuration = (since: string): Date => { + const ms = DURATION_MS[since]; + if (!ms) { + throw new Error( + `Invalid --since value: ${since}. Use one of: ${Object.keys(DURATION_MS).join(", ")}` + ); + } + return new Date(Date.now() - ms); +}; + +/** + * Load pricing config from JSON file. + */ +const loadPricing = (): PricingConfig => { + const pricingPath = join(__dirname, "ai-usage.pricing.json"); + const content = readFileSync(pricingPath, "utf-8"); + const data: unknown = JSON.parse(content); + return PricingConfigSchema.parse(data); +}; + +// --- Main --- + +const logger = new Logger({ level: "info" }); + +// Parse CLI arguments +const args = parseArgs({ logger, schema: CliArgsSchema }); + +if (args.debug) { + logger.debug("Debug mode enabled"); + logger.debug("Arguments", args); +} + +// Resolve repo path +const repoPath = resolveRepoPath(args.repo); + +if (args.debug) { + logger.debug("Repo path", { repoPath }); +} + +// Calculate since date +const sinceDate = parseSinceDuration(args.since); +const untilDate = new Date(); + +if (args.debug) { + logger.debug("Time range", { + since: sinceDate.toISOString(), + until: untilDate.toISOString(), + }); +} + +// Load pricing config +const pricing = loadPricing(); + +// Initialize readers +const claudeReader = new ClaudeLogReader({ logger, debug: args.debug }); +const codexReader = new CodexLogReader({ logger, debug: args.debug }); + +// Read logs from both providers in parallel +const [claudeRecords, codexRecords] = await Promise.all([ + claudeReader.getUsage({ since: sinceDate, repoPath }), + codexReader.getUsage({ 
since: sinceDate, repoPath }), +]); + +const allRecords = [...claudeRecords, ...codexRecords]; + +// Initialize formatter +const formatter = new OutputFormatter({ sinceLabel: args.since }); + +// Handle no data case +if (allRecords.length === 0) { + if (args.json) { + console.log( + JSON.stringify({ + period: { + since: sinceDate.toISOString(), + until: untilDate.toISOString(), + }, + repo: repoPath, + byProvider: {}, + byModel: [], + rows: [], + totals: { + inputTokens: 0, + outputTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0, + totalTokens: 0, + cost: 0, + }, + unknownModels: [], + }) + ); + } else { + formatter.printNoData(repoPath); + } + process.exit(0); +} + +// Aggregate and calculate costs +const aggregator = new UsageAggregator({ pricing }); +const usage = aggregator.aggregate({ + records: allRecords, + since: sinceDate, + until: untilDate, + repo: repoPath, +}); + +// Output +if (args.json) { + formatter.printJson(usage); +} else { + formatter.printSummary(usage); + formatter.printTable(usage); +} diff --git a/src/cli/ai-usage/types/schemas.ts b/src/cli/ai-usage/types/schemas.ts new file mode 100644 index 0000000..805c1e2 --- /dev/null +++ b/src/cli/ai-usage/types/schemas.ts @@ -0,0 +1,143 @@ +import { z } from "zod"; + +// CLI arguments schema +export const CliArgsSchema = z.object({ + since: z.string().default("7d"), + repo: z.string().optional(), + json: z.coerce.boolean().default(false), + debug: z.coerce.boolean().default(false), +}); + +export type CliArgs = z.infer; + +// Pricing config schema +export const ModelPricingSchema = z.object({ + input: z.number(), + output: z.number(), + cacheRead: z.number().optional(), + cacheWrite: z.number().optional(), +}); + +export type ModelPricing = z.infer; + +export const PricingConfigSchema = z.object({ + unit: z.literal("per_1m_tokens"), + models: z.record(z.string(), ModelPricingSchema), +}); + +export type PricingConfig = z.infer; + +// Normalized usage record (shared between providers) +export 
const UsageRecordSchema = z.object({ + provider: z.enum(["claude", "codex"]), + model: z.string(), + inputTokens: z.number(), + outputTokens: z.number(), + cacheReadTokens: z.number(), + cacheWriteTokens: z.number(), + timestamp: z.date(), +}); + +export type UsageRecord = z.infer; + +// Claude Code CLI log entry schemas +export const ClaudeUsageSchema = z.object({ + input_tokens: z.number().optional().default(0), + output_tokens: z.number().optional().default(0), + cache_creation_input_tokens: z.number().optional().default(0), + cache_read_input_tokens: z.number().optional().default(0), +}); + +export const ClaudeMessageSchema = z.object({ + model: z.string().optional(), + usage: ClaudeUsageSchema.optional(), +}); + +export const ClaudeLogEntrySchema = z.object({ + type: z.string(), + timestamp: z.string(), + cwd: z.string().optional(), + message: ClaudeMessageSchema.optional(), +}); + +export type ClaudeLogEntry = z.infer; + +// Codex CLI log entry schemas +export const CodexTokenUsageSchema = z.object({ + input_tokens: z.number().optional().default(0), + cached_input_tokens: z.number().optional().default(0), + output_tokens: z.number().optional().default(0), +}); + +export const CodexSessionMetaPayloadSchema = z.object({ + cwd: z.string().optional(), + git: z + .object({ + repository_url: z.string().optional(), + }) + .optional(), +}); + +export const CodexTurnContextPayloadSchema = z.object({ + model: z.string().optional(), +}); + +export const CodexEventMsgPayloadSchema = z.object({ + type: z.string(), + info: z + .object({ + total_token_usage: CodexTokenUsageSchema.optional(), + }) + .optional(), +}); + +export const CodexLogEntrySchema = z.object({ + type: z.string(), + timestamp: z.string(), + payload: z.unknown(), +}); + +export type CodexLogEntry = z.infer; + +// Aggregated usage types +export type AggregatedRow = { + provider: "claude" | "codex"; + model: string; + inputTokens: number; + outputTokens: number; + cacheReadTokens: number; + 
cacheWriteTokens: number; + totalTokens: number; + cost: number; +}; + +export type ProviderSummary = { + tokens: number; + cost: number; +}; + +export type ModelSummary = { + model: string; + tokens: number; + cost: number; +}; + +export type AggregatedUsage = { + period: { + since: Date; + until: Date; + }; + repo: string; + byProvider: Record; + byModel: ModelSummary[]; + rows: AggregatedRow[]; + totals: { + inputTokens: number; + outputTokens: number; + cacheReadTokens: number; + cacheWriteTokens: number; + totalTokens: number; + cost: number; + }; + unknownModels: string[]; +}; From 07940dd4deab12a5ae5f877ec6cdb34ddcf5c670 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 08:48:31 +0200 Subject: [PATCH 2/9] fix: skip synthetic entries in usage records filtering --- src/cli/ai-usage/clients/claude-log-reader.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cli/ai-usage/clients/claude-log-reader.ts b/src/cli/ai-usage/clients/claude-log-reader.ts index 0d51f57..f54d4a8 100644 --- a/src/cli/ai-usage/clients/claude-log-reader.ts +++ b/src/cli/ai-usage/clients/claude-log-reader.ts @@ -119,6 +119,10 @@ export class ClaudeLogReader { if (!entry.message.model) { continue; } + // Skip synthetic entries (client-side error messages, not real API calls) + if (entry.message.model === "") { + continue; + } const timestamp = new Date(entry.timestamp); if (timestamp < since) { From 734d9874c40078aef55912dc712ef24b387cee79 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 08:52:02 +0200 Subject: [PATCH 3/9] feat: add dynamic column width calculation for output formatting --- src/cli/ai-usage/clients/output-formatter.ts | 60 ++++++++++++++++---- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/src/cli/ai-usage/clients/output-formatter.ts b/src/cli/ai-usage/clients/output-formatter.ts index e430c99..3f7b422 100644 --- 
a/src/cli/ai-usage/clients/output-formatter.ts +++ b/src/cli/ai-usage/clients/output-formatter.ts @@ -4,6 +4,17 @@ type OutputFormatterOptions = { sinceLabel: string; }; +type ColumnWidths = { + provider: number; + model: number; + input: number; + output: number; + cacheR: number; + cacheW: number; + total: number; + cost: number; +}; + /** * Formats aggregated usage data for console output. */ @@ -39,6 +50,43 @@ export class OutputFormatter { return s.padStart(width); } + /** + * Calculate column widths based on actual data values. + */ + private calculateColumnWidths(usage: AggregatedUsage): ColumnWidths { + const widths = { + provider: "Provider".length, + model: "Model".length, + input: "Input".length, + output: "Output".length, + cacheR: "Cache R".length, + cacheW: "Cache W".length, + total: "Total".length, + cost: "Est. Cost".length, + }; + + for (const row of usage.rows) { + widths.provider = Math.max(widths.provider, row.provider.length); + widths.model = Math.max(widths.model, row.model.length); + widths.input = Math.max(widths.input, this.formatNumber(row.inputTokens).length); + widths.output = Math.max(widths.output, this.formatNumber(row.outputTokens).length); + widths.cacheR = Math.max(widths.cacheR, this.formatNumber(row.cacheReadTokens).length); + widths.cacheW = Math.max(widths.cacheW, this.formatNumber(row.cacheWriteTokens).length); + widths.total = Math.max(widths.total, this.formatNumber(row.totalTokens).length); + widths.cost = Math.max(widths.cost, this.formatCost(row.cost).length); + } + + widths.provider = Math.max(widths.provider, "TOTAL".length); + widths.input = Math.max(widths.input, this.formatNumber(usage.totals.inputTokens).length); + widths.output = Math.max(widths.output, this.formatNumber(usage.totals.outputTokens).length); + widths.cacheR = Math.max(widths.cacheR, this.formatNumber(usage.totals.cacheReadTokens).length); + widths.cacheW = Math.max(widths.cacheW, this.formatNumber(usage.totals.cacheWriteTokens).length); + 
widths.total = Math.max(widths.total, this.formatNumber(usage.totals.totalTokens).length); + widths.cost = Math.max(widths.cost, this.formatCost(usage.totals.cost).length); + + return widths; + } + /** * Print warning about models missing from pricing config. */ @@ -86,17 +134,7 @@ export class OutputFormatter { * Print the markdown table. */ printTable(usage: AggregatedUsage): void { - // Column widths - const cols = { - provider: 8, - model: 28, - input: 12, - output: 12, - cacheR: 12, - cacheW: 12, - total: 12, - cost: 12, - }; + const cols = this.calculateColumnWidths(usage); // Header const header = [ From 75a6686fc5b38ec51776039dff642d4342cf0790 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 10:48:38 +0200 Subject: [PATCH 4/9] feat: deduplicate token_count events in usage record parsing --- src/cli/ai-usage/clients/codex-log-reader.ts | 15 +++++++++++++-- src/cli/ai-usage/types/schemas.ts | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/cli/ai-usage/clients/codex-log-reader.ts b/src/cli/ai-usage/clients/codex-log-reader.ts index ca25b26..130139a 100644 --- a/src/cli/ai-usage/clients/codex-log-reader.ts +++ b/src/cli/ai-usage/clients/codex-log-reader.ts @@ -130,6 +130,7 @@ export class CodexLogReader { /** * Parse a session file and extract usage records. * Need to track session metadata and model across entries. + * Codex emits duplicate token_count events - we dedupe by tracking last seen values. 
*/ async parseSession( filePath: string, @@ -138,6 +139,7 @@ export class CodexLogReader { ): Promise { const records: UsageRecord[] = []; const sessionData: SessionData = {}; + let lastUsageKey = ""; const fileStream = createReadStream(filePath); const rl = createInterface({ @@ -193,7 +195,8 @@ export class CodexLogReader { if (payload.type !== "token_count") { continue; } - if (!payload.info?.total_token_usage) { + // Use last_token_usage (incremental per-request) not total_token_usage (cumulative) + if (!payload.info?.last_token_usage) { continue; } @@ -213,7 +216,15 @@ export class CodexLogReader { continue; } - const usage = payload.info.total_token_usage; + const usage = payload.info.last_token_usage; + + // Dedupe: Codex emits duplicate token_count events with identical values + const usageKey = `${usage.input_tokens}:${usage.output_tokens}:${usage.cached_input_tokens}`; + if (usageKey === lastUsageKey) { + continue; + } + lastUsageKey = usageKey; + records.push({ provider: PROVIDER_CODEX, model: sessionData.model ?? 
"unknown", diff --git a/src/cli/ai-usage/types/schemas.ts b/src/cli/ai-usage/types/schemas.ts index 805c1e2..1ae7c33 100644 --- a/src/cli/ai-usage/types/schemas.ts +++ b/src/cli/ai-usage/types/schemas.ts @@ -87,6 +87,7 @@ export const CodexEventMsgPayloadSchema = z.object({ info: z .object({ total_token_usage: CodexTokenUsageSchema.optional(), + last_token_usage: CodexTokenUsageSchema.optional(), }) .optional(), }); From 7e1d6f507b0911d5df35006c441beb1bf3f0dc76 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 10:52:14 +0200 Subject: [PATCH 5/9] refactor: simplify totalTokens calculation in aggregate method --- src/cli/ai-usage/clients/claude-log-reader.ts | 5 +- src/cli/ai-usage/clients/codex-log-reader.ts | 1 - src/cli/ai-usage/clients/output-formatter.ts | 64 +++++++++++++++---- src/cli/ai-usage/clients/usage-aggregator.ts | 6 +- src/cli/ai-usage/main.ts | 1 - 5 files changed, 55 insertions(+), 22 deletions(-) diff --git a/src/cli/ai-usage/clients/claude-log-reader.ts b/src/cli/ai-usage/clients/claude-log-reader.ts index f54d4a8..ebc9362 100644 --- a/src/cli/ai-usage/clients/claude-log-reader.ts +++ b/src/cli/ai-usage/clients/claude-log-reader.ts @@ -2,7 +2,6 @@ import { createReadStream } from "fs"; import { readdir, stat } from "fs/promises"; import { join } from "path"; import { createInterface } from "readline"; - import type { Logger } from "~clients/logger"; import { CLAUDE_PROJECTS_PATH, PROVIDER_CLAUDE } from "../constants"; @@ -154,7 +153,9 @@ export class ClaudeLogReader { } if (this.debug) { - this.logger.debug("Found Claude usage records", { count: records.length }); + this.logger.debug("Found Claude usage records", { + count: records.length, + }); } return records; diff --git a/src/cli/ai-usage/clients/codex-log-reader.ts b/src/cli/ai-usage/clients/codex-log-reader.ts index 130139a..a912db9 100644 --- a/src/cli/ai-usage/clients/codex-log-reader.ts +++ 
b/src/cli/ai-usage/clients/codex-log-reader.ts @@ -2,7 +2,6 @@ import { createReadStream } from "fs"; import { readdir, stat } from "fs/promises"; import { join } from "path"; import { createInterface } from "readline"; - import type { Logger } from "~clients/logger"; import { CODEX_SESSIONS_PATH, PROVIDER_CODEX } from "../constants"; diff --git a/src/cli/ai-usage/clients/output-formatter.ts b/src/cli/ai-usage/clients/output-formatter.ts index 3f7b422..18878cf 100644 --- a/src/cli/ai-usage/clients/output-formatter.ts +++ b/src/cli/ai-usage/clients/output-formatter.ts @@ -68,21 +68,54 @@ export class OutputFormatter { for (const row of usage.rows) { widths.provider = Math.max(widths.provider, row.provider.length); widths.model = Math.max(widths.model, row.model.length); - widths.input = Math.max(widths.input, this.formatNumber(row.inputTokens).length); - widths.output = Math.max(widths.output, this.formatNumber(row.outputTokens).length); - widths.cacheR = Math.max(widths.cacheR, this.formatNumber(row.cacheReadTokens).length); - widths.cacheW = Math.max(widths.cacheW, this.formatNumber(row.cacheWriteTokens).length); - widths.total = Math.max(widths.total, this.formatNumber(row.totalTokens).length); + widths.input = Math.max( + widths.input, + this.formatNumber(row.inputTokens).length + ); + widths.output = Math.max( + widths.output, + this.formatNumber(row.outputTokens).length + ); + widths.cacheR = Math.max( + widths.cacheR, + this.formatNumber(row.cacheReadTokens).length + ); + widths.cacheW = Math.max( + widths.cacheW, + this.formatNumber(row.cacheWriteTokens).length + ); + widths.total = Math.max( + widths.total, + this.formatNumber(row.totalTokens).length + ); widths.cost = Math.max(widths.cost, this.formatCost(row.cost).length); } widths.provider = Math.max(widths.provider, "TOTAL".length); - widths.input = Math.max(widths.input, this.formatNumber(usage.totals.inputTokens).length); - widths.output = Math.max(widths.output, 
this.formatNumber(usage.totals.outputTokens).length); - widths.cacheR = Math.max(widths.cacheR, this.formatNumber(usage.totals.cacheReadTokens).length); - widths.cacheW = Math.max(widths.cacheW, this.formatNumber(usage.totals.cacheWriteTokens).length); - widths.total = Math.max(widths.total, this.formatNumber(usage.totals.totalTokens).length); - widths.cost = Math.max(widths.cost, this.formatCost(usage.totals.cost).length); + widths.input = Math.max( + widths.input, + this.formatNumber(usage.totals.inputTokens).length + ); + widths.output = Math.max( + widths.output, + this.formatNumber(usage.totals.outputTokens).length + ); + widths.cacheR = Math.max( + widths.cacheR, + this.formatNumber(usage.totals.cacheReadTokens).length + ); + widths.cacheW = Math.max( + widths.cacheW, + this.formatNumber(usage.totals.cacheWriteTokens).length + ); + widths.total = Math.max( + widths.total, + this.formatNumber(usage.totals.totalTokens).length + ); + widths.cost = Math.max( + widths.cost, + this.formatCost(usage.totals.cost).length + ); return widths; } @@ -99,7 +132,9 @@ export class OutputFormatter { for (const model of models) { console.log(` - ${model}`); } - console.log("Add pricing info to ai-usage.pricing.json for accurate cost estimates."); + console.log( + "Add pricing info to ai-usage.pricing.json for accurate cost estimates." 
+ ); } /** @@ -186,7 +221,10 @@ export class OutputFormatter { this.padRight("", cols.model), this.padLeft(this.formatNumber(usage.totals.inputTokens), cols.input), this.padLeft(this.formatNumber(usage.totals.outputTokens), cols.output), - this.padLeft(this.formatNumber(usage.totals.cacheReadTokens), cols.cacheR), + this.padLeft( + this.formatNumber(usage.totals.cacheReadTokens), + cols.cacheR + ), this.padLeft( this.formatNumber(usage.totals.cacheWriteTokens), cols.cacheW diff --git a/src/cli/ai-usage/clients/usage-aggregator.ts b/src/cli/ai-usage/clients/usage-aggregator.ts index e39e3ee..d58155b 100644 --- a/src/cli/ai-usage/clients/usage-aggregator.ts +++ b/src/cli/ai-usage/clients/usage-aggregator.ts @@ -101,11 +101,7 @@ export class UsageAggregator { outputTokens: g.outputTokens, cacheReadTokens: g.cacheReadTokens, cacheWriteTokens: g.cacheWriteTokens, - totalTokens: - g.inputTokens + - g.outputTokens + - g.cacheReadTokens + - g.cacheWriteTokens, + totalTokens: g.inputTokens + g.outputTokens, cost: g.cost, })); diff --git a/src/cli/ai-usage/main.ts b/src/cli/ai-usage/main.ts index 1edd335..152ba03 100644 --- a/src/cli/ai-usage/main.ts +++ b/src/cli/ai-usage/main.ts @@ -9,7 +9,6 @@ import { execSync } from "child_process"; import { readFileSync } from "fs"; import { dirname, join } from "path"; import { fileURLToPath } from "url"; - import { Logger } from "~clients/logger"; import { parseArgs } from "~utils/parse-args"; From 2cdc248d4d135996cb24df73f8c775347aaa7703 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 11:04:36 +0200 Subject: [PATCH 6/9] docs: add README for AI Usage CLI with usage instructions and examples --- src/cli/ai-usage/README.md | 60 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 src/cli/ai-usage/README.md diff --git a/src/cli/ai-usage/README.md b/src/cli/ai-usage/README.md new file mode 100644 index 0000000..1f8446f --- /dev/null +++ 
b/src/cli/ai-usage/README.md @@ -0,0 +1,60 @@ +# AI Usage CLI + +Summarize Claude and Codex token usage for a repo, including estimated costs from +`ai-usage.pricing.json`. + +## Run + +```bash +# Default: last 7 days for current git repo (or cwd if not in git) +pnpm ai:usage + +# With options +pnpm ai:usage --since 24h +pnpm ai:usage --since 30d --repo /path/to/repo +pnpm ai:usage --json +pnpm ai:usage --debug +``` + +## Arguments + +- `--since` (optional): time window to include. One of `1h`, `24h`, `7d`, `30d`. +- `--repo` (optional): path to repo to match against log cwd. +- `--json` (optional): emit JSON instead of the summary + table. +- `--debug` (optional): verbose logging about discovery and filtering. + +## Log Sources + +- **Claude:** `~/.claude/projects/<encoded-repo-path>/` JSONL logs +- **Codex:** `$CODEX_HOME/sessions` or `~/.codex/sessions` (YYYY/MM/DD folders) + +Only entries whose `cwd` matches the repo path are counted. + +## Output + +- Summary by provider and by model. +- Markdown table with input/output/cache tokens, totals, and estimated cost. +- If a model is missing from `ai-usage.pricing.json`, cost is `0` and a warning is printed. + +## Example Result + +```text +AI Usage Summary (Last 30d) + +By Provider: + claude: 314,925 tokens ($223.49) + codex: 38,018,298 tokens ($80.22) + +By Model: + gpt-5.2-codex: 37,582,714 tokens ($80.09) + gpt-5.1-codex-mini: 435,584 tokens ($0.12) + claude-opus-4-5-20251101: 314,925 tokens ($223.49) + +| Provider | Model | Input | Output | Cache R | Cache W | Total | Est. 
Cost | +|----------|--------------------------|------------|---------|-------------|------------|------------|-----------| +| claude | claude-opus-4-5-20251101 | 267,897 | 47,028 | 202,979,575 | 11,948,269 | 314,925 | $223.49 | +| codex | gpt-5.2-codex | 36,901,025 | 681,689 | 34,124,288 | 0 | 37,582,714 | $80.09 | +| codex | gpt-5.1-codex-mini | 430,304 | 5,280 | 189,440 | 0 | 435,584 | $0.12 | +|----------|--------------------------|------------|---------|-------------|------------|------------|-----------| +| TOTAL | | 37,599,226 | 733,997 | 237,293,303 | 11,948,269 | 38,333,223 | $303.70 | +``` From 7af5767ddd655b2f081dfb8b74992be61e7210e4 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 11:16:09 +0200 Subject: [PATCH 7/9] feat: add AI usage summary CLI and enhance token aggregation - Introduce `pnpm ai:usage` command for summarizing token usage - Update total token calculation to include cache read/write tokens - Enhance documentation for AI usage features --- AGENTS.md | 1 + README.md | 30 +++++++++++++++- src/cli/ai-usage/README.md | 36 +++++++++++++------- src/cli/ai-usage/clients/usage-aggregator.ts | 3 +- 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0471f0b..9203884 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -43,6 +43,7 @@ **Common scripts (see `package.json` for all):** - `pnpm run:[cli-name-here]` +- `pnpm ai:usage` (summarize Claude/Codex usage logs for a repo) - `pnpm typecheck` - `pnpm lint` (use `pnpm lint:fix` if errors are auto-fixable) - `pnpm format` / `pnpm format:check` diff --git a/README.md b/README.md index 9acc8a0..ed2d341 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # cli-agent-sandbox -A minimal TypeScript CLI sandbox for testing agent workflows and safe web scraping. 
This is a single-package repo built with [`@openai/agents`](https://github.com/openai/openai-agents-js), and it includes a guestbook demo, a Finnish name explorer CLI, a publication scraping pipeline with a Playwright-based scraper for JS-rendered pages, an ETF backtest CLI, an agent evals CLI, and agent tools scoped to `tmp` with strong safety checks. +A minimal TypeScript CLI sandbox for testing agent workflows and safe web scraping. This is a single-package repo built with [`@openai/agents`](https://github.com/openai/openai-agents-js), and it includes a guestbook demo, a Finnish name explorer CLI, a publication scraping pipeline with a Playwright-based scraper for JS-rendered pages, an ETF backtest CLI, an agent evals CLI, an AI usage summary CLI, and agent tools scoped to `tmp` with strong safety checks. ## Quick Start @@ -13,6 +13,7 @@ A minimal TypeScript CLI sandbox for testing agent workflows and safe web scrapi 7. (Optional) Explore Finnish name stats: `pnpm run:name-explorer -- --mode ai|stats` 8. (Optional) Run publication scraping: `pnpm run:scrape-publications -- --url="https://example.com"` 9. (Optional) Run ETF backtest: `pnpm run:etf-backtest -- --isin=IE00B5BMR087` (requires Python setup below) +10. 
(Optional) Summarize AI usage: `pnpm ai:usage --since 7d` ### Python Setup (for ETF backtest) @@ -34,6 +35,7 @@ pip install numpy pandas torch | `pnpm run:name-explorer` | Explore Finnish name statistics (AI Q&A or stats) | | `pnpm run:scrape-publications` | Scrape publication links and build a review page | | `pnpm run:etf-backtest` | Run ETF backtest + feature optimizer (requires Python) | +| `pnpm ai:usage` | Summarize Claude/Codex token usage for a repo | | `pnpm typecheck` | Run TypeScript type checking | | `pnpm lint` | Run ESLint for code quality | | `pnpm lint:fix` | Run ESLint and auto-fix issues | @@ -100,6 +102,24 @@ pnpm run:agent-evals -- --suite=example pnpm run:agent-evals -- --all ``` +## AI usage + +The `ai:usage` CLI summarizes Claude and Codex token usage for a repo based on local logs and `ai-usage.pricing.json`. + +Usage: + +``` +pnpm ai:usage +pnpm ai:usage --since 24h +pnpm ai:usage --since 30d --repo /path/to/repo +pnpm ai:usage --json +``` + +Notes: + +- Defaults to the last 7 days for the current git repo (or `cwd` when not in a git repo). +- Log sources: `~/.claude/projects/<encoded-repo-path>/` and `$CODEX_HOME/sessions` or `~/.codex/sessions`. + ## Tools File tools are sandboxed to the `tmp/` directory with path validation to prevent traversal and symlink attacks. The `fetchUrl` tool adds SSRF protections and HTML sanitization, and `runPython` executes whitelisted Python scripts from a configured directory. 
@@ -123,6 +143,14 @@ File tools are sandboxed to the `tmp/` directory with path validation to prevent ``` src/ ├── cli/ +│ ├── ai-usage/ +│ │ ├── main.ts # AI usage CLI entry point +│ │ ├── README.md # AI usage CLI docs +│ │ ├── ai-usage.pricing.json # Model pricing lookup +│ │ ├── constants.ts # CLI constants +│ │ ├── types/ # CLI schemas +│ │ │ └── schemas.ts # CLI args + pricing schemas +│ │ └── clients/ # Log readers + aggregation + formatting │ ├── agent-evals/ │ │ ├── main.ts # Agent evals CLI entry point │ │ ├── README.md # Agent evals CLI docs diff --git a/src/cli/ai-usage/README.md b/src/cli/ai-usage/README.md index 1f8446f..2f70ff2 100644 --- a/src/cli/ai-usage/README.md +++ b/src/cli/ai-usage/README.md @@ -36,25 +36,35 @@ Only entries whose `cwd` matches the repo path are counted. - Markdown table with input/output/cache tokens, totals, and estimated cost. - If a model is missing from `ai-usage.pricing.json`, cost is `0` and a warning is printed. +## Flow + +```mermaid +flowchart TD + A[Discover logs] --> B[Filter by repo + since] + B --> C[Aggregate tokens] + C --> D[Apply pricing] + D --> E[Render summary + table] +``` + ## Example Result ```text AI Usage Summary (Last 30d) By Provider: - claude: 314,925 tokens ($223.49) - codex: 38,018,298 tokens ($80.22) + claude: 216,462,575 tokens ($224.87) + codex: 73,995,660 tokens ($82.01) By Model: - gpt-5.2-codex: 37,582,714 tokens ($80.09) - gpt-5.1-codex-mini: 435,584 tokens ($0.12) - claude-opus-4-5-20251101: 314,925 tokens ($223.49) - -| Provider | Model | Input | Output | Cache R | Cache W | Total | Est. 
Cost | -|----------|--------------------------|------------|---------|-------------|------------|------------|-----------| -| claude | claude-opus-4-5-20251101 | 267,897 | 47,028 | 202,979,575 | 11,948,269 | 314,925 | $223.49 | -| codex | gpt-5.2-codex | 36,901,025 | 681,689 | 34,124,288 | 0 | 37,582,714 | $80.09 | -| codex | gpt-5.1-codex-mini | 430,304 | 5,280 | 189,440 | 0 | 435,584 | $0.12 | -|----------|--------------------------|------------|---------|-------------|------------|------------|-----------| -| TOTAL | | 37,599,226 | 733,997 | 237,293,303 | 11,948,269 | 38,333,223 | $303.70 | + claude-opus-4-5-20251101: 216,462,575 tokens ($224.87) + gpt-5.2-codex: 73,370,636 tokens ($81.89) + gpt-5.1-codex-mini: 625,024 tokens ($0.12) + +| Provider | Model | Input | Output | Cache R | Cache W | Total | Est. Cost | +|----------|--------------------------|------------|---------|-------------|------------|-------------|-----------| +| claude | claude-opus-4-5-20251101 | 267,949 | 47,358 | 204,118,277 | 12,028,991 | 216,462,575 | $224.87 | +| codex | gpt-5.2-codex | 37,768,935 | 691,877 | 34,909,824 | 0 | 73,370,636 | $81.89 | +| codex | gpt-5.1-codex-mini | 430,304 | 5,280 | 189,440 | 0 | 625,024 | $0.12 | +|----------|--------------------------|------------|---------|-------------|------------|-------------|-----------| +| TOTAL | | 38,467,188 | 744,515 | 239,217,541 | 12,028,991 | 290,458,235 | $306.89 | ``` diff --git a/src/cli/ai-usage/clients/usage-aggregator.ts b/src/cli/ai-usage/clients/usage-aggregator.ts index d58155b..b531466 100644 --- a/src/cli/ai-usage/clients/usage-aggregator.ts +++ b/src/cli/ai-usage/clients/usage-aggregator.ts @@ -101,7 +101,8 @@ export class UsageAggregator { outputTokens: g.outputTokens, cacheReadTokens: g.cacheReadTokens, cacheWriteTokens: g.cacheWriteTokens, - totalTokens: g.inputTokens + g.outputTokens, + totalTokens: + g.inputTokens + g.outputTokens + g.cacheReadTokens + g.cacheWriteTokens, cost: g.cost, })); From 
03e64c3d76c6d5fe0c4f1a30e49674373e8e9aa7 Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 11:42:02 +0200 Subject: [PATCH 8/9] feat: implement usage pipeline for data collection and reporting - Add UsagePipeline class to orchestrate log reading and formatting - Refactor main CLI logic to utilize the new pipeline for report generation - Enhance output formatting methods for better string handling --- README.md | 2 +- src/cli/ai-usage/README.md | 5 + src/cli/ai-usage/clients/output-formatter.ts | 105 ++++++++----- src/cli/ai-usage/clients/usage-pipeline.ts | 142 ++++++++++++++++++ src/cli/ai-usage/main.ts | 148 +------------------ 5 files changed, 225 insertions(+), 177 deletions(-) create mode 100644 src/cli/ai-usage/clients/usage-pipeline.ts diff --git a/README.md b/README.md index ed2d341..47d6566 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ src/ │ │ ├── constants.ts # CLI constants │ │ ├── types/ # CLI schemas │ │ │ └── schemas.ts # CLI args + pricing schemas -│ │ └── clients/ # Log readers + aggregation + formatting +│ │ └── clients/ # Pipeline + log readers + aggregation + formatting │ ├── agent-evals/ │ │ ├── main.ts # Agent evals CLI entry point │ │ ├── README.md # Agent evals CLI docs diff --git a/src/cli/ai-usage/README.md b/src/cli/ai-usage/README.md index 2f70ff2..4266b84 100644 --- a/src/cli/ai-usage/README.md +++ b/src/cli/ai-usage/README.md @@ -36,6 +36,11 @@ Only entries whose `cwd` matches the repo path are counted. - Markdown table with input/output/cache tokens, totals, and estimated cost. - If a model is missing from `ai-usage.pricing.json`, cost is `0` and a warning is printed. +## Internals + +- `UsagePipeline` owns repo resolution, log collection, aggregation, and formatting. +- `OutputFormatter` returns strings (summary/table/JSON); `main.ts` prints the report. 
+ ## Flow ```mermaid diff --git a/src/cli/ai-usage/clients/output-formatter.ts b/src/cli/ai-usage/clients/output-formatter.ts index 18878cf..7f2ed92 100644 --- a/src/cli/ai-usage/clients/output-formatter.ts +++ b/src/cli/ai-usage/clients/output-formatter.ts @@ -16,7 +16,8 @@ type ColumnWidths = { }; /** - * Formats aggregated usage data for console output. + * Formats aggregated usage data for output. + * All format methods return strings; printing is the caller's responsibility. */ export class OutputFormatter { private sinceLabel: string; @@ -121,54 +122,60 @@ export class OutputFormatter { } /** - * Print warning about models missing from pricing config. + * Format warning about models missing from pricing config. */ - printUnknownModelsWarning(models: string[]): void { + formatUnknownModelsWarning(models: string[]): string { if (models.length === 0) { - return; + return ""; } - console.log("\nWarning: The following models have no pricing data:"); - for (const model of models) { - console.log(` - ${model}`); - } - console.log( - "Add pricing info to ai-usage.pricing.json for accurate cost estimates." - ); + const lines = [ + "\nWarning: The following models have no pricing data:", + ...models.map((model) => ` - ${model}`), + "Add pricing info to ai-usage.pricing.json for accurate cost estimates.", + ]; + return lines.join("\n"); } /** - * Print the summary section (totals per provider and model). + * Format the summary section (totals per provider and model). 
*/ - printSummary(usage: AggregatedUsage): void { - console.log(`\nAI Usage Summary (Last ${this.sinceLabel})`); - console.log(`Repo: ${usage.repo}`); - this.printUnknownModelsWarning(usage.unknownModels); - console.log(""); + formatSummary(usage: AggregatedUsage): string { + const lines: string[] = []; + + lines.push(`\nAI Usage Summary (Last ${this.sinceLabel})`); + + const warning = this.formatUnknownModelsWarning(usage.unknownModels); + if (warning) { + lines.push(warning); + } + lines.push(""); // By provider - console.log("By Provider:"); + lines.push("By Provider:"); for (const [provider, summary] of Object.entries(usage.byProvider)) { const tokens = this.formatNumber(summary.tokens); const cost = this.formatCost(summary.cost); - console.log(` ${provider}: ${tokens} tokens (${cost})`); + lines.push(` ${provider}: ${tokens} tokens (${cost})`); } // By model - console.log("\nBy Model:"); + lines.push("\nBy Model:"); for (const summary of usage.byModel) { const tokens = this.formatNumber(summary.tokens); const cost = this.formatCost(summary.cost); - console.log(` ${summary.model}: ${tokens} tokens (${cost})`); + lines.push(` ${summary.model}: ${tokens} tokens (${cost})`); } - console.log(""); + lines.push(""); + return lines.join("\n"); } /** - * Print the markdown table. + * Format the markdown table. 
*/ - printTable(usage: AggregatedUsage): void { + formatTable(usage: AggregatedUsage): string { + const lines: string[] = []; const cols = this.calculateColumnWidths(usage); // Header @@ -194,8 +201,8 @@ export class OutputFormatter { "-".repeat(cols.cost), ].join("-|-"); - console.log(`| ${header} |`); - console.log(`|-${separator}-|`); + lines.push(`| ${header} |`); + lines.push(`|-${separator}-|`); // Rows for (const row of usage.rows) { @@ -209,11 +216,11 @@ export class OutputFormatter { this.padLeft(this.formatNumber(row.totalTokens), cols.total), this.padLeft(this.formatCost(row.cost), cols.cost), ].join(" | "); - console.log(`| ${line} |`); + lines.push(`| ${line} |`); } // Separator before totals - console.log(`|-${separator}-|`); + lines.push(`|-${separator}-|`); // Totals row const totalsLine = [ @@ -232,13 +239,15 @@ export class OutputFormatter { this.padLeft(this.formatNumber(usage.totals.totalTokens), cols.total), this.padLeft(this.formatCost(usage.totals.cost), cols.cost), ].join(" | "); - console.log(`| ${totalsLine} |`); + lines.push(`| ${totalsLine} |`); + + return lines.join("\n"); } /** - * Print JSON output. + * Format JSON output. */ - printJson(usage: AggregatedUsage): void { + formatJson(usage: AggregatedUsage): string { const output = { period: { since: usage.period.since.toISOString(), @@ -251,14 +260,38 @@ export class OutputFormatter { totals: usage.totals, unknownModels: usage.unknownModels, }; - console.log(JSON.stringify(output, null, 2)); + return JSON.stringify(output, null, 2); + } + + /** + * Format empty JSON output when no data is found. 
+ */ + formatEmptyJson(repo: string, since: Date, until: Date): string { + return JSON.stringify({ + period: { + since: since.toISOString(), + until: until.toISOString(), + }, + repo, + byProvider: {}, + byModel: [], + rows: [], + totals: { + inputTokens: 0, + outputTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0, + totalTokens: 0, + cost: 0, + }, + unknownModels: [], + }); } /** - * Print a message when no data is found. + * Format a message when no data is found. */ - printNoData(repo: string): void { - console.log(`\nNo usage data found for repo: ${repo}`); - console.log(`Time period: Last ${this.sinceLabel}\n`); + formatNoData(repo: string): string { + return `\nNo usage data found for repo: ${repo}\nTime period: Last ${this.sinceLabel}\n`; } } diff --git a/src/cli/ai-usage/clients/usage-pipeline.ts b/src/cli/ai-usage/clients/usage-pipeline.ts new file mode 100644 index 0000000..5f759d8 --- /dev/null +++ b/src/cli/ai-usage/clients/usage-pipeline.ts @@ -0,0 +1,142 @@ +import { execSync } from "child_process"; +import { readFileSync } from "fs"; +import { dirname, join } from "path"; +import { fileURLToPath } from "url"; +import type { Logger } from "~clients/logger"; + +import { DURATION_MS } from "../constants"; +import type { PricingConfig } from "../types/schemas"; +import { PricingConfigSchema } from "../types/schemas"; +import { ClaudeLogReader } from "./claude-log-reader"; +import { CodexLogReader } from "./codex-log-reader"; +import { OutputFormatter } from "./output-formatter"; +import { UsageAggregator } from "./usage-aggregator"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +type UsagePipelineOptions = { + logger: Logger; + debug: boolean; +}; + +type GetReportOptions = { + since: string; + repoPath?: string; + json: boolean; +}; + +/** + * Orchestrates usage data collection, aggregation, and formatting. 
+ */ +export class UsagePipeline { + private logger: Logger; + private debug: boolean; + + constructor(options: UsagePipelineOptions) { + this.logger = options.logger; + this.debug = options.debug; + } + + /** + * Execute the pipeline and return a formatted report. + */ + async getReport(options: GetReportOptions): Promise { + const repoPath = this.resolveRepoPath(options.repoPath); + const sinceDate = this.parseSinceDuration(options.since); + const untilDate = new Date(); + const pricing = this.loadPricing(); + + if (this.debug) { + this.logger.debug("Repo path", { repoPath }); + this.logger.debug("Time range", { + since: sinceDate.toISOString(), + until: untilDate.toISOString(), + }); + } + + // Initialize readers + const claudeReader = new ClaudeLogReader({ + logger: this.logger, + debug: this.debug, + }); + const codexReader = new CodexLogReader({ + logger: this.logger, + debug: this.debug, + }); + + // Read logs from both providers in parallel + const [claudeRecords, codexRecords] = await Promise.all([ + claudeReader.getUsage({ since: sinceDate, repoPath }), + codexReader.getUsage({ since: sinceDate, repoPath }), + ]); + + const allRecords = [...claudeRecords, ...codexRecords]; + const formatter = new OutputFormatter({ sinceLabel: options.since }); + + // Handle no data case + if (allRecords.length === 0) { + if (options.json) { + return formatter.formatEmptyJson(repoPath, sinceDate, untilDate); + } + return formatter.formatNoData(repoPath); + } + + // Aggregate and calculate costs + const aggregator = new UsageAggregator({ pricing }); + const usage = aggregator.aggregate({ + records: allRecords, + since: sinceDate, + until: untilDate, + repo: repoPath, + }); + + // Format output + if (options.json) { + return formatter.formatJson(usage); + } + return formatter.formatSummary(usage) + formatter.formatTable(usage); + } + + /** + * Resolve repo path, defaulting to git root of current directory. 
+ */ + private resolveRepoPath(repoArg?: string): string { + if (repoArg) { + return repoArg; + } + + try { + const result = execSync("git rev-parse --show-toplevel", { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + }); + return result.trim(); + } catch { + // Not in a git repo, use cwd + return process.cwd(); + } + } + + /** + * Parse since duration string to a Date. + */ + private parseSinceDuration(since: string): Date { + const ms = DURATION_MS[since]; + if (!ms) { + throw new Error( + `Invalid --since value: ${since}. Use one of: ${Object.keys(DURATION_MS).join(", ")}` + ); + } + return new Date(Date.now() - ms); + } + + /** + * Load pricing config from JSON file. + */ + private loadPricing(): PricingConfig { + const pricingPath = join(__dirname, "..", "ai-usage.pricing.json"); + const content = readFileSync(pricingPath, "utf-8"); + const data: unknown = JSON.parse(content); + return PricingConfigSchema.parse(data); + } +} diff --git a/src/cli/ai-usage/main.ts b/src/cli/ai-usage/main.ts index 152ba03..166decb 100644 --- a/src/cli/ai-usage/main.ts +++ b/src/cli/ai-usage/main.ts @@ -5,71 +5,13 @@ // pnpm ai:usage --json // pnpm ai:usage --debug -import { execSync } from "child_process"; -import { readFileSync } from "fs"; -import { dirname, join } from "path"; -import { fileURLToPath } from "url"; import { Logger } from "~clients/logger"; import { parseArgs } from "~utils/parse-args"; -import { ClaudeLogReader } from "./clients/claude-log-reader"; -import { CodexLogReader } from "./clients/codex-log-reader"; -import { OutputFormatter } from "./clients/output-formatter"; -import { UsageAggregator } from "./clients/usage-aggregator"; -import { DURATION_MS } from "./constants"; -import type { PricingConfig } from "./types/schemas"; -import { CliArgsSchema, PricingConfigSchema } from "./types/schemas"; - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -/** - * Resolve repo path, defaulting to git root of current directory. 
- */ -const resolveRepoPath = (repoArg?: string): string => { - if (repoArg) { - return repoArg; - } - - try { - const result = execSync("git rev-parse --show-toplevel", { - encoding: "utf-8", - stdio: ["pipe", "pipe", "pipe"], - }); - return result.trim(); - } catch { - // Not in a git repo, use cwd - return process.cwd(); - } -}; - -/** - * Parse since duration string to a Date. - */ -const parseSinceDuration = (since: string): Date => { - const ms = DURATION_MS[since]; - if (!ms) { - throw new Error( - `Invalid --since value: ${since}. Use one of: ${Object.keys(DURATION_MS).join(", ")}` - ); - } - return new Date(Date.now() - ms); -}; - -/** - * Load pricing config from JSON file. - */ -const loadPricing = (): PricingConfig => { - const pricingPath = join(__dirname, "ai-usage.pricing.json"); - const content = readFileSync(pricingPath, "utf-8"); - const data: unknown = JSON.parse(content); - return PricingConfigSchema.parse(data); -}; - -// --- Main --- +import { UsagePipeline } from "./clients/usage-pipeline"; +import { CliArgsSchema } from "./types/schemas"; const logger = new Logger({ level: "info" }); - -// Parse CLI arguments const args = parseArgs({ logger, schema: CliArgsSchema }); if (args.debug) { @@ -77,85 +19,11 @@ if (args.debug) { logger.debug("Arguments", args); } -// Resolve repo path -const repoPath = resolveRepoPath(args.repo); - -if (args.debug) { - logger.debug("Repo path", { repoPath }); -} - -// Calculate since date -const sinceDate = parseSinceDuration(args.since); -const untilDate = new Date(); - -if (args.debug) { - logger.debug("Time range", { - since: sinceDate.toISOString(), - until: untilDate.toISOString(), - }); -} - -// Load pricing config -const pricing = loadPricing(); - -// Initialize readers -const claudeReader = new ClaudeLogReader({ logger, debug: args.debug }); -const codexReader = new CodexLogReader({ logger, debug: args.debug }); - -// Read logs from both providers in parallel -const [claudeRecords, codexRecords] = await 
Promise.all([ - claudeReader.getUsage({ since: sinceDate, repoPath }), - codexReader.getUsage({ since: sinceDate, repoPath }), -]); - -const allRecords = [...claudeRecords, ...codexRecords]; - -// Initialize formatter -const formatter = new OutputFormatter({ sinceLabel: args.since }); - -// Handle no data case -if (allRecords.length === 0) { - if (args.json) { - console.log( - JSON.stringify({ - period: { - since: sinceDate.toISOString(), - until: untilDate.toISOString(), - }, - repo: repoPath, - byProvider: {}, - byModel: [], - rows: [], - totals: { - inputTokens: 0, - outputTokens: 0, - cacheReadTokens: 0, - cacheWriteTokens: 0, - totalTokens: 0, - cost: 0, - }, - unknownModels: [], - }) - ); - } else { - formatter.printNoData(repoPath); - } - process.exit(0); -} - -// Aggregate and calculate costs -const aggregator = new UsageAggregator({ pricing }); -const usage = aggregator.aggregate({ - records: allRecords, - since: sinceDate, - until: untilDate, - repo: repoPath, +const pipeline = new UsagePipeline({ logger, debug: args.debug }); +const report = await pipeline.getReport({ + since: args.since, + repoPath: args.repo, + json: args.json, }); -// Output -if (args.json) { - formatter.printJson(usage); -} else { - formatter.printSummary(usage); - formatter.printTable(usage); -} +console.log(report); From 1339d4f5d9ebcea1a907e1fcbdd1a29ae84ff91a Mon Sep 17 00:00:00 2001 From: Juha Kangas <42040080+valuecodes@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:38:52 +0200 Subject: [PATCH 9/9] feat: enhance Claude and Codex log readers with improved filtering and deduplication - Simplify repo path encoding in ClaudeLogReader - Add tests for CodexLogReader to validate log file handling - Improve date validation and deduplication logic in CodexLogReader --- src/cli/ai-usage/ai-usage.pricing.json | 12 +- .../clients/claude-log-reader.test.ts | 86 ++++++++++++ src/cli/ai-usage/clients/claude-log-reader.ts | 13 +- .../ai-usage/clients/codex-log-reader.test.ts | 122 
++++++++++++++++++ src/cli/ai-usage/clients/codex-log-reader.ts | 53 ++++++-- 5 files changed, 266 insertions(+), 20 deletions(-) create mode 100644 src/cli/ai-usage/clients/claude-log-reader.test.ts create mode 100644 src/cli/ai-usage/clients/codex-log-reader.test.ts diff --git a/src/cli/ai-usage/ai-usage.pricing.json b/src/cli/ai-usage/ai-usage.pricing.json index 72e48e3..7f6de58 100644 --- a/src/cli/ai-usage/ai-usage.pricing.json +++ b/src/cli/ai-usage/ai-usage.pricing.json @@ -5,25 +5,19 @@ "input": 5.0, "output": 25.0, "cacheRead": 0.5, - "cacheWrite": 10.0, - "cacheWrite_5m": 6.25, - "cacheWrite_1h": 10.0 + "cacheWrite": 10.0 }, "claude-sonnet-4": { "input": 3.0, "output": 15.0, "cacheRead": 0.3, - "cacheWrite": 6.0, - "cacheWrite_5m": 3.75, - "cacheWrite_1h": 6.0 + "cacheWrite": 6.0 }, "claude-haiku-4-5": { "input": 1.0, "output": 5.0, "cacheRead": 0.1, - "cacheWrite": 2.0, - "cacheWrite_5m": 1.25, - "cacheWrite_1h": 2.0 + "cacheWrite": 2.0 }, "gpt-5.2-codex": { "input": 1.75, diff --git a/src/cli/ai-usage/clients/claude-log-reader.test.ts b/src/cli/ai-usage/clients/claude-log-reader.test.ts new file mode 100644 index 0000000..6531254 --- /dev/null +++ b/src/cli/ai-usage/clients/claude-log-reader.test.ts @@ -0,0 +1,86 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { TMP_ROOT } from "~tools/utils/fs"; +import { afterEach, describe, expect, it } from "vitest"; + +import { ClaudeLogReader } from "./claude-log-reader"; + +const mockLogger = { + debug: () => { + /* empty */ + }, +} as never; + +const encodeRepoPath = (repoPath: string) => + repoPath.replace(/\\/g, "/").replace(/\//g, "-"); + +const since = new Date("2024-01-01T00:00:00.000Z"); +const repoPath = "/repo"; + +const buildEntry = (overrides: Record = {}) => ({ + type: "assistant", + timestamp: "2025-01-01T00:00:00.000Z", + message: { + model: "claude-3", + usage: { + input_tokens: 1, + output_tokens: 2, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, 
+ }, + ...overrides, +}); + +const writeLogFile = async (lines: unknown[]) => { + await fs.mkdir(TMP_ROOT, { recursive: true }); + const baseDir = await fs.mkdtemp(path.join(TMP_ROOT, "vitest-claude-")); + const projectDir = path.join(baseDir, encodeRepoPath(repoPath)); + await fs.mkdir(projectDir, { recursive: true }); + const filePath = path.join(projectDir, "session.jsonl"); + const content = lines.map((line) => JSON.stringify(line)).join("\n"); + await fs.writeFile(filePath, content, "utf8"); + return baseDir; +}; + +describe("ClaudeLogReader cwd filtering", () => { + let baseDir = ""; + + afterEach(async () => { + if (baseDir) { + await fs.rm(baseDir, { recursive: true, force: true }); + baseDir = ""; + } + }); + + it("skips entries missing cwd", async () => { + baseDir = await writeLogFile([buildEntry()]); + const reader = new ClaudeLogReader({ + logger: mockLogger, + basePath: baseDir, + }); + const records = await reader.getUsage({ since, repoPath }); + expect(records).toHaveLength(0); + }); + + it("skips entries with mismatched cwd", async () => { + baseDir = await writeLogFile([buildEntry({ cwd: "/other" })]); + const reader = new ClaudeLogReader({ + logger: mockLogger, + basePath: baseDir, + }); + const records = await reader.getUsage({ since, repoPath }); + expect(records).toHaveLength(0); + }); + + it("keeps entries with matching cwd", async () => { + baseDir = await writeLogFile([buildEntry({ cwd: "/repo/project" })]); + const reader = new ClaudeLogReader({ + logger: mockLogger, + basePath: baseDir, + }); + const records = await reader.getUsage({ since, repoPath }); + expect(records).toHaveLength(1); + expect(records[0]?.inputTokens).toBe(1); + }); +}); diff --git a/src/cli/ai-usage/clients/claude-log-reader.ts b/src/cli/ai-usage/clients/claude-log-reader.ts index ebc9362..c6702ea 100644 --- a/src/cli/ai-usage/clients/claude-log-reader.ts +++ b/src/cli/ai-usage/clients/claude-log-reader.ts @@ -24,7 +24,8 @@ type GetUsageOptions = { * 
/home/juha/code/foo -> -home-juha-code-foo */ const encodeRepoPath = (repoPath: string): string => { - return repoPath.replace(/\//g, "-"); + const normalizedPath = repoPath.replace(/\\/g, "/"); + return normalizedPath.replace(/\//g, "-"); }; export class ClaudeLogReader { @@ -128,8 +129,14 @@ export class ClaudeLogReader { continue; } - // Match repo path if cwd is present - if (entry.cwd && !entry.cwd.startsWith(repoPath)) { + // Skip if cwd is missing or doesn't match repo + if (!entry.cwd) { + if (this.debug) { + this.logger.debug("Skipping entry: cwd missing", { repoPath }); + } + continue; + } + if (!entry.cwd.startsWith(repoPath)) { if (this.debug) { this.logger.debug("Skipping entry: cwd mismatch", { cwd: entry.cwd, diff --git a/src/cli/ai-usage/clients/codex-log-reader.test.ts b/src/cli/ai-usage/clients/codex-log-reader.test.ts new file mode 100644 index 0000000..cf13831 --- /dev/null +++ b/src/cli/ai-usage/clients/codex-log-reader.test.ts @@ -0,0 +1,122 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { Logger } from "~clients/logger"; +import { TMP_ROOT } from "~tools/utils/fs"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import { CodexLogReader } from "./codex-log-reader"; + +describe("CodexLogReader findLogFiles", () => { + let testDir = ""; + + const logger = new Logger({ + level: "error", + useColors: false, + useTimestamps: false, + }); + + beforeEach(async () => { + await fs.mkdir(TMP_ROOT, { recursive: true }); + testDir = await fs.mkdtemp(path.join(TMP_ROOT, "codex-log-reader-")); + }); + + afterEach(async () => { + if (testDir) { + await fs.rm(testDir, { recursive: true, force: true }); + } + testDir = ""; + }); + + it("includes non-zero-padded dates and skips invalid date directories", async () => { + const validDir = path.join(testDir, "2024", "1", "5"); + await fs.mkdir(validDir, { recursive: true }); + const validFile = path.join(validDir, "session.jsonl"); + await 
fs.writeFile(validFile, ""); + + const invalidMonthDir = path.join(testDir, "2024", "13", "1"); + await fs.mkdir(invalidMonthDir, { recursive: true }); + const invalidMonthFile = path.join(invalidMonthDir, "bad.jsonl"); + await fs.writeFile(invalidMonthFile, ""); + + const invalidDayDir = path.join(testDir, "2024", "2", "31"); + await fs.mkdir(invalidDayDir, { recursive: true }); + const invalidDayFile = path.join(invalidDayDir, "bad.jsonl"); + await fs.writeFile(invalidDayFile, ""); + + const invalidYearDir = path.join(testDir, "not-a-year", "1", "1"); + await fs.mkdir(invalidYearDir, { recursive: true }); + const invalidYearFile = path.join(invalidYearDir, "bad.jsonl"); + await fs.writeFile(invalidYearFile, ""); + + const reader = new CodexLogReader({ + logger, + basePath: testDir, + debug: false, + }); + + const files = await reader.findLogFiles(new Date(2024, 0, 1)); + + expect(files).toContain(validFile); + expect(files).not.toContain(invalidMonthFile); + expect(files).not.toContain(invalidDayFile); + expect(files).not.toContain(invalidYearFile); + }); +}); + +describe("CodexLogReader parseSession repo filtering", () => { + let testDir = ""; + + const logger = new Logger({ + level: "error", + useColors: false, + useTimestamps: false, + }); + + const writeSessionFile = async (lines: unknown[]) => { + await fs.mkdir(TMP_ROOT, { recursive: true }); + testDir = await fs.mkdtemp(path.join(TMP_ROOT, "codex-log-reader-")); + const filePath = path.join(testDir, "session.jsonl"); + const content = lines.map((line) => JSON.stringify(line)).join("\n"); + await fs.writeFile(filePath, content, "utf8"); + return filePath; + }; + + afterEach(async () => { + if (testDir) { + await fs.rm(testDir, { recursive: true, force: true }); + } + testDir = ""; + }); + + it("skips token_count entries when cwd is missing", async () => { + const turnContext = { + type: "turn_context", + timestamp: "2025-01-01T00:00:00.500Z", + payload: { model: "gpt-4.1" }, + }; + const event = { + type: 
"event_msg", + timestamp: "2025-01-01T00:00:01.000Z", + payload: { + type: "token_count", + info: { + last_token_usage: { + input_tokens: 3, + output_tokens: 2, + cached_input_tokens: 0, + }, + }, + }, + }; + + const filePath = await writeSessionFile([turnContext, event]); + const reader = new CodexLogReader({ logger }); + const records = await reader.parseSession( + filePath, + "/repo", + new Date("2024-01-01T00:00:00.000Z") + ); + + expect(records).toHaveLength(0); + }); +}); diff --git a/src/cli/ai-usage/clients/codex-log-reader.ts b/src/cli/ai-usage/clients/codex-log-reader.ts index a912db9..0e71988 100644 --- a/src/cli/ai-usage/clients/codex-log-reader.ts +++ b/src/cli/ai-usage/clients/codex-log-reader.ts @@ -87,7 +87,30 @@ export class CodexLogReader { } // Check if this date is within range - const dirDate = new Date(`${year}-${month}-${day}`); + const yearNum = Number(year); + const monthNum = Number(month); + const dayNum = Number(day); + + if ( + !Number.isInteger(yearNum) || + !Number.isInteger(monthNum) || + !Number.isInteger(dayNum) || + monthNum < 1 || + monthNum > 12 || + dayNum < 1 || + dayNum > 31 + ) { + continue; + } + + const dirDate = new Date(Date.UTC(yearNum, monthNum - 1, dayNum)); + if ( + dirDate.getUTCFullYear() !== yearNum || + dirDate.getUTCMonth() !== monthNum - 1 || + dirDate.getUTCDate() !== dayNum + ) { + continue; + } if (dirDate < since) { continue; } @@ -129,7 +152,9 @@ export class CodexLogReader { /** * Parse a session file and extract usage records. * Need to track session metadata and model across entries. - * Codex emits duplicate token_count events - we dedupe by tracking last seen values. + * Codex can emit duplicate token_count events; dedupe using cumulative usage when + * available (or exact line matches) to avoid dropping distinct calls with the same + * per-request token counts. 
*/ async parseSession( filePath: string, @@ -147,12 +172,13 @@ export class CodexLogReader { }); for await (const line of rl) { - if (!line.trim()) { + const trimmedLine = line.trim(); + if (!trimmedLine) { continue; } try { - const parsed: unknown = JSON.parse(line); + const parsed: unknown = JSON.parse(trimmedLine); const baseResult = CodexLogEntrySchema.safeParse(parsed); if (!baseResult.success) { continue; @@ -204,8 +230,16 @@ export class CodexLogReader { continue; } - // Skip if cwd doesn't match repo - if (sessionData.cwd && !sessionData.cwd.startsWith(repoPath)) { + // Skip if cwd is missing or doesn't match repo + if (!sessionData.cwd) { + if (this.debug) { + this.logger.debug("Skipping Codex entry: cwd missing", { + repoPath, + }); + } + continue; + } + if (!sessionData.cwd.startsWith(repoPath)) { if (this.debug) { this.logger.debug("Skipping Codex entry: cwd mismatch", { cwd: sessionData.cwd, @@ -216,9 +250,12 @@ export class CodexLogReader { } const usage = payload.info.last_token_usage; + const totalUsage = payload.info.total_token_usage; - // Dedupe: Codex emits duplicate token_count events with identical values - const usageKey = `${usage.input_tokens}:${usage.output_tokens}:${usage.cached_input_tokens}`; + // Dedupe only when we have a strong signal (cumulative totals or exact line). + const usageKey = totalUsage + ? `total:${totalUsage.input_tokens}:${totalUsage.output_tokens}:${totalUsage.cached_input_tokens}|last:${usage.input_tokens}:${usage.output_tokens}:${usage.cached_input_tokens}` + : `line:${trimmedLine}`; if (usageKey === lastUsageKey) { continue; }