From 7c09e04a284af08106aa14d2322045120181ce1d Mon Sep 17 00:00:00 2001 From: rusliksu <25394128+rusliksu@users.noreply.github.com> Date: Sun, 24 May 2026 15:42:49 +0300 Subject: [PATCH] Improve Codex log coverage --- src/core/config-health-helpers.test.ts | 10 +- src/core/config-health-helpers.ts | 2 +- src/core/parser-codex.test.ts | 70 +++++++++++++- src/core/parser-codex.ts | 126 +++++++++++++++---------- src/core/parser-harnesses.ts | 6 +- src/core/schemas.ts | 1 + src/core/types/session-types.ts | 3 + 7 files changed, 159 insertions(+), 59 deletions(-) diff --git a/src/core/config-health-helpers.test.ts b/src/core/config-health-helpers.test.ts index ef35287..cb25d71 100644 --- a/src/core/config-health-helpers.test.ts +++ b/src/core/config-health-helpers.test.ts @@ -17,6 +17,7 @@ import { safeFileExists, buildFileTree, readSnippet, + resolveWorkspaceRoot, } from './config-health-helpers'; import { ConfigFileInfo } from './types'; @@ -42,6 +43,13 @@ afterEach(() => { } }); +describe('resolveWorkspaceRoot', () => { + it('uses existing Codex workspace paths as root paths', () => { + const root = makeTempDir(); + expect(resolveWorkspaceRoot('codex-proj-1234', { id: 'codex-proj-1234', name: 'proj', path: root })).toBe(root); + }); +}); + describe('scanConfigFiles', () => { it('detects documented custom agent profiles in .github/agents/*.md', () => { const root = makeTempDir(); @@ -320,4 +328,4 @@ describe('readSnippet', () => { const snippet = readSnippet(root, ['big.txt'], 50); expect(snippet.length).toBe(50); }); -}); \ No newline at end of file +}); diff --git a/src/core/config-health-helpers.ts b/src/core/config-health-helpers.ts index f857005..ff2598f 100644 --- a/src/core/config-health-helpers.ts +++ b/src/core/config-health-helpers.ts @@ -77,7 +77,7 @@ export function resolveWorkspaceRoot(id: string, ws: Workspace): string | null { return resolveClaudeRoot(ws.path); } if (id.startsWith('codex-') || id.startsWith('opencode-')) { - return null; + return fs.existsSync(ws.path) ? ws.path : null; } return resolveVsCodeRoot(ws.path) ?? resolveCLIRoot(ws.path); } diff --git a/src/core/parser-codex.test.ts b/src/core/parser-codex.test.ts index f92ba97..7f3dc3f 100644 --- a/src/core/parser-codex.test.ts +++ b/src/core/parser-codex.test.ts @@ -12,15 +12,16 @@ import * as fs from 'fs'; import * as os from 'os'; import * as path from 'path'; import { describe, it, expect } from 'vitest'; -import { parseCodexSessions } from './parser-codex'; +import { findCodexDirs, parseCodexSessions } from './parser-codex'; +import { MAX_FILE_SIZE } from './parser-shared'; -function withCodexFile(lines: object[], run: (sessionsDir: string) => void): void { +function withCodexFile(lines: object[], run: (sessionsDir: string, filePath: string) => void): void { const root = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-parser-test-')); const dayDir = path.join(root, 'sessions', '2025', '06', '15'); fs.mkdirSync(dayDir, { recursive: true }); const file = path.join(dayDir, 'rollout-2025-06-15-test.jsonl'); fs.writeFileSync(file, lines.map(l => JSON.stringify(l)).join('\n'), 'utf-8'); - try { run(path.join(root, 'sessions')); } finally { fs.rmSync(root, { recursive: true, force: true }); } + try { run(path.join(root, 'sessions'), file); } finally { fs.rmSync(root, { recursive: true, force: true }); } } describe('parseCodexSessions', () => { @@ -64,4 +65,67 @@ describe('parseCodexSessions', () => { expect(sessions[0].modelUsage).toBeUndefined(); }); }); + + it('stores the Codex session cwd as workspaceRootPath', () => { + const cwd = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-cwd-test-')); + withCodexFile([ + { type: 'session_meta', payload: { id: 'sess-codex-cwd', cwd } }, + { type: 'event_msg', timestamp: '2025-06-15T10:00:00Z', payload: { type: 'user_message', message: 'hi' } }, + { type: 'event_msg', timestamp: '2025-06-15T10:00:01Z', payload: { type: 'assistant_message', content: 'hello' } }, + ], (sessionsDir) => { + const sessions = parseCodexSessions(sessionsDir); + expect(sessions).toHaveLength(1); + expect(sessions[0].location).toBe('terminal'); + expect(sessions[0].workspaceRootPath).toBe(cwd); + }); + fs.rmSync(cwd, { recursive: true, force: true }); + }); + + it('parses Codex JSONL files that exceed the shared in-memory file cap', () => { + withCodexFile([ + { type: 'session_meta', payload: { id: 'sess-codex-large', cwd: '/Users/me/proj' } }, + { type: 'turn_context', payload: { model: 'gpt-5.3-codex' } }, + { type: 'event_msg', timestamp: '2025-06-15T10:00:00Z', payload: { type: 'user_message', message: 'hi' } }, + { type: 'event_msg', timestamp: '2025-06-15T10:00:01Z', payload: { type: 'assistant_message', content: 'hello' } }, + ], (sessionsDir, filePath) => { + fs.appendFileSync(filePath, '\n'); + const blankLine = Buffer.concat([Buffer.alloc(1024 * 1024, 0x20), Buffer.from('\n')]); + while (fs.statSync(filePath).size <= MAX_FILE_SIZE) { + fs.appendFileSync(filePath, blankLine); + } + + const sessions = parseCodexSessions(sessionsDir); + expect(sessions).toHaveLength(1); + expect(sessions[0].sessionId).toBe('sess-codex-large'); + expect(sessions[0].requests).toHaveLength(1); + }); + }); +}); + +describe('findCodexDirs', () => { + it('discovers active and archived Codex session directories', () => { + const root = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-dirs-test-')); + const oldHome = process.env.HOME; + const oldUserProfile = process.env.USERPROFILE; + + try { + process.env.HOME = root; + process.env.USERPROFILE = root; + + const active = path.join(root, '.codex', 'sessions'); + const archivedUnderscore = path.join(root, '.codex', 'archived_sessions'); + const archivedHyphen = path.join(root, '.codex', 'archived-sessions'); + fs.mkdirSync(active, { recursive: true }); + fs.mkdirSync(archivedUnderscore, { recursive: true }); + fs.mkdirSync(archivedHyphen, { recursive: true }); + + expect(findCodexDirs()).toEqual([active, archivedUnderscore, archivedHyphen]); + } finally { + if (oldHome === undefined) delete process.env.HOME; + else process.env.HOME = oldHome; + if (oldUserProfile === undefined) delete process.env.USERPROFILE; + else process.env.USERPROFILE = oldUserProfile; + fs.rmSync(root, { recursive: true, force: true }); + } + }); }); diff --git a/src/core/parser-codex.ts b/src/core/parser-codex.ts index 80c3e4c..c6ed485 100644 --- a/src/core/parser-codex.ts +++ b/src/core/parser-codex.ts @@ -19,8 +19,9 @@ import * as fs from 'fs'; import * as path from 'path'; +import { StringDecoder } from 'string_decoder'; import { ModelUsage, Session, SessionRequest } from './types'; -import { assertTrustedPath, readFileSafe, createRequest, createSession, detectDevcontainerFromRequests } from './parser-shared'; +import { assertTrustedPath, createRequest, createSession, detectDevcontainerFromRequests } from './parser-shared'; import { canonicalizeReasoningEffort, extractReasoningEffortFromModelId } from './helpers'; interface CodexLine { @@ -101,16 +102,6 @@ function parseCodexLine(rawLine: string): CodexLine | null { } } -function parseCodexLines(raw: string): CodexLine[] { - const lines: CodexLine[] = []; - for (const rawLine of raw.split('\n')) { - if (!rawLine.trim()) continue; - const parsed = parseCodexLine(rawLine); - if (parsed) lines.push(parsed); - } - return lines; -} - function parseJsonRecord(raw: string): Record | null { try { const parsed: unknown = JSON.parse(raw); @@ -362,33 +353,76 @@ function handleResponseItem(payload: Record, state: CodexParseS if (itemType === 'function_call') handleFunctionCallResponseItem(payload, state); } -function extractSessionMeta(lines: CodexLine[], filePath: string): CodexSessionMeta { - let sessionId = ''; - let cwd = ''; - let source = ''; - let model = ''; +function updateSessionMeta(line: CodexLine, meta: CodexSessionMeta): void { + if (line.type === 'session_meta') { + const payload = line.payload || {}; + meta.sessionId = stringValue(payload.id) || meta.sessionId; + meta.cwd = stringValue(payload.cwd) || meta.cwd; + meta.source = stringValue(payload.source) || meta.source; + } + if (line.type === 'turn_context' && !meta.model) { + meta.model = stringValue(line.payload?.model); + } +} + +function handleCodexLine(line: CodexLine, state: CodexParseState, meta: CodexSessionMeta): void { + updateSessionMeta(line, meta); + const ts = line.timestamp ? new Date(line.timestamp).getTime() : null; + updateTimestamps(state, ts); + + if (line.type === 'event_msg') { + handleEventMsg(line.payload || {}, state, ts, meta.model); + return; + } + if (line.type === 'turn_context') { + handleTurnContext(line.payload || {}, state); + return; + } + if (line.type === 'response_item') handleResponseItem(line.payload || {}, state, ts, meta.model); +} + +function readCodexJsonlStreaming(filePath: string, onLine: (line: CodexLine) => void): void { + const fd = fs.openSync(filePath, 'r'); + const decoder = new StringDecoder('utf8'); + const buffer = Buffer.allocUnsafe(1024 * 1024); + let remainder = ''; - for (const line of lines) { - if (line.type === 'session_meta') { - const payload = line.payload || {}; - sessionId = stringValue(payload.id); - cwd = stringValue(payload.cwd); - source = stringValue(payload.source); + try { + while (true) { + const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, null); + if (bytesRead === 0) break; + const text = remainder + decoder.write(buffer.subarray(0, bytesRead)); + let start = 0; + let nextNewline = text.indexOf('\n', start); + while (nextNewline !== -1) { + const rawLine = text.slice(start, nextNewline); + if (rawLine.trim()) { + const parsed = parseCodexLine(rawLine); + if (parsed) onLine(parsed); + } + start = nextNewline + 1; + nextNewline = text.indexOf('\n', start); + } + remainder = text.slice(start); } - if (line.type === 'turn_context' && !model) { - model = stringValue(line.payload?.model); + + remainder += decoder.end(); + if (remainder.trim()) { + const parsed = parseCodexLine(remainder); + if (parsed) onLine(parsed); } + } finally { + fs.closeSync(fd); } - - if (!sessionId) sessionId = path.basename(filePath, '.jsonl'); - return { sessionId, cwd, source, model }; } export function findCodexDirs(): string[] { const home = process.env.HOME || process.env.USERPROFILE || ''; const dirs: string[] = []; - const sessionsDir = path.join(home, '.codex', 'sessions'); - if (fs.existsSync(sessionsDir)) dirs.push(sessionsDir); + for (const name of ['sessions', 'archived_sessions', 'archived-sessions']) { + const sessionsDir = path.join(home, '.codex', name); + if (fs.existsSync(sessionsDir)) dirs.push(sessionsDir); + } return dirs; } @@ -424,37 +458,24 @@ function findAllJsonlFiles(dir: string): string[] { function parseCodexSessionFile(filePath: string): Session | null { assertTrustedPath(filePath); - let raw: string; + const meta: CodexSessionMeta = { sessionId: '', cwd: '', source: '', model: '' }; + const state = createCodexState(''); + let parsedLineCount = 0; + try { - const content = readFileSafe(filePath); - if (content === null) return null; - raw = content; + readCodexJsonlStreaming(filePath, (line) => { + parsedLineCount++; + handleCodexLine(line, state, meta); + }); } catch { return null; } - const lines = parseCodexLines(raw); - if (lines.length === 0) return null; + if (parsedLineCount === 0) return null; + if (!meta.sessionId) meta.sessionId = path.basename(filePath, '.jsonl'); - const meta = extractSessionMeta(lines, filePath); const wsName = projectNameFromCwd(meta.cwd); const wsId = `codex-${wsName}-${meta.sessionId.slice(0, 8)}`; - const state = createCodexState(meta.model); - - for (const line of lines) { - const ts = line.timestamp ? new Date(line.timestamp).getTime() : null; - updateTimestamps(state, ts); - - if (line.type === 'event_msg') { - handleEventMsg(line.payload || {}, state, ts, meta.model); - continue; - } - if (line.type === 'turn_context') { - handleTurnContext(line.payload || {}, state); - continue; - } - if (line.type === 'response_item') handleResponseItem(line.payload || {}, state, ts, meta.model); - } flushCodexTurn(state, meta.model); if (state.requests.length === 0) return null; @@ -474,5 +495,6 @@ function parseCodexSessionFile(filePath: string): Session | null { modelUsage, endReason, hasDevcontainer: detectDevcontainerFromRequests(state.requests, meta.cwd), + workspaceRootPath: meta.cwd || undefined, }); } diff --git a/src/core/parser-harnesses.ts b/src/core/parser-harnesses.ts index dcf988f..a09adac 100644 --- a/src/core/parser-harnesses.ts +++ b/src/core/parser-harnesses.ts @@ -5,6 +5,7 @@ /* External harness collection registry for parser orchestration. */ +import * as fs from 'fs'; import { Workspace, Session } from './types'; import { findClaudeDirs, parseClaudeSessions, parseClaudeSessionsAsync } from './parser-claude'; import { findCodexDirs, parseCodexSessions } from './parser-codex'; @@ -26,7 +27,8 @@ interface ExternalHarnessCollector { function addSession(workspaces: WorkspaceMap, sessions: Session[], session: Session, rootPath: string): void { sessions.push(session); if (!workspaces.has(session.workspaceId)) { - workspaces.set(session.workspaceId, { id: session.workspaceId, name: session.workspaceName, path: rootPath }); + const sessionRootPath = session.workspaceRootPath && fs.existsSync(session.workspaceRootPath) ? session.workspaceRootPath : rootPath; + workspaces.set(session.workspaceId, { id: session.workspaceId, name: session.workspaceName, path: sessionRootPath }); } } @@ -118,4 +120,4 @@ export async function collectExternalHarnessesAsync( if (handlers.yieldToLoop) await handlers.yieldToLoop(); } -} \ No newline at end of file +} diff --git a/src/core/schemas.ts b/src/core/schemas.ts index 509e13b..02b332c 100644 --- a/src/core/schemas.ts +++ b/src/core/schemas.ts @@ -98,6 +98,7 @@ export const SessionSchema = z.object({ endReason: z.enum(['shutdown', 'active', 'aborted', 'unknown']).optional(), hasDevcontainer: z.boolean().optional(), customInstructionsBytes: z.number().optional(), + workspaceRootPath: z.string().optional(), }).passthrough(); /* ---- Validation helper ---- */ diff --git a/src/core/types/session-types.ts b/src/core/types/session-types.ts index f96c66d..e982e02 100644 --- a/src/core/types/session-types.ts +++ b/src/core/types/session-types.ts @@ -150,6 +150,9 @@ export interface Session { * (e.g. CLI session without `cwd`, or multi-root workspace). Used by the * `instruction-bloat` rule to detect always-on-context bloat. */ customInstructionsBytes?: number; + /** Resolved project root for CLI harnesses that record cwd separately from + * launch location/source. Used by config-health workspace scans. */ + workspaceRootPath?: string; /** How the session was launched (Claude only, currently). * interactive — user typed `claude` in a terminal or used Claude Desktop. * programmatic — spawned by another tool via the SDK (e.g. GitHub Copilot