From bdb3677451a1ca313669bfaad5899448ff1a154c Mon Sep 17 00:00:00 2001 From: Codex Date: Tue, 2 Jun 2026 00:10:08 -0700 Subject: [PATCH] fix: de-flake CI test suite (5 root causes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Electron Build matrix's "Run tests" step failed intermittently from several independent flakes. Root-caused and fixed five: 1. Unhandled rejection crashes the whole run (production bug). createTerminalInvalidationHandler dispatched the refresh thunks fire-and-forget from a timer, but those thunks re-throw on failure, so a transient background-refresh failure became an unhandled promise rejection that fails `npm test` even though every test "passed". Contain the rejection in the handler (new onRefreshError dep) and at the App.tsx inventory dispatch site. Each contained rejection is tagged with its source (terminal-directory vs session-window) so the shared onRefreshError logs an accurate label. 2. AgentChatView reload test: mock-queue leak (test isolation). The "server-restart recovery" describe had no beforeEach, so a test's getAgentTimelinePage.mockResolvedValueOnce queue leaked into the next test under shuffled order. Add the same mock reset the first describe uses. 3. AgentChatView reload test: LazyMarkdown Suspense swap race (test only). Assistant bodies render through React.lazy + Suspense; the first render shows a fallback

that the lazy chunk later replaces, detaching the node `findByText` captured before toBeInTheDocument runs. Mock LazyMarkdown to render MarkdownRenderer synchronously, matching MessageBubble/tool-coalesce/ agent-chat-polish-flow tests. (Production behaviour is correct.) 4. FreshAgentView redundant snapshot fetch (production bug). The snapshot-load effect dispatches updatePaneContent to persist its own resolved resumeSessionId/status, and the whole paneContent object (plus those output fields) was in its dependency array — so the self-update retriggered the effect, firing a second snapshot fetch for the same session on every load. Depend only on the fields that identify which snapshot to load; read non-identity fields live via paneContentRef.current. 5. sessions.changed background-refresh unhandled rejection (production bug). Same failure class as #1: the App.tsx sessions.changed websocket handler dispatched queueActiveSessionWindowRefresh() fire-and-forget with no .catch(). That thunk re-throws when it falls through to fetchSessionWindow() without committed window data (e.g. a sessions.changed before the sidebar window commits, or after a failed direct fetch retry), so a transient refresh failure leaked an unhandled rejection. Contain it with the same .catch(log.debug) guard the inventory site uses, and cover it with a regression test that captures unhandledRejection. (Found by independent review of the original four-fix commit.) All proven with looped shuffled runs (e.g. #3: 0/120, #4: 0/40 + RED->GREEN single-fetch assertion, #5: RED leak captured -> GREEN, 20/20 shuffled). Full default-config unit suite green (305 files / 3526 tests). Typecheck clean; no new lint warnings. Independently reviewed (gpt-5.5, read-only): PASSED, one debug-log labeling nit, fixed above. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/App.tsx | 16 ++++-- src/components/fresh-agent/FreshAgentView.tsx | 10 ++-- src/lib/terminal-invalidation-handler.ts | 30 +++++++++-- .../components/App.ws-bootstrap.test.tsx | 51 ++++++++++++++++++ .../agent-chat/AgentChatView.reload.test.tsx | 29 ++++++++++ .../fresh-agent/FreshAgentView.test.tsx | 53 +++++++++++++++++++ .../lib/terminal-invalidation-handler.test.ts | 44 +++++++++++++++ 7 files changed, 224 insertions(+), 9 deletions(-) diff --git a/src/App.tsx b/src/App.tsx index c31ca209d..742d99065 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -792,6 +792,12 @@ export default function App() { patchSessionRunningStateFromTerminalMeta, queueActiveSessionWindowRefresh: () => queueActiveSessionWindowRefresh() as any, fetchTerminalDirectoryWindow: (payload) => fetchTerminalDirectoryWindow(payload) as any, + onRefreshError: (error, source) => log.debug( + source === 'session-window' + ? 'active session window background refresh failed' + : 'terminal directory background refresh failed', + error, + ), }) const unsubscribe = ws.onMessage((msg) => { @@ -839,7 +845,9 @@ export default function App() { const rev = typeof msg.revision === 'number' ? msg.revision : -1 if (rev > lastSessionsRevision) { lastSessionsRevision = rev - void appStore.dispatch(queueActiveSessionWindowRefresh() as any) + // Fire-and-forget refresh: the thunk re-throws on failure, so contain the + // rejection rather than leak an unhandled rejection (matches the inventory site). + void appStore.dispatch(queueActiveSessionWindowRefresh() as any).catch((error: unknown) => log.debug('active session window refresh failed', error)) } } if (msg.type === 'settings.updated') { @@ -911,11 +919,13 @@ export default function App() { upsert: terminalMeta, remove: removedTerminalMetaIds, })) + // Fire-and-forget refreshes: the thunks re-throw on failure, so + // contain the rejection rather than leak an unhandled rejection. void appStore.dispatch(fetchTerminalDirectoryWindow({ surface: 'sidebar', priority: 'visible', - }) as any) - void appStore.dispatch(queueActiveSessionWindowRefresh() as any) + }) as any).catch((error: unknown) => log.debug('terminal directory background refresh failed', error)) + void appStore.dispatch(queueActiveSessionWindowRefresh() as any).catch((error: unknown) => log.debug('active session window refresh failed', error)) } if (msg.type === 'codex.activity.list.response') { const requestId = typeof msg.requestId === 'string' ? msg.requestId : '' diff --git a/src/components/fresh-agent/FreshAgentView.tsx b/src/components/fresh-agent/FreshAgentView.tsx index 18f418916..c57e47421 100644 --- a/src/components/fresh-agent/FreshAgentView.tsx +++ b/src/components/fresh-agent/FreshAgentView.tsx @@ -693,14 +693,18 @@ export function FreshAgentView({ setLoadError(error instanceof Error ? error.message : 'Failed to load session') }) return () => controller.abort() + // Depend only on what identifies *which* snapshot to load. This effect + // dispatches updatePaneContent to persist its own resolved resumeSessionId/ + // status; listing the whole paneContent object (or those output fields) made + // that self-update retrigger the effect, firing a redundant second fetch for + // the same session. Current values for non-identity fields are read live via + // paneContentRef.current inside the effect. }, [ claudeSession?.lost, dispatch, - paneContent, paneContent.provider, - paneContent.resumeSessionId, + paneContent.createRequestId, paneContent.sessionId, - paneContent.status, paneContent.sessionType, paneId, autoTitleIdentity, diff --git a/src/lib/terminal-invalidation-handler.ts b/src/lib/terminal-invalidation-handler.ts index 82eea7cb5..6437c2191 100644 --- a/src/lib/terminal-invalidation-handler.ts +++ b/src/lib/terminal-invalidation-handler.ts @@ -5,6 +5,9 @@ type DispatchLike = (action: unknown) => unknown type RefreshThunk = unknown +/** Identifies which fire-and-forget background refresh produced a rejection. */ +export type RefreshSource = 'terminal-directory' | 'session-window' + type TerminalInvalidationDeps = { dispatch: DispatchLike upsertTerminalMeta: (payload: TerminalMetaRecord[]) => PayloadAction @@ -21,6 +24,14 @@ type TerminalInvalidationDeps = { setTimeout?: typeof setTimeout clearTimeout?: typeof clearTimeout refreshDelayMs?: number + /** + * Invoked when a fire-and-forget background refresh rejects. The refresh + * thunks re-throw on failure (callers that await them rely on this), but the + * handler dispatches them from a timer without awaiting, so it contains the + * rejection here instead of letting it surface as an unhandled rejection. + * `source` identifies which refresh failed so the callback can log accurately. + */ + onRefreshError?: (error: unknown, source: RefreshSource) => void } function isTerminalMetaRecord(value: unknown): value is TerminalMetaRecord { @@ -36,13 +47,26 @@ export function createTerminalInvalidationHandler(deps: TerminalInvalidationDeps const delayMs = deps.refreshDelayMs ?? 50 let refreshTimer: ReturnType | undefined + const dispatchBackgroundRefresh = (thunk: RefreshThunk, source: RefreshSource) => { + const result = deps.dispatch(thunk) as unknown + // Contain rejections from the fire-and-forget refresh dispatch: the thunks + // re-throw on failure, and a failed background refresh must never become an + // unhandled promise rejection (which crashes test runs and is silently + // uncaught in production). + if (result && typeof (result as { catch?: unknown }).catch === 'function') { + void (result as Promise).catch((error: unknown) => { + deps.onRefreshError?.(error, source) + }) + } + } + const runRefresh = () => { refreshTimer = undefined - deps.dispatch(deps.fetchTerminalDirectoryWindow({ + dispatchBackgroundRefresh(deps.fetchTerminalDirectoryWindow({ surface: 'sidebar', priority: 'visible', - })) - deps.dispatch(deps.queueActiveSessionWindowRefresh()) + }), 'terminal-directory') + dispatchBackgroundRefresh(deps.queueActiveSessionWindowRefresh(), 'session-window') } const scheduleRefresh = () => { diff --git a/test/unit/client/components/App.ws-bootstrap.test.tsx b/test/unit/client/components/App.ws-bootstrap.test.tsx index d10c09b2a..572593cc5 100644 --- a/test/unit/client/components/App.ws-bootstrap.test.tsx +++ b/test/unit/client/components/App.ws-bootstrap.test.tsx @@ -1556,6 +1556,57 @@ describe('App WS bootstrap recovery', () => { }) }) + it('contains a failing queued session-window refresh from a sessions.changed broadcast instead of leaking an unhandled rejection', async () => { + // Regression: the sessions.changed handler dispatched queueActiveSessionWindowRefresh() + // fire-and-forget with no .catch(). That thunk re-throws when it falls through to + // fetchSessionWindow() without committed window data (e.g. a sessions.changed before the + // sidebar window commits, or after a failed direct fetch retry), so a transient refresh + // failure leaked an unhandled rejection that fails the whole test run even though every + // test "passed" — the same failure class the terminal.inventory site already contains. + const store = createStore() + + // Reject every snapshot fetch. The bootstrap sidebar load fails but is contained by + // ensureSidebarSessionsWindow (so no window ever commits -> hasCommittedWindow stays + // false), and the queued refresh then takes the re-throwing fetchSessionWindow branch. + fetchSidebarSessionsSnapshot.mockRejectedValue(new Error('window snapshot unavailable')) + + const unhandled: unknown[] = [] + const onUnhandled = (reason: unknown) => { unhandled.push(reason) } + process.on('unhandledRejection', onUnhandled) + try { + render( + + + + ) + + await waitFor(() => { + expect(messageHandler).toBeTypeOf('function') + }) + + act(() => { + messageHandler?.({ type: 'sessions.changed', revision: 1 }) + }) + + // The queued refresh actually ran and failed (proves the path is exercised, not vacuous). + await waitFor(() => { + expect(store.getState().sessions.windows.sidebar?.error).toBe('window snapshot unavailable') + }) + + // Give Node a chance to surface any unhandled rejection from the fire-and-forget dispatch. + await new Promise((resolve) => setTimeout(resolve, 0)) + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect( + unhandled.filter( + (reason) => reason instanceof Error && reason.message.includes('window snapshot unavailable'), + ), + ).toHaveLength(0) + } finally { + process.off('unhandledRejection', onUnhandled) + } + }) + it('ignores legacy sessions.patch messages when bootstrapping against an already-ready socket', async () => { const baselineProjects = [ { diff --git a/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx b/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx index 7f1be8306..6fa11aed3 100644 --- a/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx +++ b/test/unit/client/components/agent-chat/AgentChatView.reload.test.tsx @@ -62,6 +62,22 @@ vi.mock('@/lib/api', async () => { } }) +// Render markdown bodies synchronously. The real LazyMarkdown wraps MarkdownRenderer +// in React.lazy + Suspense, so the first assistant-message render shows a fallback +//

that the lazy chunk later replaces with the real node. Tests that capture the +// fallback node via `await findByText(...)` and then assert `toBeInTheDocument()` race +// that swap (the captured node detaches), producing an order-dependent flake. Mocking +// LazyMarkdown to render MarkdownRenderer directly removes the swap. Matches the mock +// used by the other agent-chat tests (MessageBubble, tool-coalesce, agent-chat-polish-flow). +vi.mock('@/components/markdown/LazyMarkdown', async () => { + const { MarkdownRenderer } = await import('@/components/markdown/MarkdownRenderer') + return { + LazyMarkdown: ({ content }: { content: string }) => ( + + ), + } +}) + function makeStore() { return configureStore({ reducer: { @@ -1891,6 +1907,19 @@ function getPaneContent(store: ReturnType, tabId: string, pane } describe('AgentChatView server-restart recovery', () => { + // Reset the timeline API mocks before each test. Without this, a test's + // mockResolvedValueOnce queue leaks into the next test under shuffled order + // (e.g. one test consuming another's "Revision N body" page), producing an + // order-dependent flake. Mirrors the reset in the first describe's beforeEach. + beforeEach(() => { + localStorage.clear() + getAgentTimelinePage.mockReset() + getAgentTurnBody.mockReset() + getAgentChatCapabilities.mockReset() + setSessionMetadata.mockReset() + setSessionMetadata.mockResolvedValue(undefined) + }) + afterEach(() => { cleanup() wsSend.mockClear() diff --git a/test/unit/client/components/fresh-agent/FreshAgentView.test.tsx b/test/unit/client/components/fresh-agent/FreshAgentView.test.tsx index 15d36529a..c1b398c40 100644 --- a/test/unit/client/components/fresh-agent/FreshAgentView.test.tsx +++ b/test/unit/client/components/fresh-agent/FreshAgentView.test.tsx @@ -1153,6 +1153,59 @@ describe('FreshAgentView', () => { ])) }) + it('fetches the initial snapshot once and does not refetch from its own pane update', async () => { + const store = createStore() + // First fetch returns a distinct snapshot; the default mockResolvedValue + // ("Codex turn") would answer any *second* fetch. The snapshot-load effect + // persists resumeSessionId via updatePaneContent, and if that self-update + // retriggers the effect, the redundant second fetch overwrites the loaded + // content with the default — a wasteful double network request in production + // and an order-dependent flake in tests. + apiMock.getFreshAgentThreadSnapshot.mockResolvedValueOnce({ + status: 'idle', + summary: 'Codex summary', + capabilities: { send: true, interrupt: true, fork: true }, + turns: [ + { id: 'turn-user-1', role: 'user', items: [{ id: 'item-user-1', kind: 'text', text: 'Loaded user turn' }] }, + { id: 'turn-assistant-1', role: 'assistant', items: [{ id: 'item-assistant-1', kind: 'text', text: 'Loaded assistant turn' }] }, + ], + }) + store.dispatch(initLayout({ + tabId: 'tab-1', + paneId: 'pane-1', + content: { + kind: 'fresh-agent', + sessionType: 'freshcodex', + provider: 'codex', + createRequestId: 'req-single-fetch', + sessionId: 'thread-single-fetch', + status: 'idle', + }, + })) + + render( + + + , + ) + + // Wait until the effect has persisted resumeSessionId back into pane content + // (the self-update that previously retriggered the effect). + await waitFor(() => { + const layout = store.getState().panes.layouts['tab-1'] + const resumeSessionId = layout?.type === 'leaf' && layout.content.kind === 'fresh-agent' + ? layout.content.resumeSessionId + : undefined + expect(resumeSessionId).toBe('thread-single-fetch') + }) + // Let any spurious self-triggered refetch run before asserting. + await act(async () => { await Promise.resolve() }) + + expect(apiMock.getFreshAgentThreadSnapshot).toHaveBeenCalledTimes(1) + // The loaded snapshot stays rendered (not overwritten by a second fetch). + expect(screen.getByText('Loaded assistant turn')).toBeInTheDocument() + }) + it('resets auto-title for a new conversation even if the stale prior snapshot had user turns', async () => { const store = createStore() apiMock.getFreshAgentThreadSnapshot.mockResolvedValueOnce({ diff --git a/test/unit/client/lib/terminal-invalidation-handler.test.ts b/test/unit/client/lib/terminal-invalidation-handler.test.ts index 628976a6c..93d31c2a8 100644 --- a/test/unit/client/lib/terminal-invalidation-handler.test.ts +++ b/test/unit/client/lib/terminal-invalidation-handler.test.ts @@ -178,4 +178,48 @@ describe('createTerminalInvalidationHandler', () => { await vi.advanceTimersByTimeAsync(50) expect(refresh.fetchTerminalDirectoryWindow).toHaveBeenCalledTimes(1) }) + + it('contains background-refresh dispatch failures instead of leaking an unhandled rejection', async () => { + vi.useFakeTimers() + const refresh = createRefreshDoubles() + const refreshError = new Error('refresh failed') + // Mirror the real store: dispatching the refresh thunks returns a promise + // that rejects, because fetchTerminalDirectoryWindow / the active-window + // refresh re-throw on API failure. The handler dispatches them + // fire-and-forget from a timer, so it must contain the rejection — otherwise + // a transient background-refresh failure becomes an unhandled rejection that + // crashes the whole vitest run (and is silently uncaught in production). + const dispatch = vi.fn((action: unknown) => { + if (action === refresh.terminalDirectoryRefreshThunk || action === refresh.activeSessionWindowRefreshThunk) { + return Promise.reject(refreshError) + } + return undefined + }) + const onRefreshError = vi.fn() + const handler = createTerminalInvalidationHandler({ + dispatch, + upsertTerminalMeta, + removeTerminalMeta, + patchSessionRunningStateFromTerminalMeta, + queueActiveSessionWindowRefresh: refresh.queueActiveSessionWindowRefresh, + fetchTerminalDirectoryWindow: refresh.fetchTerminalDirectoryWindow, + setTimeout: globalThis.setTimeout, + clearTimeout: globalThis.clearTimeout, + refreshDelayMs: 50, + onRefreshError, + }) + + handler.handle({ type: 'terminals.changed', revision: 12 }) + await vi.advanceTimersByTimeAsync(50) + // Let the contained .catch() handlers settle. + await Promise.resolve() + await Promise.resolve() + + expect(onRefreshError).toHaveBeenCalledTimes(2) + // Each rejection is tagged with which refresh failed so a shared + // onRefreshError can log an accurate label instead of mislabeling a + // session-window failure as a terminal-directory one. + expect(onRefreshError).toHaveBeenCalledWith(refreshError, 'terminal-directory') + expect(onRefreshError).toHaveBeenCalledWith(refreshError, 'session-window') + }) })