From 0b30150e51cea6598a43f642d98d5410d9fdce35 Mon Sep 17 00:00:00 2001 From: Samuel Pennington Date: Fri, 8 May 2026 06:09:21 +0000 Subject: [PATCH 1/2] fix(cloud-task): drop unsafe append in log-gap inconsistency path When `reconcileCloudLogGapOnce` fetched local + S3 logs and they came back with fewer entries than `expectedCount`, the fallback appended `newEntries` (the snapshot's tail slice at positions [expectedCount-N, expectedCount]) on top of `session.events` and ratcheted `processedLineCount = latestCount + newEntries.length`. The appended slice isn't contiguous with `latestCount`, so events ended up with duplicates and gaps and the count drifted past entries we never had. With cloud-task watchers reconnecting every ~2 min on `error: 'terminated'`, this stacked indefinitely and OOM'd the V8 renderer heap (16 renderer crashes observed in a single morning). Now the inconsistency path logs the warning and returns; the next snapshot/log update retries once the source has caught up. Generated-By: PostHog Code Task-Id: 3294a546-b9be-404e-ba5c-5d762cc95bd8 --- .../features/sessions/service/service.test.ts | 44 +++++-------------- .../features/sessions/service/service.ts | 16 +++---- 2 files changed, 17 insertions(+), 43 deletions(-) diff --git a/apps/code/src/renderer/features/sessions/service/service.test.ts b/apps/code/src/renderer/features/sessions/service/service.test.ts index 953c1f3df..5ffe3ebf2 100644 --- a/apps/code/src/renderer/features/sessions/service/service.test.ts +++ b/apps/code/src/renderer/features/sessions/service/service.test.ts @@ -1527,7 +1527,7 @@ describe("SessionService", () => { expect(mockSessionStoreSetters.appendEvents).not.toHaveBeenCalled(); }); - it("processes a pending cloud log gap after active reconciliation finishes", async () => { + it("queues a pending cloud log gap when stale fetches can't fill it, without appending", async () => { const service = getSessionService(); let sessionState = createMockSession({ taskRunId: "run-123", @@ -1628,38 +1628,18 @@ describe("SessionService", () => { }); resolveFirstLocalLogs(""); + // The pending request must drain after the in-flight one resolves — + // verify the second readLocalLogs call eventually happens. await vi.waitFor(() => { - expect(mockSessionStoreSetters.appendEvents).toHaveBeenCalledTimes(2); - }); - expect(mockSessionStoreSetters.appendEvents).toHaveBeenNthCalledWith( - 1, - "run-123", - [ - expect.objectContaining({ - message: expect.objectContaining({ - params: { entry: firstEntry }, - }), - }), - ], - 6, - ); - expect(mockSessionStoreSetters.appendEvents).toHaveBeenNthCalledWith( - 2, - "run-123", - [ - expect.objectContaining({ - message: expect.objectContaining({ - params: { entry: secondEntry }, - }), - }), - expect.objectContaining({ - message: expect.objectContaining({ - params: { entry: thirdEntry }, - }), - }), - ], - 8, - ); + expect(mockTrpcLogs.readLocalLogs.query).toHaveBeenCalledTimes(2); + }); + // Stale fetches can't fill the gap; we must NOT append the snapshot's + // tail slice (positions [expectedCount-N, expectedCount]) on top of an + // events array that's still at processedLineCount=5 — that path used + // to corrupt the array with duplicates/gaps and ratchet + // processedLineCount past entries we don't actually have, leading to + // unbounded growth on long-running cloud runs. + expect(mockSessionStoreSetters.appendEvents).not.toHaveBeenCalled(); }); it("flips status to connected on _posthog/run_started", async () => { const service = getSessionService(); diff --git a/apps/code/src/renderer/features/sessions/service/service.ts b/apps/code/src/renderer/features/sessions/service/service.ts index 519f444f9..a84f5135a 100644 --- a/apps/code/src/renderer/features/sessions/service/service.ts +++ b/apps/code/src/renderer/features/sessions/service/service.ts @@ -3560,23 +3560,17 @@ export class SessionService { return; } + // The fetched logs lag behind expectedCount and `newEntries` is the latest + // tail slice of the snapshot — appending it here would create duplicates + // and gaps in `session.events` (and bump processedLineCount past entries + // we don't actually have). Skip; the next snapshot/log update will retry + // once the source has caught up. log.warn("Cloud task log count inconsistency", { taskRunId, currentCount, expectedCount, entriesReceived: newEntries.length, }); - let newEvents = convertStoredEntriesToEvents(newEntries); - newEvents = this.filterSkippedPromptEvents(taskRunId, session, newEvents); - if (hasSessionPromptEvent(newEvents)) { - sessionStoreSetters.clearTailOptimisticItems(taskRunId); - } - sessionStoreSetters.appendEvents( - taskRunId, - newEvents, - latestCount + newEntries.length, - ); - this.updatePromptStateFromEvents(taskRunId, newEvents); } private createBaseSession( From 7e676a9eadeabeef82848c2f1433d98cb99038f8 Mon Sep 17 00:00:00 2001 From: Samuel Pennington Date: Fri, 8 May 2026 06:17:09 +0000 Subject: [PATCH 2/2] log fetchedCount in cloud-log inconsistency warning Make it immediately clear from a single log line how far behind the S3/local fetch was relative to expectedCount. Generated-By: PostHog Code Task-Id: 3294a546-b9be-404e-ba5c-5d762cc95bd8 --- apps/code/src/renderer/features/sessions/service/service.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/code/src/renderer/features/sessions/service/service.ts b/apps/code/src/renderer/features/sessions/service/service.ts index a84f5135a..bcfda3422 100644 --- a/apps/code/src/renderer/features/sessions/service/service.ts +++ b/apps/code/src/renderer/features/sessions/service/service.ts @@ -3569,6 +3569,7 @@ export class SessionService { taskRunId, currentCount, expectedCount, + fetchedCount: rawEntries.length, entriesReceived: newEntries.length, }); }