Skip to content

Commit ede0cae

Browse files
d-cs and claude committed
fix(run-engine): only count replica misses the primary can serve; skip fallback without a replica
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent a83d30a commit ede0cae

2 files changed

Lines changed: 14 additions & 11 deletions

File tree

internal-packages/run-engine/src/engine/index.ts

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -282,7 +282,7 @@ export class RunEngine {
282282
"run_engine.snapshots_since.replica_miss",
283283
{
284284
description:
285-
"getSnapshotsSince reads where the since snapshot was not yet on the read replica and the query was retried on the primary",
285+
"getSnapshotsSince reads where the since snapshot was not yet on the read replica and was served from the primary",
286286
}
287287
);
288288

@@ -1932,7 +1932,10 @@ export class RunEngine {
19321932
snapshotId: string;
19331933
tx?: PrismaClientOrTransaction;
19341934
}): Promise<RunExecutionData[] | null> {
1935-
const useReplica = !tx && this.options.readReplicaSnapshotsSinceEnabled === true;
1935+
const useReplica =
1936+
!tx &&
1937+
this.options.readReplicaSnapshotsSinceEnabled === true &&
1938+
this.readOnlyPrisma !== this.prisma;
19361939
const prisma = tx ?? (useReplica ? this.readOnlyPrisma : this.prisma);
19371940

19381941
const query = async (client: PrismaClientOrTransaction) => {
@@ -1946,15 +1949,16 @@ export class RunEngine {
19461949
if (useReplica && e instanceof ExecutionSnapshotNotFoundError) {
19471950
// Expected during replica lag: the runner learned the snapshot id from the writer
19481951
// before the replica caught up. Serve the read from the writer instead of failing
1949-
// the poll.
1950-
this.snapshotsSinceReplicaMissCounter.add(1);
1951-
this.logger.warn("getSnapshotsSince: snapshot not yet on replica, retrying on primary", {
1952-
runId,
1953-
snapshotId,
1954-
});
1955-
1952+
// the poll. Only count/warn when the writer actually has the snapshot - a permanent
1953+
// miss (bogus or pruned snapshot id) is a real error, not replica lag.
19561954
try {
1957-
return await query(this.prisma);
1955+
const result = await query(this.prisma);
1956+
this.snapshotsSinceReplicaMissCounter.add(1);
1957+
this.logger.warn("getSnapshotsSince: snapshot not yet on replica, served from primary", {
1958+
runId,
1959+
snapshotId,
1960+
});
1961+
return result;
19581962
} catch (retryError) {
19591963
this.logger.error("Failed to getSnapshotsSince", {
19601964
message: retryError instanceof Error ? retryError.message : retryError,

internal-packages/run-engine/src/engine/tests/getSnapshotsSince.test.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ import {
1515
} from "./helpers/snapshotTestHelpers.js";
1616
import { generateFriendlyId } from "@trigger.dev/core/v3/isomorphic";
1717

18-
1918
vi.setConfig({ testTimeout: 120_000 });
2019

2120
describe("RunEngine getSnapshotsSince", () => {

0 commit comments

Comments
 (0)