Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions packages/core/observability/sentry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { describe, expect, it } from "bun:test";
import { isBenignDeliveryFailure } from "@/core/observability/sentry";

describe("isBenignDeliveryFailure", () => {
it("flags Slack message_not_found on update as benign", () => {
expect(
isBenignDeliveryFailure({
op: "update",
error: "An API error occurred: message_not_found",
}),
).toBe(true);
});

it("flags Slack message_not_found on delete as benign", () => {
expect(
isBenignDeliveryFailure({
op: "delete",
error: "An API error occurred: message_not_found",
}),
).toBe(true);
});

it("flags Discord unknown_message on update as benign", () => {
expect(
isBenignDeliveryFailure({
op: "update",
error: "Unknown Message (unknown_message)",
}),
).toBe(true);
});

it("does not flag channel_not_found as benign (bot may have been removed)", () => {
expect(
isBenignDeliveryFailure({
op: "send",
error: "An API error occurred: channel_not_found",
}),
).toBe(false);
});

it("does not flag benign-looking errors on send", () => {
// Send failures should never be treated as benign — if a send fails,
// the user didn't get the message.
expect(
isBenignDeliveryFailure({
op: "send",
error: "message_not_found",
}),
).toBe(false);
});

it("does not flag generic auth errors as benign", () => {
expect(
isBenignDeliveryFailure({
op: "update",
error: "invalid_auth",
}),
).toBe(false);
});
});
33 changes: 33 additions & 0 deletions packages/core/observability/sentry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,17 @@ function buildFailureFingerprint(
function captureDeliveryFailure(failure: FailureRecord): void {
if (!initialized) return;

// Some delivery failures are expected race conditions, not bugs:
// - `message_not_found` on update/delete: the status message was already
// deleted or replaced (common when finalization races with teardown).
// - `unknown_message` / `not_found` on update/delete for Discord/Lark.
// Skipping these entirely keeps Sentry quota focused on real issues like
// auth/network/payload errors. We still keep them in the local
// `deliveryStats` counters for debugging.
if (!failure.rateLimited && isBenignDeliveryFailure(failure)) {
return;
}

const dedupKey = `${failure.platform}|${failure.op}|${failure.rateLimited ? "rl" : "err"}|${failure.channelId}`;
const interval = failure.rateLimited
? RATE_LIMIT_CAPTURE_INTERVAL_MS
Expand Down Expand Up @@ -197,3 +208,25 @@ function captureDeliveryFailure(failure: FailureRecord): void {
log.debug("Sentry capture failed", { error: String(hookErr) });
}
}

/**
* Error strings that represent expected races, not real bugs, for a given op.
* Keep this list narrow; when in doubt, send to Sentry rather than suppress.
*/
const BENIGN_UPDATE_DELETE_PATTERNS = [
// Slack
"message_not_found",
// Discord
"unknown_message",
// Lark (generic "not found" codes vary; substring match keeps it simple)
"not_found",
];

export function isBenignDeliveryFailure(failure: {
op: DeliveryOp;
error: string;
}): boolean {
if (failure.op !== "update" && failure.op !== "delete") return false;
const text = failure.error.toLowerCase();
return BENIGN_UPDATE_DELETE_PATTERNS.some((pattern) => text.includes(pattern));
}
41 changes: 41 additions & 0 deletions packages/shared/queue/coalesced-update-queue.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,45 @@ describe("CoalescedUpdateQueue", () => {

expect(seen).toEqual(["M1:a", "M2:b"]);
});

it("does not leak unhandled rejections when clear() drops waiting jobs", async () => {
// Reproduces ODE-DEAMON-5: Bottleneck rejects dropped jobs with
// "This limiter has been stopped." and a subsequent clear() would
// also reject with "stop() has already been called". Both must be
// swallowed internally so they don't surface as unhandled rejections.
const unhandled: unknown[] = [];
const onUnhandled = (reason: unknown) => {
unhandled.push(reason);
};
process.on("unhandledRejection", onUnhandled);

try {
const queue = new CoalescedUpdateQueue<string | undefined>(50, async (_key, payload) => {
// Simulate a slow worker so the job is still in the limiter's queue
// when clear() drops it.
await new Promise((resolve) => setTimeout(resolve, 25));
return payload;
});

// First enqueue starts running; subsequent ones wait in the queue and
// will be dropped by clear().
const p1 = queue.enqueue({ channelId: "C1", messageId: "M1" }, "a");
const p2 = queue.enqueue({ channelId: "C1", messageId: "M2" }, "b");
const p3 = queue.enqueue({ channelId: "C1", messageId: "M3" }, "c");

queue.clear();
// Idempotent clear — must not re-throw "stop() has already been called".
queue.clear();

// All pending callers receive undefined instead of hanging.
const results = await Promise.all([p1, p2, p3]);
expect(results).toEqual([undefined, undefined, undefined]);

// Let any async rejections settle before assertion.
await new Promise((resolve) => setTimeout(resolve, 50));
expect(unhandled).toEqual([]);
} finally {
process.off("unhandledRejection", onUnhandled);
}
});
});
30 changes: 23 additions & 7 deletions packages/shared/queue/coalesced-update-queue.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,29 @@ export class CoalescedUpdateQueue<TResult, TPayload = string> {
payload,
resolve,
});
void this.limiter.schedule(async () => {
const pending = this.pendingByKey.get(dedupKey);
if (!pending) return;
this.pendingByKey.delete(dedupKey);
const result = await this.worker(pending.key, pending.payload);
pending.resolve(result);
});
// Use `.catch` instead of `void` so that rejections from a stopped
// limiter (Bottleneck rejects dropped jobs with
// "This limiter has been stopped." and "stop() has already been
// called") don't leak as unhandled rejections. Any in-flight jobs
// dropped by `clear()` are already resolved via `pendingByKey` above,
// so swallowing the rejection here is safe.
this.limiter
.schedule(async () => {
const pending = this.pendingByKey.get(dedupKey);
if (!pending) return;
this.pendingByKey.delete(dedupKey);
const result = await this.worker(pending.key, pending.payload);
pending.resolve(result);
})
.catch(() => {
// Job was dropped during teardown or rejected by the worker.
// Ensure the enqueue promise still settles so callers don't hang.
const pending = this.pendingByKey.get(dedupKey);
if (pending) {
this.pendingByKey.delete(dedupKey);
pending.resolve(undefined as TResult);
}
});
});
}

Expand Down
Loading