Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/retry-sigsegv.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@trigger.dev/core": patch
---

Retry `TASK_PROCESS_SIGSEGV` task crashes under the user's retry policy instead of failing the run on the first segfault. SIGSEGV in Node tasks is frequently non-deterministic (native addon races, JIT/GC interaction, near-OOM in native code, host issues), so retrying on a fresh process often succeeds. The retry is gated by the task's existing `retry` config + `maxAttempts` — same path `TASK_PROCESS_SIGTERM` and uncaught exceptions already use — so tasks without a retry policy still fail fast.
34 changes: 33 additions & 1 deletion packages/core/src/v3/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,20 @@ export function truncateStack(stack: string | undefined): string {
].join("\n");
}

/**
 * Caps an error message at MAX_MESSAGE_LENGTH characters to prevent OOM.
 *
 * @param message - The raw error message; may be `undefined` or empty.
 * @returns An empty string when `message` is falsy, the message unchanged when
 * its length is within MAX_MESSAGE_LENGTH, and otherwise the first
 * MAX_MESSAGE_LENGTH characters followed by the "...[truncated]" marker.
 */
export function truncateMessage(message: string | undefined): string {
  // Missing and empty messages both normalize to the empty string.
  if (!message) {
    return "";
  }

  // Messages that already fit are passed through untouched.
  if (message.length <= MAX_MESSAGE_LENGTH) {
    return message;
  }

  // Keep the leading portion and flag that the tail was dropped.
  return message.slice(0, MAX_MESSAGE_LENGTH) + "...[truncated]";
}

/**
* Parses an unknown error into a TaskRunError structure.
* Handles InternalError, built-in Error, strings, and custom error objects.
*/
export function parseError(error: unknown): TaskRunError {
if (isInternalError(error)) {
return {
Expand Down Expand Up @@ -301,6 +308,10 @@ export function createJsonErrorObject(error: TaskRunError): SerializedError {
}

/**
 * Sanitizes a TaskRunError by removing null characters and truncating
 * oversized fields to prevent OOM.
 * Used to clean errors before storage or transmission.
 */
export function sanitizeError(error: TaskRunError): TaskRunError {
switch (error.type) {
case "BUILT_IN_ERROR": {
Expand Down Expand Up @@ -351,6 +362,11 @@ export function sanitizeError(error: TaskRunError): TaskRunError {
}
}

/**
 * Determines whether an error should trigger a retry attempt.
 * Returns true for errors that are retriable under the user's retry policy.
 * Returns false for non-retriable errors (like SIGKILL_TIMEOUT and OOM kills);
 * OOM kills are handled by a separate machine-bump path rather than this one.
 */
export function shouldRetryError(error: TaskRunError): boolean {
switch (error.type) {
case "INTERNAL_ERROR": {
Expand All @@ -361,7 +377,6 @@ export function shouldRetryError(error: TaskRunError): boolean {
case "CONFIGURED_INCORRECTLY":
case "TASK_ALREADY_RUNNING":
case "TASK_PROCESS_SIGKILL_TIMEOUT":
case "TASK_PROCESS_SIGSEGV":
case "TASK_PROCESS_OOM_KILLED":
case "TASK_PROCESS_MAYBE_OOM_KILLED":
case "TASK_RUN_CANCELLED":
Expand Down Expand Up @@ -398,6 +413,7 @@ export function shouldRetryError(error: TaskRunError): boolean {
case "TASK_RUN_UNCAUGHT_EXCEPTION":
case "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE":
case "TASK_PROCESS_SIGTERM":
case "TASK_PROCESS_SIGSEGV":
return true;

default:
Expand All @@ -419,6 +435,10 @@ export function shouldRetryError(error: TaskRunError): boolean {
}
}

/**
* Checks if retry settings should be looked up for this error type.
* Some errors (like SIGSEGV, SIGTERM, uncaught exceptions) respect user retry config.
*/
export function shouldLookupRetrySettings(error: TaskRunError): boolean {
switch (error.type) {
case "INTERNAL_ERROR": {
Expand Down Expand Up @@ -448,6 +468,10 @@ export function shouldLookupRetrySettings(error: TaskRunError): boolean {
}
}

/**
* Corrects error stack traces by normalizing file paths and removing noise.
* Used to make stack traces more readable in logs and error UI.
*/
export function correctErrorStackTrace(
stackTrace: string,
projectDir?: string,
Expand Down Expand Up @@ -490,6 +514,10 @@ function correctStackTraceLine(line: string, projectDir?: string, isDev?: boolea
return line.trim();
}

/**
* Groups Zod validation issues by task index for better error reporting.
* Used when parsing task metadata fails.
*/
export function groupTaskMetadataIssuesByTask(tasks: any, issues: z.ZodIssue[]) {
return issues.reduce(
(acc, issue) => {
Expand Down Expand Up @@ -773,6 +801,10 @@ const findSignalInMessage = (message?: string, truncateLength = 100) => {
}
};

/**
* Enhances TaskRunError with additional context like signals, OOM detection.
* Used to enrich errors before displaying or logging.
*/
export function taskRunErrorEnhancer(error: TaskRunError): EnhanceError<TaskRunError> {
switch (error.type) {
case "BUILT_IN_ERROR": {
Expand Down
36 changes: 35 additions & 1 deletion packages/core/test/errors.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import { describe, it, expect } from "vitest";
import { truncateStack, truncateMessage, parseError, sanitizeError } from "../src/v3/errors.js";
import {
truncateStack,
truncateMessage,
parseError,
sanitizeError,
shouldRetryError,
shouldLookupRetrySettings,
} from "../src/v3/errors.js";
import type { TaskRunError } from "../src/v3/schemas/common.js";

// Helper: build a fake stack with N frames
function buildStack(messageLines: string[], frameCount: number): string {
Expand Down Expand Up @@ -238,3 +246,29 @@ describe("truncateStack message line bounding", () => {
expect(result).toContain("...[truncated]");
});
});

// Pins the retry classification of process-signal error codes: which codes
// shouldRetryError reports as retriable and which ones also look up retry settings.
describe("shouldRetryError + shouldLookupRetrySettings", () => {
  // Builds a minimal INTERNAL_ERROR carrying only the code under test.
  // The cast is required because `code` is an arbitrary string here and the
  // other TaskRunError fields are intentionally omitted.
  function internalErrorWithCode(code: string): TaskRunError {
    return { type: "INTERNAL_ERROR", code } as TaskRunError;
  }

  it("retries SIGSEGV (changed from non-retriable) and looks up retry settings", () => {
    const sigsegvError = internalErrorWithCode("TASK_PROCESS_SIGSEGV");

    expect(shouldRetryError(sigsegvError)).toBe(true);
    expect(shouldLookupRetrySettings(sigsegvError)).toBe(true);
  });

  it("retries SIGTERM via the same path", () => {
    const sigtermError = internalErrorWithCode("TASK_PROCESS_SIGTERM");

    expect(shouldRetryError(sigtermError)).toBe(true);
    expect(shouldLookupRetrySettings(sigtermError)).toBe(true);
  });

  it("still does not retry SIGKILL timeout", () => {
    const sigkillTimeoutError = internalErrorWithCode("TASK_PROCESS_SIGKILL_TIMEOUT");

    expect(shouldRetryError(sigkillTimeoutError)).toBe(false);
  });

  it("still does not retry OOM kills (handled by the separate machine-bump path)", () => {
    // Both the confirmed and the suspected OOM codes must stay non-retriable.
    const oomCodes = ["TASK_PROCESS_OOM_KILLED", "TASK_PROCESS_MAYBE_OOM_KILLED"];

    for (const oomCode of oomCodes) {
      expect(shouldRetryError(internalErrorWithCode(oomCode))).toBe(false);
    }
  });
});
Loading