Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ RUN apk add --no-cache curl
ENV NODE_ENV=production

COPY --from=build /usr/src/app ./
RUN pnpm install --frozen-lockfile
RUN pnpm install --frozen-lockfile \
# Run as the non-root `node` user (uid 1000, shipped by node:22-alpine).
# Ponder writes its cache under the workdir and pnpm reads /pnpm at runtime,
# so both must be owned by `node`.
&& chown -R node:node /usr/src/app /pnpm

USER node

HEALTHCHECK \
--start-period=24h \
Expand Down
106 changes: 89 additions & 17 deletions src/application/helpers/orderbookClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,16 @@ import { pgSchema, integer, text } from "drizzle-orm/pg-core";
import { encodeAbiParameters, keccak256, type Hex } from "viem";
import { type OrderType } from "../../utils/order-types";
import { COMPOSABLE_COW_HANDLER_ADDRESSES, ORDERBOOK_API_URLS } from "../../data";
import { BOOTSTRAP_MAX_PAGES, BOOTSTRAP_PAGE_SIZE, ORDERBOOK_HTTP_TIMEOUT_MS, SIGNING_SCHEME_EIP1271 } from "../../constants";
import {
BOOTSTRAP_MAX_PAGES,
BOOTSTRAP_PAGE_SIZE,
ORDERBOOK_HTTP_TIMEOUT_MS,
ORDERBOOK_MAX_RETRIES,
ORDERBOOK_RETRY_BASE_MS,
ORDERBOOK_RETRY_BUDGET_MS,
ORDERBOOK_RETRY_MAX_DELAY_MS,
SIGNING_SCHEME_EIP1271,
} from "../../constants";
import { decodeEip1271Signature } from "../decoders/erc1271Signature";
import { fetchWithTimeout, TimeoutError, withTimeout } from "./withTimeout";
import { log } from "./logger";
Expand Down Expand Up @@ -300,6 +309,75 @@ export async function fetchOwnerOrderStatuses(

// ─── API calls ───────────────────────────────────────────────────────────────

/**
 * Raised when an orderbook API request ends in a non-2xx response that will not
 * be retried any further.
 *
 * Kept separate from the "UID missing from a 2xx body" case so that callers and
 * dashboards can tell an unavailable API apart from an order that simply is not
 * on the API yet.
 */
export class OrderbookUnavailableError extends Error {
  /** HTTP status code of the last response received. */
  public readonly status: number;
  /** Label of the endpoint that was being called (e.g. "ob:account"). */
  public readonly endpoint: string;

  constructor(status: number, endpoint: string) {
    super(`[COW:orderbook-unavailable] ${endpoint} responded ${status}`);
    // Assign in the same order as the parameter-property form: status, endpoint, name.
    this.status = status;
    this.endpoint = endpoint;
    this.name = "OrderbookUnavailableError";
  }
}

/** Resolve (with no value) once `ms` milliseconds have elapsed. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}

/**
 * Parse a `Retry-After` header value into a delay in milliseconds.
 *
 * Accepts either a number of seconds or a date string understood by
 * `Date.parse`. Negative delays and dates in the past are clamped to 0.
 *
 * @param value Raw header value, or null when the header is absent.
 * @returns The delay in milliseconds, or null when the value is absent, blank
 *   or unparseable.
 */
function parseRetryAfter(value: string | null): number | null {
  // Trim first: `Number("   ")` is 0, which would otherwise turn a blank header
  // into "retry immediately" instead of "no usable hint".
  const trimmed = value?.trim();
  if (!trimmed) return null;

  const seconds = Number(trimmed);
  if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000);

  const date = Date.parse(trimmed);
  if (!Number.isNaN(date)) return Math.max(0, date - Date.now());

  return null;
}

/**
 * `fetchWithTimeout` plus bounded retry/backoff for transient orderbook errors.
 *
 * Behaviour:
 * - 2xx: the Response is returned untouched for the caller to read.
 * - 429: retried after the `Retry-After` delay when the header parses,
 *   otherwise after exponential backoff.
 * - 5xx: retried after exponential backoff
 *   (ORDERBOOK_RETRY_BASE_MS * 2^attempt).
 * - Any other non-2xx status is not retried and throws immediately.
 *
 * Every single sleep is capped at ORDERBOOK_RETRY_MAX_DELAY_MS. Retrying stops
 * once ORDERBOOK_MAX_RETRIES retries have been made, or when the next sleep
 * would push the cumulative sleep time past ORDERBOOK_RETRY_BUDGET_MS. Only
 * sleep time counts toward that budget; time spent inside the requests
 * themselves does not.
 *
 * The body of every non-2xx response is cancelled, because nothing reads it and
 * an unread body can keep the underlying connection in use.
 *
 * @param url Fully-built request URL.
 * @param init Optional fetch options, passed through unchanged on every attempt.
 * @param endpoint Short label passed to `fetchWithTimeout` and used in logs and errors.
 * @returns The first 2xx Response.
 * @throws OrderbookUnavailableError when a non-2xx response will not be retried.
 * @throws TimeoutError propagated unchanged from `fetchWithTimeout`.
 */
async function fetchOrderbook(
  url: string,
  init: RequestInit | undefined,
  endpoint: string,
): Promise<Response> {
  let spent = 0;
  for (let attempt = 0; ; attempt++) {
    const response = await fetchWithTimeout(url, init, ORDERBOOK_HTTP_TIMEOUT_MS, endpoint);
    if (response.ok) return response;

    // The failure body is never read. Cancel it (fire-and-forget, errors ignored)
    // so the connection is released rather than waiting on garbage collection.
    // Status and headers remain readable afterwards.
    void response.body?.cancel().catch(() => undefined);

    const retryable = response.status === 429 || response.status >= 500;
    if (!retryable || attempt >= ORDERBOOK_MAX_RETRIES) {
      throw new OrderbookUnavailableError(response.status, endpoint);
    }

    const retryAfterMs =
      response.status === 429 ? parseRetryAfter(response.headers.get("retry-after")) : null;
    const backoffMs = ORDERBOOK_RETRY_BASE_MS * 2 ** attempt;
    const delay = Math.min(retryAfterMs ?? backoffMs, ORDERBOOK_RETRY_MAX_DELAY_MS);

    // Fail fast rather than hold the block transaction open past our budget.
    if (spent + delay > ORDERBOOK_RETRY_BUDGET_MS) {
      throw new OrderbookUnavailableError(response.status, endpoint);
    }

    log("warn", "ob:retry", { endpoint, status: response.status, attempt: attempt + 1, delayMs: delay, retryAfterMs });
    await sleep(delay);
    spent += delay;
  }
}

/** Fetch orders for an owner with pagination. maxPages limits how many pages are fetched (0 = unlimited).
* signingScheme, if provided, is appended as a query param — the API filters server-side when supported,
* reducing payload for owners with many ECDSA orders mixed with composable ones.
Expand All @@ -321,23 +399,18 @@ async function fetchAccountOrders(
if (signingScheme) params.set("signingScheme", signingScheme);
const url = `${apiBaseUrl}/api/v1/account/${owner}/orders?${params.toString()}`;
try {
const response = await fetchWithTimeout(
url,
undefined,
ORDERBOOK_HTTP_TIMEOUT_MS,
"ob:account",
);
if (!response.ok) {
log("warn", "ob:accountError", { status: response.status, owner });
break;
}
const response = await fetchOrderbook(url, undefined, "ob:account");
const page = (await response.json()) as OrderbookOrder[];
allOrders.push(...page);
pagesFetched++;
if (page.length < pageSize) break; // last page
if (maxPages > 0 && pagesFetched >= maxPages) break; // page cap reached
offset += page.length;
} catch (err) {
if (err instanceof OrderbookUnavailableError) {
log("error", "ob:unavailable", { endpoint: "ob:account", status: err.status, owner });
break;
}
if (err instanceof TimeoutError) {
log("warn", "ob:accountTimeout", { owner, offset, after: ORDERBOOK_HTTP_TIMEOUT_MS });
break;
Expand Down Expand Up @@ -368,23 +441,22 @@ async function fetchOrdersByUids(
const chunkResults = await Promise.all(
chunks.map(async (chunk, idx) => {
try {
const response = await fetchWithTimeout(
const response = await fetchOrderbook(
url,
{
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(chunk),
},
ORDERBOOK_HTTP_TIMEOUT_MS,
"ob:byUids",
);
if (!response.ok) {
log("warn", "ob:batchFetchError", { status: response.status, uids: chunk.length, offset: idx * BATCH_SIZE });
return [] as OrderbookOrder[];
}
const raw = (await response.json()) as { order: OrderbookOrder }[];
return raw.flatMap((item) => (item?.order != null ? [item.order] : []));
} catch (err) {
if (err instanceof OrderbookUnavailableError) {
log("error", "ob:unavailable", { endpoint: "ob:byUids", status: err.status, uids: chunk.length, offset: idx * BATCH_SIZE });
return [] as OrderbookOrder[];
}
if (err instanceof TimeoutError) {
log("warn", "ob:batchFetchTimeout", { uids: chunk.length, offset: idx * BATCH_SIZE, after: ORDERBOOK_HTTP_TIMEOUT_MS });
return [] as OrderbookOrder[];
Expand Down
16 changes: 16 additions & 0 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ export const DETERMINISTIC_CANCEL_SWEEP_INTERVAL = 100n;
*/
export const ORDERBOOK_HTTP_TIMEOUT_MS = 10_000;

/**
 * Bounded retry for transient orderbook failures (HTTP 429 / 5xx), consumed by
 * `fetchOrderbook` in src/application/helpers/orderbookClient.ts.
 *
 * These calls run inside Ponder block handlers that hold a DB transaction open,
 * so the retry loop must stay short — we cannot honor a large `Retry-After` by
 * sleeping (Postgres would terminate the connection). A `Retry-After` longer
 * than ORDERBOOK_RETRY_MAX_DELAY_MS is therefore clamped to that cap, and the
 * loop sleeps for at most ORDERBOOK_RETRY_BUDGET_MS in total. Only sleep time is
 * counted against the budget; the duration of the retried requests themselves
 * is not. If the next sleep would exceed the budget, we fail fast and let the
 * next poll (~ORDERBOOK_POLL_INTERVAL blocks later) retry naturally — but the
 * failure is logged as a rate-limit/server error, not as "order not on API yet".
 */
/** Retries allowed after the initial request, i.e. at most 3 requests per call. */
export const ORDERBOOK_MAX_RETRIES = 2;
/** Exponential backoff base in ms: the delay before retry n (0-based) is base * 2^n. */
export const ORDERBOOK_RETRY_BASE_MS = 250;
/** Upper bound in ms on any single sleep, whether it comes from backoff or `Retry-After`. */
export const ORDERBOOK_RETRY_MAX_DELAY_MS = 2_000;
/** Upper bound in ms on the cumulative sleep time of one retry loop. */
export const ORDERBOOK_RETRY_BUDGET_MS = 4_000;

/**
* Hard wall-clock cap for a block handler's aggregate `context.client.multicall`
* call (OrderDiscoveryPoller, CancellationWatcher). viem has no per-call signal; the timer races the promise and
Expand Down
76 changes: 76 additions & 0 deletions tests/helpers/orderbookClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ vi.mock("ponder", () => ({
}));

import * as data from "../../src/data";
import { ORDERBOOK_MAX_RETRIES } from "../../src/constants";
import { fetchOrderStatusByUids, fetchOwnerOrderStatuses } from "../../src/application/helpers/orderbookClient";

// ─── Helpers ─────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -211,6 +212,81 @@ describe("fetchOrderStatusByUids", () => {
});
});

// ─── Resilience: 429 / 5xx handling ───────────────────────────────────────────

/**
 * Intercept `console.error` and collect every line that parses as JSON — the
 * logger writes warn/error entries as JSON via console.error.
 *
 * Returns lookups by `msg` plus a `restore` that removes the spy.
 */
function captureErrorLogs() {
  const entries: Record<string, unknown>[] = [];

  const consoleSpy = vi.spyOn(console, "error").mockImplementation((raw: unknown) => {
    let entry: Record<string, unknown>;
    try {
      entry = JSON.parse(String(raw));
    } catch {
      // Non-JSON line: not a structured log entry, so drop it.
      return;
    }
    entries.push(entry);
  });

  const matches = (msg: string) => (entry: Record<string, unknown>) => entry.msg === msg;

  return {
    has: (msg: string) => entries.some(matches(msg)),
    find: (msg: string) => entries.find(matches(msg)),
    restore: () => consoleSpy.mockRestore(),
  };
}

// Each test points the test chain at a local HTTP server and checks how
// fetchOrderStatusByUids reacts to rate-limit responses.
describe("orderbook resilience (429 / 5xx)", () => {
  beforeAll(() => {
    data.ORDERBOOK_API_URLS[TEST_CHAIN_ID] = "http://placeholder";
  });

  afterAll(() => {
    delete (data.ORDERBOOK_API_URLS as Record<number, string | undefined>)[TEST_CHAIN_ID];
  });

  it("retries a 429 (honoring Retry-After) and succeeds on a later attempt", async () => {
    // The first request is rate-limited; every later request succeeds.
    let hits = 0;
    const { url: baseUrl, close: shutdown } = await startServer((_req, res) => {
      hits += 1;
      if (hits === 1) {
        res.writeHead(429, { "retry-after": "0", "content-type": "application/json" });
        res.end(JSON.stringify({ message: "rate limited" }));
      } else {
        res.writeHead(200, { "content-type": "application/json" });
        res.end(JSON.stringify([makeWrappedOrder(UID_A, "fulfilled")]));
      }
    });
    data.ORDERBOOK_API_URLS[TEST_CHAIN_ID] = baseUrl;
    const captured = captureErrorLogs();
    try {
      const statuses = await fetchOrderStatusByUids(makeContext(), TEST_CHAIN_ID, [UID_A]);
      expect(hits).toBe(2);
      expect(statuses.get(UID_A)?.status).toBe("fulfilled");
      expect(captured.has("ob:unavailable")).toBe(false);
    } finally {
      captured.restore();
      await shutdown();
    }
  });

  it("classifies a persistent 429 as ob:unavailable and stops after bounded retries", async () => {
    // Every request is rate-limited, so the client must give up on its own.
    let hits = 0;
    const { url: baseUrl, close: shutdown } = await startServer((_req, res) => {
      hits += 1;
      res.writeHead(429, { "retry-after": "0", "content-type": "application/json" });
      res.end(JSON.stringify({ message: "rate limited" }));
    });
    data.ORDERBOOK_API_URLS[TEST_CHAIN_ID] = baseUrl;
    const captured = captureErrorLogs();
    try {
      const statuses = await fetchOrderStatusByUids(makeContext(), TEST_CHAIN_ID, [UID_A]);
      // Bounded: one initial request plus the allowed retries.
      expect(hits).toBe(ORDERBOOK_MAX_RETRIES + 1);
      // The UID is absent from the result map...
      expect(statuses.has(UID_A)).toBe(false);
      // ...but the cause is logged distinctly.
      expect(captured.find("ob:unavailable")?.status).toBe(429);
    } finally {
      captured.restore();
      await shutdown();
    }
  });
});

// ─── fetchOwnerOrderStatuses tests ────────────────────────────────────────────

const FAKE_OWNER = "0xaabbccddEEff0011223344556677889900aabbcc" as Hex;
Expand Down
Loading