From 8dcebcbff2588ee4cf1a0e5f741ffa062d03af21 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 20 Apr 2026 22:49:32 +0200 Subject: [PATCH 01/11] feat(ensrainbow): implement background database bootstrapping and new readiness endpoint - ENSRainbow now starts its HTTP server immediately, allowing for non-blocking container startup. - Introduced a new `GET /ready` endpoint to indicate database readiness, returning `200 { status: "ok" }` once ready, and `503 Service Unavailable` during bootstrap. - Updated API routes to return structured `ServiceUnavailableError` responses while the database is bootstrapping. - Replaced the previous Docker entrypoint script with a new command that handles the database download and validation in the background. - Enhanced the ENSIndexer to wait for the new `/ready` endpoint instead of `/health`. - Migration note: switch from polling `GET /health` to `GET /ready` for database readiness checks. --- .changeset/ready-endpoint-bg-bootstrap.md | 15 + .../src/lib/ensrainbow/singleton.ts | 10 +- apps/ensrainbow/Dockerfile | 26 +- apps/ensrainbow/package.json | 1 + apps/ensrainbow/scripts/entrypoint.sh | 158 ---------- apps/ensrainbow/src/cli.ts | 84 ++++- .../src/commands/entrypoint-command.test.ts | 106 +++++++ .../src/commands/entrypoint-command.ts | 286 ++++++++++++++++++ .../src/commands/server-command.test.ts | 110 ++++++- .../ensrainbow/src/commands/server-command.ts | 2 +- apps/ensrainbow/src/lib/api.ts | 68 ++++- apps/ensrainbow/src/lib/server.ts | 111 ++++++- .../src/content/docs/ensrainbow/usage/api.mdx | 48 ++- packages/ensrainbow-sdk/src/client.test.ts | 30 ++ packages/ensrainbow-sdk/src/client.ts | 88 +++++- packages/ensrainbow-sdk/src/consts.ts | 1 + 16 files changed, 940 insertions(+), 204 deletions(-) create mode 100644 .changeset/ready-endpoint-bg-bootstrap.md delete mode 100644 apps/ensrainbow/scripts/entrypoint.sh create mode 100644 apps/ensrainbow/src/commands/entrypoint-command.test.ts create mode 100644 
apps/ensrainbow/src/commands/entrypoint-command.ts diff --git a/.changeset/ready-endpoint-bg-bootstrap.md b/.changeset/ready-endpoint-bg-bootstrap.md new file mode 100644 index 0000000000..02e5585c2c --- /dev/null +++ b/.changeset/ready-endpoint-bg-bootstrap.md @@ -0,0 +1,15 @@ +--- +"ensrainbow": minor +"@ensnode/ensrainbow-sdk": minor +"ensindexer": patch +--- + +ENSRainbow now starts its HTTP server immediately and downloads/validates its database in the background, instead of blocking container startup behind a netcat placeholder. + +- **New `GET /ready` endpoint**: returns `200 { status: "ok" }` once the database is attached, or `503 Service Unavailable` while ENSRainbow is still bootstrapping. `/health` is now a pure liveness probe that succeeds as soon as the HTTP server is listening. +- **503 responses for API routes during bootstrap**: `/v1/heal`, `/v1/labels/count`, `/v1/config`, and `/v1/version` return a structured `ServiceUnavailableError` (`errorCode: 503`) until the database is ready. +- **New Docker entrypoint**: the container now runs `pnpm run entrypoint` from `apps/ensrainbow` (implemented in Node via `tsx src/cli.ts entrypoint`), which replaces `scripts/entrypoint.sh` and the `netcat` workaround. +- **SDK client**: added `EnsRainbowApiClient.ready()`, plus `EnsRainbow.ReadyResponse` / `EnsRainbow.ServiceUnavailableError` types and `ErrorCode.ServiceUnavailable`. +- **ENSIndexer**: `waitForEnsRainbowToBeReady` now polls `/ready` (via `ensRainbowClient.ready()`) instead of `/health`, so it correctly waits for the database to finish bootstrapping. + +**Migration**: if you previously polled `GET /health` to gate traffic on database readiness, switch to `GET /ready` (or `client.ready()`). `/health` is still available and still returns `200`, but it now indicates liveness only. 
diff --git a/apps/ensindexer/src/lib/ensrainbow/singleton.ts b/apps/ensindexer/src/lib/ensrainbow/singleton.ts index c6785560c0..11ac5a197f 100644 --- a/apps/ensindexer/src/lib/ensrainbow/singleton.ts +++ b/apps/ensindexer/src/lib/ensrainbow/singleton.ts @@ -56,18 +56,18 @@ export function waitForEnsRainbowToBeReady(): Promise { ensRainbowInstance: ensRainbowUrl.href, }); - waitForEnsRainbowToBeReadyPromise = pRetry(async () => ensRainbowClient.health(), { + waitForEnsRainbowToBeReadyPromise = pRetry(async () => ensRainbowClient.ready(), { retries: 60, // This allows for a total of over 1 hour of retries with 1 minute between attempts. minTimeout: secondsToMilliseconds(60), maxTimeout: secondsToMilliseconds(60), onFailedAttempt: ({ error, attemptNumber, retriesLeft }) => { logger.warn({ - msg: `ENSRainbow health check failed`, + msg: `ENSRainbow readiness check failed`, attempt: attemptNumber, retriesLeft, error: retriesLeft === 0 ? error : undefined, ensRainbowInstance: ensRainbowUrl.href, - advice: `This might be due to ENSRainbow having a cold start, which can take 30+ minutes.`, + advice: `This might be due to ENSRainbow still bootstrapping its database, which can take 30+ minutes during a cold start.`, }); }, }) @@ -81,12 +81,12 @@ export function waitForEnsRainbowToBeReady(): Promise { const errorMessage = error instanceof Error ? error.message : "Unknown error"; logger.error({ - msg: `ENSRainbow health check failed after multiple attempts`, + msg: `ENSRainbow readiness check failed after multiple attempts`, error, ensRainbowInstance: ensRainbowUrl.href, }); - // Throw the error to terminate the ENSIndexer process due to the failed health check of a critical dependency + // Throw the error to terminate the ENSIndexer process due to the failed readiness check of a critical dependency throw new Error(errorMessage, { cause: error instanceof Error ? 
error : undefined, }); diff --git a/apps/ensrainbow/Dockerfile b/apps/ensrainbow/Dockerfile index b4b4736c7f..bd0f719832 100644 --- a/apps/ensrainbow/Dockerfile +++ b/apps/ensrainbow/Dockerfile @@ -1,14 +1,10 @@ # Runtime image for ENSRainbow FROM node:24-slim AS runtime -# Install only essential system dependencies for runtime -# netcat-openbsd: Used during container initialization to keep the service port open -# while the database is being downloaded and validated (which can take up to 20 minutes). -# Without a listener on the port during this phase, Render's health checks fail and orchestration -# systems may mark the container as unhealthy or restart it prematurely. See scripts/entrypoint.sh for implementation details. -# Note: The netcat listener only keeps the port open and accepts connections; it does not respond -# to HTTP requests, so it will not work with Docker HEALTHCHECK commands that expect HTTP responses. See https://github.com/namehash/ensnode/issues/1610 -RUN apt-get update && apt-get install -y wget tar netcat-openbsd && rm -rf /var/lib/apt/lists/* +# Install only essential system dependencies for runtime. +# `wget` and `tar` are required by scripts/download-prebuilt-database.sh, which the in-process +# entrypoint spawns to fetch the pre-built database archive. 
+RUN apt-get update && apt-get install -y wget tar && rm -rf /var/lib/apt/lists/* # Set up pnpm ENV PNPM_HOME="/pnpm" @@ -34,16 +30,18 @@ COPY apps/ensrainbow/tsconfig.json apps/ensrainbow/ COPY apps/ensrainbow/vitest.config.ts apps/ensrainbow/ # Make scripts executable -RUN chmod +x /app/apps/ensrainbow/scripts/entrypoint.sh RUN chmod +x /app/apps/ensrainbow/scripts/download-prebuilt-database.sh # Set environment variables ENV NODE_ENV=production -# PORT will be used by entrypoint.sh, defaulting to 3223 if not set at runtime -# DB_SCHEMA_VERSION, LABEL_SET_ID, LABEL_SET_VERSION must be provided at runtime to the entrypoint +# PORT is consumed by the entrypoint command, defaulting to 3223 if not set at runtime. +# DB_SCHEMA_VERSION, LABEL_SET_ID, LABEL_SET_VERSION must be provided at runtime to the entrypoint. -# Default port, can be overridden by PORT env var for the entrypoint/serve command +# Default port, can be overridden by PORT env var for the entrypoint command EXPOSE 3223 -# Set the entrypoint -ENTRYPOINT ["/app/apps/ensrainbow/scripts/entrypoint.sh"] +# The entrypoint binds the HTTP server immediately (so /health and /ready respond while the +# database is still being downloaded) and runs download + validation in the background. +# See src/commands/entrypoint-command.ts for implementation details. 
+WORKDIR /app/apps/ensrainbow +ENTRYPOINT ["pnpm", "run", "entrypoint"] diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index 0dd0f7df6c..8e318e123b 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -13,6 +13,7 @@ "homepage": "https://github.com/namehash/ensnode/tree/main/apps/ensrainbow", "scripts": { "serve": "tsx src/cli.ts serve", + "entrypoint": "tsx src/cli.ts entrypoint", "ingest": "tsx src/cli.ts ingest", "ingest-ensrainbow": "tsx src/cli.ts ingest-ensrainbow", "validate": "tsx src/cli.ts validate", diff --git a/apps/ensrainbow/scripts/entrypoint.sh b/apps/ensrainbow/scripts/entrypoint.sh deleted file mode 100644 index 8bcc767ef0..0000000000 --- a/apps/ensrainbow/scripts/entrypoint.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Default values (can be overridden by environment variables) -DB_SCHEMA_VERSION="${DB_SCHEMA_VERSION:-}" -LABEL_SET_ID="${LABEL_SET_ID:-}" -LABEL_SET_VERSION="${LABEL_SET_VERSION:-}" -PORT="${PORT:-3223}" -DATA_DIR_NAME="data" # Name of the data directory within /app/apps/ensrainbow -APP_DIR="/app/apps/ensrainbow" -FINAL_DATA_DIR="${APP_DIR}/${DATA_DIR_NAME}" -DOWNLOAD_TEMP_DIR="${DOWNLOAD_TEMP_DIR:-/tmp/ensrainbow_download_temp}" -MARKER_FILE="${FINAL_DATA_DIR}/ensrainbow_db_ready" - -# Path for the data subdirectory, relative to APP_DIR. -# This assumes data is in ${APP_DIR}/${DATA_DIR_NAME}/data-${LABEL_SET_ID}/ -DB_SUBDIR_PATH="${DATA_DIR_NAME}/data-${LABEL_SET_ID}_${LABEL_SET_VERSION}" - -# Ensure required variables for download are set if we might download -if [ ! -f "${MARKER_FILE}" ]; then - if [ -z "$DB_SCHEMA_VERSION" ] || [ -z "$LABEL_SET_ID" ] || [ -z "$LABEL_SET_VERSION" ]; then - echo "Error: DB_SCHEMA_VERSION, LABEL_SET_ID, and LABEL_SET_VERSION environment variables must be set for initial ENSRainbow database download." 
- exit 1 - fi -fi - -echo "ENSRainbow Startup Script" -echo "-------------------------" -echo "Database Schema Version: $DB_SCHEMA_VERSION" -echo "Label Set ID: $LABEL_SET_ID" -echo "Label Set Version: $LABEL_SET_VERSION" -echo "Target Port: $PORT" -echo "Application Directory: $APP_DIR" -echo "Final Data Directory: $FINAL_DATA_DIR" -echo "Marker File: $MARKER_FILE" -echo "Database Directory: $DB_SUBDIR_PATH" -echo "-------------------------" - -# Start netcat listener in background -# See Dockerfile for documentation on why netcat is used during initialization -echo "Starting netcat listener on port ${PORT}..." -nc -lk -p "${PORT}" & -NC_PID=$! - -# Function to cleanup netcat on exit -cleanup_nc() { - if [ -n "${NC_PID:-}" ] && kill -0 "${NC_PID}" 2>/dev/null; then - echo "Killing netcat listener (PID: ${NC_PID})..." - kill "${NC_PID}" 2>/dev/null || true - wait "${NC_PID}" 2>/dev/null || true - fi -} - -# Register cleanup function to run on script exit -trap cleanup_nc EXIT - -# Change to the application directory for pnpm commands -cd "${APP_DIR}" - -# Check if data directory and marker file exist and if data is valid -if [ -d "${FINAL_DATA_DIR}" ] && [ -f "${MARKER_FILE}" ]; then - echo "Existing data directory and marker file found at ${FINAL_DATA_DIR}." - echo "Running database validation (lite) on existing data in ${DB_SUBDIR_PATH}..." - if pnpm run validate:lite --data-dir "${DB_SUBDIR_PATH}"; then - echo "Existing database is valid. Skipping download and extraction." - else - echo "Existing database validation failed. Will attempt to re-download." - echo "Cleaning up existing data directory before re-download..." - rm -rf "${FINAL_DATA_DIR}/"* # Remove potentially corrupt data - # The marker file is implicitly removed with FINAL_DATA_DIR - fi -fi - -# If marker file doesn't exist (meaning data is not ready or was cleared) -if [ ! -f "${MARKER_FILE}" ]; then - echo "Database not found or not ready. Proceeding with download and extraction." - - # 1. 
Ensure required variables for download are set (double check, crucial if logic path leads here) - if [ -z "$DB_SCHEMA_VERSION" ] || [ -z "$LABEL_SET_ID" ] || [ -z "$LABEL_SET_VERSION" ]; then - echo "Critical Error: DB_SCHEMA_VERSION, LABEL_SET_ID, and LABEL_SET_VERSION must be set to download the database." - exit 1 - fi - - # 2. Clean up any existing data and prepare directories - echo "Preparing directories for download..." - rm -rf "${FINAL_DATA_DIR}"/* # Ensure clean state if previous attempt failed mid-way - mkdir -p "${FINAL_DATA_DIR}" - rm -rf "${DOWNLOAD_TEMP_DIR}" # Clean up temp dir from previous runs if any - mkdir -p "${DOWNLOAD_TEMP_DIR}" - - # 3. Download the database archive - echo "Downloading pre-built ENSRainbowdatabase from labelset server (Schema: $DB_SCHEMA_VERSION, Label Set ID: - $LABEL_SET_ID, Label Set Version: $LABEL_SET_VERSION)..." - if ! OUT_DIR="${DOWNLOAD_TEMP_DIR}" \ - ENSRAINBOW_LABELSET_SERVER_URL="${ENSRAINBOW_LABELSET_SERVER_URL:-}" \ - "${APP_DIR}/scripts/download-prebuilt-database.sh" "$DB_SCHEMA_VERSION" "$LABEL_SET_ID" "$LABEL_SET_VERSION"; then - echo "Error: Failed to download database." - ls -R "${DOWNLOAD_TEMP_DIR}" # List contents for debugging - rm -rf "${DOWNLOAD_TEMP_DIR}" - exit 1 - fi - - DB_ARCHIVE_BASENAME="${LABEL_SET_ID}_${LABEL_SET_VERSION}.tgz" - DB_ARCHIVE_PATH="${DOWNLOAD_TEMP_DIR}/databases/${DB_SCHEMA_VERSION}/${DB_ARCHIVE_BASENAME}" - - if [ ! -f "$DB_ARCHIVE_PATH" ]; then - echo "Error: Expected database archive file not found at $DB_ARCHIVE_PATH after download attempt." - ls -R "${DOWNLOAD_TEMP_DIR}" - rm -rf "${DOWNLOAD_TEMP_DIR}" - exit 1 - fi - echo "Database archive downloaded to: $DB_ARCHIVE_PATH" - - # 4. Extract the database archive - echo "Extracting database archive..." - if ! tar -xzf "${DB_ARCHIVE_PATH}" -C "${FINAL_DATA_DIR}" --strip-components=1; then - echo "Error: Failed to extract database archive." 
- rm -f "${DB_ARCHIVE_PATH}" - rm -rf "${DOWNLOAD_TEMP_DIR}" - exit 1 - fi - echo "Database archive extracted to ${FINAL_DATA_DIR}" - - # 5. Clean up downloaded archive and temporary directory - echo "Cleaning up downloaded files..." - rm -f "${DB_ARCHIVE_PATH}" - rm -rf "${DOWNLOAD_TEMP_DIR}" - echo "Cleanup complete." - - # 6. Validate the newly extracted database - echo "Running database validation (lite) on newly extracted data in ${DB_SUBDIR_PATH}..." - if pnpm run validate:lite --data-dir "${DB_SUBDIR_PATH}"; then - echo "Newly extracted database is valid." - # Create marker file upon successful download, extraction, and validation - echo "Creating marker file: ${MARKER_FILE}" - touch "${MARKER_FILE}" - else - echo "Error: Newly extracted database validation failed! Data may be corrupted." - echo "Please check logs and database archive source. The marker file will not be created." - # Depending on policy, you might want to exit 1 here or clean up FINAL_DATA_DIR - exit 1 # Exit if validation fails to prevent running with bad data - fi -fi # End of download and extraction block - -# 7. Kill netcat before starting the server -echo "Stopping netcat listener before starting server..." -# Remove the EXIT trap before manual cleanup to avoid double cleanup on signals. -trap - EXIT -cleanup_nc -# Clear the PID now that we've manually cleaned up -NC_PID="" - -# 8. Start the ENSRainbow server -echo "Starting ENSRainbow server on port ${PORT} using data from ${APP_DIR}/${DB_SUBDIR_PATH}..." -echo "Sleeping for 2 seconds to allow netcat to release the port..." 
-sleep 2 -# pnpm commands were run from APP_DIR, ensure serve also sees --data-dir correctly -exec pnpm run serve --port "${PORT}" --data-dir "${DB_SUBDIR_PATH}" diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 1ac7c08e57..99a2873ee5 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -11,11 +11,12 @@ import type { ArgumentsCamelCase, Argv } from "yargs"; import { hideBin } from "yargs/helpers"; import yargs from "yargs/yargs"; -import { buildLabelSetId } from "@ensnode/ensnode-sdk"; +import { buildLabelSetId, buildLabelSetVersion } from "@ensnode/ensnode-sdk"; import { PortNumberSchema } from "@ensnode/ensnode-sdk/internal"; import { type ConvertSqlCommandCliArgs, convertCommand } from "@/commands/convert-command-sql"; import { type ConvertCsvCommandCliArgs, convertCsvCommand } from "@/commands/convert-csv-command"; +import { entrypointCommand } from "@/commands/entrypoint-command"; import { type IngestProtobufCommandCliArgs, ingestProtobufCommand, @@ -28,6 +29,22 @@ export interface CLIOptions { exitProcess?: boolean; } +/** + * yargs-parsed argument shape for the `entrypoint` command. + * + * `label-set-id` and `label-set-version` are coerced to their branded types via + * `buildLabelSetId` / `buildLabelSetVersion`, so the CLI layer works with primitive types + * and hands branded values to {@link entrypointCommand}. 
+ */ +interface EntrypointCommandCliArgs { + port: number; + "data-dir": string; + "db-schema-version": number; + "label-set-id": string; + "label-set-version": number; + "download-temp-dir"?: string; +} + export function createCLI(options: CLIOptions = {}) { const { exitProcess = true } = options; @@ -111,6 +128,71 @@ export function createCLI(options: CLIOptions = {}) { await serverCommand(serveCommandConfig); }, ) + .command( + "entrypoint", + "Start the ENS Rainbow API server immediately and bootstrap the database in the background", + (yargs: Argv) => { + return yargs + .option("port", { + type: "number", + description: "Port to listen on (overrides PORT env var if both are set)", + default: envConfig.port, + coerce: (port: number) => { + const result = PortNumberSchema.safeParse(port); + if (!result.success) { + const firstError = result.error.issues[0]; + throw new Error(`Invalid port: ${firstError?.message ?? "invalid port number"}`); + } + return result.data; + }, + }) + .option("data-dir", { + type: "string", + description: "Directory containing LevelDB data", + default: envConfig.dataDir, + }) + .option("db-schema-version", { + type: "number", + description: + "Expected database schema version (falls back to DB_SCHEMA_VERSION env var)", + default: envConfig.dbSchemaVersion, + }) + .option("label-set-id", { + type: "string", + description: "Label set id to download (falls back to LABEL_SET_ID env var)", + default: process.env.LABEL_SET_ID, + demandOption: !process.env.LABEL_SET_ID, + }) + .coerce("label-set-id", buildLabelSetId) + .option("label-set-version", { + type: "string", + description: + "Label set version to download (falls back to LABEL_SET_VERSION env var)", + default: process.env.LABEL_SET_VERSION, + demandOption: !process.env.LABEL_SET_VERSION, + }) + .coerce("label-set-version", buildLabelSetVersion) + .option("download-temp-dir", { + type: "string", + description: + "Temporary directory used to stage downloaded archives before extraction " 
+ + "(defaults to /.download-temp)", + default: process.env.DOWNLOAD_TEMP_DIR, + }); + }, + async (argv: ArgumentsCamelCase) => { + const dataDir = parseDataDirFromCli(argv["data-dir"]); + await entrypointCommand({ + port: argv.port, + dataDir, + dbSchemaVersion: argv["db-schema-version"], + labelSetId: argv["label-set-id"], + labelSetVersion: argv["label-set-version"], + downloadTempDir: argv["download-temp-dir"], + labelsetServerUrl: process.env.ENSRAINBOW_LABELSET_SERVER_URL, + }); + }, + ) .command( "validate", "Validate the integrity of the LevelDB database", diff --git a/apps/ensrainbow/src/commands/entrypoint-command.test.ts b/apps/ensrainbow/src/commands/entrypoint-command.test.ts new file mode 100644 index 0000000000..45b4ec8467 --- /dev/null +++ b/apps/ensrainbow/src/commands/entrypoint-command.test.ts @@ -0,0 +1,106 @@ +import { existsSync } from "node:fs"; +import { mkdir, rm, writeFile } from "node:fs/promises"; +import { join, resolve } from "node:path"; + +import { buildLabelSetId, buildLabelSetVersion } from "@ensnode/ensnode-sdk"; +import type { EnsRainbow } from "@ensnode/ensrainbow-sdk"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import type { AbsolutePath, DbSchemaVersion } from "@/config/types"; +import { DB_SCHEMA_VERSION, ENSRainbowDB } from "@/lib/database"; +import { DbNotReadyError, ENSRainbowServer } from "@/lib/server"; + +import { + DB_READY_MARKER_FILENAME, + type EntrypointCommandHandle, + entrypointCommand, +} from "./entrypoint-command"; + +/** + * These tests exercise the idempotent bootstrap path of the entrypoint command, where the marker + * file and a valid on-disk database already exist. We do not exercise the actual download script + * here (it requires network + a labelset server). 
+ */ +describe("entrypointCommand (existing DB on disk)", () => { + const testDataDir = resolve(process.cwd(), "test-data-entrypoint"); + const labelSetId = buildLabelSetId("entrypoint-test"); + const labelSetVersion = buildLabelSetVersion(0); + const dbSubdir = join(testDataDir, `data-${labelSetId}_${labelSetVersion}`); + const markerFile = join(testDataDir, DB_READY_MARKER_FILENAME); + const port = 3226; + const endpoint = `http://localhost:${port}`; + + let handle: EntrypointCommandHandle | undefined; + + beforeEach(async () => { + await rm(testDataDir, { recursive: true, force: true }); + await mkdir(testDataDir, { recursive: true }); + + // Seed a valid-looking database and marker so the entrypoint skips the download step. + const db = await ENSRainbowDB.create(dbSubdir); + await db.setPrecalculatedRainbowRecordCount(0); + await db.markIngestionFinished(); + await db.setLabelSetId(labelSetId); + await db.setHighestLabelSetVersion(labelSetVersion); + await db.close(); + + await writeFile(markerFile, ""); + }); + + afterEach(async () => { + if (handle) { + await handle.close(); + handle = undefined; + } + await rm(testDataDir, { recursive: true, force: true }); + }); + + it("starts the HTTP server immediately and marks /ready after attaching the existing DB", async () => { + handle = await entrypointCommand({ + port, + dataDir: testDataDir as AbsolutePath, + dbSchemaVersion: DB_SCHEMA_VERSION as DbSchemaVersion, + labelSetId, + labelSetVersion, + registerSignalHandlers: false, + }); + + // /health should respond as soon as entrypointCommand returns (HTTP server is already bound). 
+ const healthRes = await fetch(`${endpoint}/health`); + expect(healthRes.status).toBe(200); + const healthData = (await healthRes.json()) as EnsRainbow.HealthResponse; + expect(healthData).toEqual({ status: "ok" }); + const readyRes = await fetch(`${endpoint}/ready`); + expect(readyRes.status).toBe(503); + + await handle.bootstrapComplete; + + const readyRes2 = await fetch(`${endpoint}/ready`); + expect(readyRes2.status).toBe(200); + + const configRes = await fetch(`${endpoint}/v1/config`); + expect(configRes.status).toBe(200); + const configData = (await configRes.json()) as EnsRainbow.ENSRainbowPublicConfig; + expect(configData.labelSet.labelSetId).toBe(labelSetId); + expect(configData.recordsCount).toBe(0); + + // Marker should still be present after a successful idempotent attach. + expect(existsSync(markerFile)).toBe(true); + }); +}); + +describe("ENSRainbowServer (pending state smoke test)", () => { + it("createPending returns a server with isReady() === false and heal throwing DbNotReadyError", async () => { + const server = ENSRainbowServer.createPending(); + + expect(server.isReady()).toBe(false); + expect(server.serverLabelSet).toBeUndefined(); + + await expect( + server.heal("0x0000000000000000000000000000000000000000000000000000000000000000", { + labelSetId: undefined, + }), + ).rejects.toBeInstanceOf(DbNotReadyError); + }); +}); + diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts new file mode 100644 index 0000000000..3351678130 --- /dev/null +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -0,0 +1,286 @@ +import { spawn } from "node:child_process"; +import { existsSync, mkdirSync, rmSync } from "node:fs"; +import { writeFile } from "node:fs/promises"; +import { dirname, join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { serve } from "@hono/node-server"; + +import { stringifyConfig } from "@ensnode/ensnode-sdk/internal"; +import type { 
EnsRainbow } from "@ensnode/ensrainbow-sdk"; + +import { buildEnsRainbowPublicConfig } from "@/config/public"; +import type { AbsolutePath, DbSchemaVersion } from "@/config/types"; +import { createApi } from "@/lib/api"; +import { ENSRainbowDB } from "@/lib/database"; +import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; +import { logger } from "@/utils/logger"; + +export interface EntrypointCommandOptions { + port: number; + dataDir: AbsolutePath; + dbSchemaVersion: DbSchemaVersion; + labelSetId: string; + labelSetVersion: number; + /** + * Temporary directory used to stage downloaded archives before extraction. + * Defaults to `<dataDir>/.download-temp`. + */ + downloadTempDir?: string; + /** + * Labelset server URL override. If unset, the download script uses its default. + */ + labelsetServerUrl?: string | undefined; + /** + * Whether to register SIGTERM/SIGINT shutdown handlers. Defaults to `true`. + * Tests should pass `false` to avoid leaking handlers across cases. + */ + registerSignalHandlers?: boolean; +} + +/** + * Handle returned by {@link entrypointCommand} so callers (tests, embeddings) can close the + * HTTP server and DB, and observe when the background bootstrap pipeline has finished. + */ +export interface EntrypointCommandHandle { + /** + * Resolves once the background bootstrap has completed successfully. Never rejects: on + * failure the process is terminated via `process.exit(1)` (or the caller's exit-like hook). + */ + readonly bootstrapComplete: Promise; + /** + * Closes the HTTP server and attached database. + */ + close(): Promise; +} + +/** + * Name of the marker file written to `dataDir` once the database has been successfully + * downloaded, extracted, and validated. Matches the name used by the legacy `entrypoint.sh` + * so existing volumes remain compatible. + */ +export const DB_READY_MARKER_FILENAME = "ensrainbow_db_ready"; + +/** + * Runs the full ENSRainbow container lifecycle: + * + * 1. 
Starts the Hono HTTP server immediately with an unattached database (so `/health` and + * `/ready` respond right away, and orchestrator health checks don't kill the container). + * 2. Schedules a background bootstrap via `setTimeout(..., 0)` that downloads the pre-built + * database archive, extracts it, validates it (lite), opens LevelDB, and attaches it to the + * server. Any failure in this pipeline calls `process.exit(1)` so the orchestrator restarts + * the container. + * 3. Wires SIGTERM/SIGINT handlers for graceful shutdown (HTTP server + DB). + */ +export async function entrypointCommand( + options: EntrypointCommandOptions, +): Promise { + console.log("ENSRainbow running with config:"); + console.log(stringifyConfig(options, { pretty: true })); + + logger.info( + `ENSRainbow entrypoint starting HTTP server on port ${options.port} ` + + `(database will be bootstrapped in the background)`, + ); + + const ensRainbowServer = ENSRainbowServer.createPending(); + + let cachedPublicConfig: EnsRainbow.ENSRainbowPublicConfig | null = null; + const app = createApi(ensRainbowServer, () => cachedPublicConfig); + + const httpServer = serve({ + fetch: app.fetch, + port: options.port, + }); + + let alreadyClosed = false; + const close = async () => { + if (alreadyClosed) return; + alreadyClosed = true; + logger.info("Shutting down server..."); + try { + await httpServer.close(); + await ensRainbowServer.close(); + logger.info("Server shutdown complete"); + } catch (error) { + logger.error(error, "Error during shutdown:"); + throw error; + } + }; + + if (options.registerSignalHandlers !== false) { + process.on("SIGTERM", close); + process.on("SIGINT", close); + } + + const bootstrapComplete = new Promise((resolvePromise) => { + // Schedule the bootstrap on the next tick so the HTTP server can accept connections first. 
+ setTimeout(() => { + runDbBootstrap(options, ensRainbowServer) + .then((publicConfig) => { + cachedPublicConfig = publicConfig; + logger.info( + "ENSRainbow database bootstrap complete. Service is ready to serve heal requests.", + ); + resolvePromise(); + }) + .catch((error) => { + logger.error(error, "ENSRainbow database bootstrap failed - exiting"); + process.exit(1); + }); + }, 0); + }); + + return { bootstrapComplete, close }; +} + +/** + * Full database bootstrap pipeline. Idempotent: if the marker file already exists and the + * on-disk database passes lite validation, the download and extraction steps are skipped. + * + * @returns the `ENSRainbowPublicConfig` for the attached database. + */ +async function runDbBootstrap( + options: EntrypointCommandOptions, + ensRainbowServer: ENSRainbowServer, +): Promise { + const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion } = options; + const downloadTempDir = options.downloadTempDir ?? join(dataDir, ".download-temp"); + const markerFile = join(dataDir, DB_READY_MARKER_FILENAME); + const dbSubdir = join(dataDir, `data-${labelSetId}_${labelSetVersion}`); + + mkdirSync(dataDir, { recursive: true }); + + if (existsSync(markerFile) && existsSync(dbSubdir)) { + logger.info( + `Found existing ENSRainbow marker at ${markerFile}; attempting to open existing database at ${dbSubdir}`, + ); + try { + const db = await ENSRainbowDB.open(dbSubdir); + await ensRainbowServer.attachDb(db); + return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); + } catch (error) { + logger.warn( + error, + "Existing ENSRainbow database failed to open or validate; re-downloading from scratch", + ); + // Fall through to re-download below. 
+ } + } + + await downloadAndExtractDatabase({ + dataDir, + dbSchemaVersion, + labelSetId, + labelSetVersion, + downloadTempDir, + labelsetServerUrl: options.labelsetServerUrl, + }); + + logger.info(`Opening newly extracted database at ${dbSubdir}`); + const db = await ENSRainbowDB.open(dbSubdir); + + await ensRainbowServer.attachDb(db); + + // Write marker after a successful attach so restarts can skip the download step. + await writeFile(markerFile, ""); + + return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); +} + +interface DownloadAndExtractParams { + dataDir: string; + dbSchemaVersion: DbSchemaVersion; + labelSetId: string; + labelSetVersion: number; + downloadTempDir: string; + labelsetServerUrl?: string | undefined; +} + +async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Promise { + const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion, downloadTempDir } = params; + + // Clean up any stale state from a previous aborted bootstrap attempt. + rmSync(downloadTempDir, { recursive: true, force: true }); + mkdirSync(downloadTempDir, { recursive: true }); + + const downloadScript = resolveDownloadScriptPath(); + logger.info( + `Downloading ENSRainbow database (schema=${dbSchemaVersion}, id=${labelSetId}, version=${labelSetVersion}) via ${downloadScript}`, + ); + + await spawnChild( + "bash", + [ + downloadScript, + String(dbSchemaVersion), + labelSetId, + String(labelSetVersion), + ], + { + OUT_DIR: downloadTempDir, + ...(params.labelsetServerUrl + ? 
{ ENSRAINBOW_LABELSET_SERVER_URL: params.labelsetServerUrl } + : {}), + }, + ); + + const archivePath = join( + downloadTempDir, + "databases", + String(dbSchemaVersion), + `${labelSetId}_${labelSetVersion}.tgz`, + ); + if (!existsSync(archivePath)) { + throw new Error( + `Expected database archive file not found at ${archivePath} after download completed`, + ); + } + + logger.info(`Extracting ${archivePath} into ${dataDir}`); + mkdirSync(dataDir, { recursive: true }); + await spawnChild("tar", ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], {}); + + rmSync(downloadTempDir, { recursive: true, force: true }); +} + +/** + * Resolve the absolute path to `download-prebuilt-database.sh`. + * + * The compiled command lives at `apps/ensrainbow/src/commands/entrypoint-command.ts` in source + * and at `dist/commands/entrypoint-command.js` at runtime; in both cases the sibling `scripts` + * directory is two levels up. + */ +function resolveDownloadScriptPath(): string { + const here = dirname(fileURLToPath(import.meta.url)); + // From `src/commands/` or `dist/commands/` go up two levels to reach the app root. + return resolve(here, "..", "..", "scripts", "download-prebuilt-database.sh"); +} + +function spawnChild( + command: string, + args: string[], + extraEnv: Record, +): Promise { + return new Promise((resolvePromise, reject) => { + const child = spawn(command, args, { + stdio: "inherit", + env: { ...process.env, ...extraEnv }, + }); + + child.on("error", reject); + child.on("exit", (code, signal) => { + if (code === 0) { + resolvePromise(); + return; + } + reject( + new Error( + `Command '${command} ${args.join(" ")}' exited with ${ + signal ? 
`signal ${signal}` : `code ${code}` + }`, + ), + ); + }); + }); +} diff --git a/apps/ensrainbow/src/commands/server-command.test.ts b/apps/ensrainbow/src/commands/server-command.test.ts index 9113f37d86..9e18b9b108 100644 --- a/apps/ensrainbow/src/commands/server-command.test.ts +++ b/apps/ensrainbow/src/commands/server-command.test.ts @@ -35,7 +35,7 @@ describe("Server Command Tests", () => { const ensRainbowServer = await ENSRainbowServer.init(db); const dbConfig = await buildDbConfig(ensRainbowServer); const publicConfig = buildEnsRainbowPublicConfig(dbConfig); - app = createApi(ensRainbowServer, publicConfig); + app = createApi(ensRainbowServer, () => publicConfig); // Start the server on a different port than what ENSRainbow defaults to server = serve({ @@ -128,6 +128,15 @@ describe("Server Command Tests", () => { }); }); + describe("GET /ready", () => { + it("should return ok status when the server has an attached database", async () => { + const response = await fetch(`http://localhost:${nonDefaultPort}/ready`); + expect(response.status).toBe(200); + const data = (await response.json()) as EnsRainbow.ReadyResponse; + expect(data).toEqual({ status: "ok" } satisfies EnsRainbow.ReadyResponse); + }); + }); + describe("GET /v1/labels/count", () => { it("should return count snapshot from startup (same as /v1/config)", async () => { // Count is fixed at server start; changing the DB does not affect the response @@ -205,6 +214,105 @@ describe("Server Command Tests", () => { }); }); + describe("Pending server (no DB attached yet)", () => { + const pendingPort = 3225; + let pendingApp: Hono; + let pendingServer: ReturnType; + let pendingEnsRainbowServer: ENSRainbowServer; + let pendingPublicConfig: EnsRainbow.ENSRainbowPublicConfig | null; + + beforeAll(async () => { + pendingEnsRainbowServer = ENSRainbowServer.createPending(); + pendingPublicConfig = null; + pendingApp = createApi(pendingEnsRainbowServer, () => pendingPublicConfig); + pendingServer = serve({ + fetch: 
pendingApp.fetch, + port: pendingPort, + }); + }); + + afterAll(async () => { + try { + if (pendingServer) await pendingServer.close(); + await pendingEnsRainbowServer.close(); + } catch (error) { + console.error("Pending server cleanup failed:", error); + } + }); + + it("GET /health returns 200 immediately without a DB", async () => { + const response = await fetch(`http://localhost:${pendingPort}/health`); + expect(response.status).toBe(200); + const data = (await response.json()) as EnsRainbow.HealthResponse; + expect(data).toEqual({ status: "ok" } satisfies EnsRainbow.HealthResponse); + }); + + it("GET /ready returns 503 while the DB is not attached", async () => { + const response = await fetch(`http://localhost:${pendingPort}/ready`); + expect(response.status).toBe(503); + const data = (await response.json()) as EnsRainbow.ServiceUnavailableError; + expect(data.status).toBe(StatusCode.Error); + expect(data.errorCode).toBe(ErrorCode.ServiceUnavailable); + }); + + it("GET /v1/heal/:labelhash returns 503 while the DB is not attached", async () => { + const someLabelhash = labelhashLiteralLabel(asLiteralLabel("test")); + const response = await fetch(`http://localhost:${pendingPort}/v1/heal/${someLabelhash}`); + expect(response.status).toBe(503); + const data = (await response.json()) as EnsRainbow.ServiceUnavailableError; + expect(data.errorCode).toBe(ErrorCode.ServiceUnavailable); + }); + + it("GET /v1/labels/count and /v1/config return 503 while the DB is not attached", async () => { + const [countRes, configRes] = await Promise.all([ + fetch(`http://localhost:${pendingPort}/v1/labels/count`), + fetch(`http://localhost:${pendingPort}/v1/config`), + ]); + expect(countRes.status).toBe(503); + expect(configRes.status).toBe(503); + }); + + it("After attachDb, /ready returns 200 and /v1/heal serves labels", async () => { + const attachDataDir = "test-data-server-pending-attach"; + await fs.rm(attachDataDir, { recursive: true, force: true }); + + const attachDb = 
await ENSRainbowDB.create(attachDataDir); + try { + await attachDb.setPrecalculatedRainbowRecordCount(1); + await attachDb.markIngestionFinished(); + await attachDb.setLabelSetId("pending-test"); + await attachDb.setHighestLabelSetVersion(0); + await attachDb.addRainbowRecord("pending-label", 0); + + await pendingEnsRainbowServer.attachDb(attachDb); + pendingPublicConfig = buildEnsRainbowPublicConfig( + await buildDbConfig(pendingEnsRainbowServer), + ); + + const readyRes = await fetch(`http://localhost:${pendingPort}/ready`); + expect(readyRes.status).toBe(200); + + const labelhash = labelhashLiteralLabel(asLiteralLabel("pending-label")); + const healRes = await fetch(`http://localhost:${pendingPort}/v1/heal/${labelhash}`); + expect(healRes.status).toBe(200); + const healData = (await healRes.json()) as EnsRainbow.HealResponse; + expect(healData).toEqual({ + status: StatusCode.Success, + label: "pending-label", + } satisfies EnsRainbow.HealSuccess); + + const configRes = await fetch(`http://localhost:${pendingPort}/v1/config`); + expect(configRes.status).toBe(200); + const configData = (await configRes.json()) as EnsRainbow.ENSRainbowPublicConfig; + expect(configData.recordsCount).toBe(1); + expect(configData.labelSet.labelSetId).toBe("pending-test"); + } finally { + await pendingEnsRainbowServer.close(); + await fs.rm(attachDataDir, { recursive: true, force: true }); + } + }); + }); + describe("CORS headers for /v1/* routes", () => { it("should return CORS headers for /v1/* routes", async () => { const validLabel = "test-label"; diff --git a/apps/ensrainbow/src/commands/server-command.ts b/apps/ensrainbow/src/commands/server-command.ts index 1288c9b469..1300ce7f6f 100644 --- a/apps/ensrainbow/src/commands/server-command.ts +++ b/apps/ensrainbow/src/commands/server-command.ts @@ -30,7 +30,7 @@ export async function serverCommand(options: ServerCommandOptions): Promise publicConfig); const httpServer = serve({ fetch: app.fetch, diff --git 
a/apps/ensrainbow/src/lib/api.ts b/apps/ensrainbow/src/lib/api.ts index 0cd6ee12d2..0e89d6fff0 100644 --- a/apps/ensrainbow/src/lib/api.ts +++ b/apps/ensrainbow/src/lib/api.ts @@ -15,16 +15,42 @@ import { import { type EnsRainbow, ErrorCode, StatusCode } from "@ensnode/ensrainbow-sdk"; import { DB_SCHEMA_VERSION } from "@/lib/database"; -import type { ENSRainbowServer } from "@/lib/server"; +import { DbNotReadyError, type ENSRainbowServer } from "@/lib/server"; import { getErrorMessage } from "@/utils/error-utils"; import { logger } from "@/utils/logger"; +/** + * Supplier of the current public config for the API. + * + * Returns `null` while the server is still bootstrapping its database. Once the database is + * attached, the supplier returns the final `ENSRainbowPublicConfig` (cached by the caller). + */ +export type PublicConfigSupplier = () => EnsRainbow.ENSRainbowPublicConfig | null; + +/** + * Shared 503 response body for endpoints that require the database to be ready. + */ +const BOOTSTRAPPING_MESSAGE = "ENSRainbow is still bootstrapping its database"; + +function buildServiceUnavailableBody( + message: string = BOOTSTRAPPING_MESSAGE, +): EnsRainbow.ServiceUnavailableError { + return { + status: StatusCode.Error, + error: message, + errorCode: ErrorCode.ServiceUnavailable, + }; +} + /** * Creates and configures the ENS Rainbow API routes. + * + * When `publicConfigSupplier` returns `null`, routes that depend on the database respond with + * HTTP 503 so that clients polling `/ready` can wait for the bootstrap to complete. 
*/ export function createApi( server: ENSRainbowServer, - publicConfig: EnsRainbow.ENSRainbowPublicConfig, + publicConfigSupplier: PublicConfigSupplier, ): Hono { const api = new Hono(); @@ -40,6 +66,10 @@ export function createApi( ); api.get("/v1/heal/:labelhash", async (c: HonoContext) => { + if (!server.isReady()) { + return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); + } + const labelhash = c.req.param("labelhash") as `0x${string}`; const labelSetVersionParam = c.req.query("label_set_version"); @@ -80,8 +110,17 @@ export function createApi( ); } - const result = await server.heal(labelhash, clientLabelSet); - return c.json(result, result.errorCode); + try { + const result = await server.heal(labelhash, clientLabelSet); + return c.json(result, result.errorCode); + } catch (error) { + // Handles the race where isReady() was true at route entry but the server transitioned + // out of ready (e.g. during shutdown). + if (error instanceof DbNotReadyError) { + return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); + } + throw error; + } }); api.get("/health", (c: HonoContext) => { @@ -89,7 +128,20 @@ export function createApi( return c.json(result); }); + api.get("/ready", (c: HonoContext) => { + if (!server.isReady()) { + return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); + } + const result: EnsRainbow.ReadyResponse = { status: "ok" }; + return c.json(result); + }); + api.get("/v1/labels/count", (c: HonoContext) => { + const publicConfig = publicConfigSupplier(); + if (publicConfig === null) { + return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); + } + const countResponse: EnsRainbow.CountSuccess = { status: StatusCode.Success, count: publicConfig.recordsCount, @@ -99,6 +151,10 @@ export function createApi( }); api.get("/v1/config", (c: HonoContext) => { + const publicConfig = publicConfigSupplier(); + if (publicConfig === null) { + return 
c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); + } return c.json(publicConfig); }); @@ -106,6 +162,10 @@ export function createApi( * @deprecated Use GET /v1/config instead. This endpoint will be removed in a future version. */ api.get("/v1/version", (c: HonoContext) => { + if (!server.isReady() || server.serverLabelSet === undefined) { + return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); + } + const result: EnsRainbow.VersionResponse = { status: StatusCode.Success, versionInfo: { diff --git a/apps/ensrainbow/src/lib/server.ts b/apps/ensrainbow/src/lib/server.ts index b3da287c5a..775af0cf7c 100644 --- a/apps/ensrainbow/src/lib/server.ts +++ b/apps/ensrainbow/src/lib/server.ts @@ -18,9 +18,15 @@ import { logger } from "@/utils/logger"; /** * Reads label set and record count from an initialized ENSRainbowServer. - * @throws Error if the record count cannot be read from the database. + * @throws Error if the server is not ready or the record count cannot be read from the database. */ export async function buildDbConfig(server: ENSRainbowServer): Promise { + if (!server.isReady()) { + throw new Error( + "Cannot build DB config: ENSRainbowServer has no database attached yet (still bootstrapping)", + ); + } + const countResult = await server.labelCount(); if (countResult.status === StatusCode.Error) { throw new Error( @@ -28,25 +34,52 @@ export async function buildDbConfig(server: ENSRainbowServer): Promise ); } + // isReady() was true so serverLabelSet is defined. return { - labelSet: server.serverLabelSet, + labelSet: server.serverLabelSet as EnsRainbowServerLabelSet, recordsCount: countResult.count, }; } +/** + * Thrown when a handler needs the database but the server has not finished bootstrapping yet. + * + * HTTP routes map this to a 503 Service Unavailable response so that clients polling `/ready` + * can retry instead of treating it as a fatal server error. 
+ */ +export class DbNotReadyError extends Error { + constructor(message = "ENSRainbow is still bootstrapping its database") { + super(message); + this.name = "DbNotReadyError"; + Object.setPrototypeOf(this, DbNotReadyError.prototype); + } +} + export class ENSRainbowServer { - private readonly db: ENSRainbowDB; - public readonly serverLabelSet: EnsRainbowServerLabelSet; + private db: ENSRainbowDB | undefined; + private _serverLabelSet: EnsRainbowServerLabelSet | undefined; - private constructor(db: ENSRainbowDB, serverLabelSet: EnsRainbowServerLabelSet) { + private constructor(db?: ENSRainbowDB, serverLabelSet?: EnsRainbowServerLabelSet) { this.db = db; - this.serverLabelSet = serverLabelSet; + this._serverLabelSet = serverLabelSet; + } + + /** + * The label set of the attached database. Only defined once the server is ready. + */ + public get serverLabelSet(): EnsRainbowServerLabelSet | undefined { + return this._serverLabelSet; + } + + /** + * Whether the server has an attached, validated database and is ready to heal labels. + */ + public isReady(): boolean { + return this.db !== undefined && this._serverLabelSet !== undefined; } /** - * Creates a new ENSRainbowServer instance - * @param db The ENSRainbowDB instance - * @param logLevel Optional log level + * Creates a new ENSRainbowServer instance with an already-opened database. * @throws Error if a "lite" validation of the database fails */ public static async init(db: ENSRainbowDB): Promise { @@ -62,6 +95,45 @@ export class ENSRainbowServer { return new ENSRainbowServer(db, serverLabelSet); } + /** + * Creates a new ENSRainbowServer in a "pending" state without a database attached. + * + * The HTTP server can start serving `/health` and `/ready` immediately while a background task + * downloads and validates the database. Once ready, call {@link attachDb} to transition the + * server into its ready state. 
+ */ + public static createPending(): ENSRainbowServer { + return new ENSRainbowServer(); + } + + /** + * Attaches a validated database to a previously-pending server instance, making it ready. + * + * @throws Error if the server already has a database attached or if the database fails lite validation. + */ + public async attachDb(db: ENSRainbowDB): Promise { + if (this.db !== undefined) { + throw new Error("ENSRainbowServer already has a database attached"); + } + + if (!(await db.validate({ lite: true }))) { + throw new Error("Database is in an invalid state"); + } + + this._serverLabelSet = await db.getLabelSet(); + this.db = db; + } + + /** + * Returns the attached database or throws {@link DbNotReadyError} if not yet ready. + */ + private requireDb(): ENSRainbowDB { + if (this.db === undefined) { + throw new DbNotReadyError(); + } + return this.db; + } + /** * Determines if a versioned rainbow record should be treated as unhealable * based on the client's label set version requirements, ignoring the label set ID. 
@@ -81,6 +153,9 @@ export class ENSRainbowServer { labelHash: LabelHash, clientLabelSet: EnsRainbowClientLabelSet, ): Promise { + const db = this.requireDb(); + const serverLabelSet = this._serverLabelSet as EnsRainbowServerLabelSet; + let labelHashBytes: ByteArray; try { labelHashBytes = labelHashToBytes(labelHash); @@ -94,7 +169,7 @@ export class ENSRainbowServer { } try { - validateSupportedLabelSetAndVersion(this.serverLabelSet, clientLabelSet); + validateSupportedLabelSetAndVersion(serverLabelSet, clientLabelSet); } catch (error) { logger.info(getErrorMessage(error)); return { @@ -105,7 +180,7 @@ export class ENSRainbowServer { } try { - const versionedRainbowRecord = await this.db.getVersionedRainbowRecord(labelHashBytes); + const versionedRainbowRecord = await db.getVersionedRainbowRecord(labelHashBytes); if ( versionedRainbowRecord === null || ENSRainbowServer.needToSimulateAsUnhealable(versionedRainbowRecord, clientLabelSet) @@ -138,8 +213,9 @@ export class ENSRainbowServer { } async labelCount(): Promise { + const db = this.requireDb(); try { - const precalculatedCount = await this.db.getPrecalculatedRainbowRecordCount(); + const precalculatedCount = await db.getPrecalculatedRainbowRecordCount(); return { status: StatusCode.Success, count: precalculatedCount, @@ -162,4 +238,15 @@ export class ENSRainbowServer { } satisfies EnsRainbow.CountServerError; } } + + /** + * Closes the attached database (if any). Safe to call on a pending server. 
+ */ + async close(): Promise { + if (this.db) { + await this.db.close(); + this.db = undefined; + this._serverLabelSet = undefined; + } + } } diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/usage/api.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/usage/api.mdx index 53d120fa31..a954d8ad10 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/usage/api.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/usage/api.mdx @@ -10,15 +10,39 @@ sidebar: | Endpoint | Purpose | Key Response Types | |----------|---------|--------------------| -| `GET /health` | Liveness probe for orchestration | `{ status: "ok" }` | +| `GET /health` | Liveness probe — succeeds as soon as the HTTP server is accepting requests | `{ status: "ok" }` | +| `GET /ready` | Readiness probe — succeeds once the database has been downloaded, validated, and opened | `ReadyResponse`, `ServiceUnavailableError` | | `GET /v1/heal/{labelhash}` | Heal a single labelhash | `HealSuccess`, `HealError` | -| `GET /v1/labels/count` | Current healable label count | `CountSuccess`, `CountServerError` | -| `GET /v1/version` | Server & database schema info plus label set details | `VersionInfo` | +| `GET /v1/labels/count` | Current healable label count | `CountSuccess`, `CountServerError`, `ServiceUnavailableError` | +| `GET /v1/config` | Public configuration of the running instance | `ENSRainbowPublicConfig`, `ServiceUnavailableError` | :::tip[Deterministic Results] Pin `labelSetVersion` in your client if you need **deterministic results** across time. See the [Label Sets & Versioning guide](/ensrainbow/concepts/label-sets-and-versioning/) for details. ::: +## Liveness vs. Readiness + +ENSRainbow's container starts its HTTP server immediately so that orchestrators (Docker, Kubernetes, ECS, …) can reach it for liveness checks while the database is still being downloaded and validated on first boot (which can take 30+ minutes for large label sets). + +- **`GET /health`** — pure **liveness** probe. 
Returns `200 { status: "ok" }` as soon as the HTTP server is bound. It does **not** guarantee the database is loaded. +- **`GET /ready`** — **readiness** probe. Returns `200 { status: "ok" }` only after the database has been downloaded, lite-validated, and opened by the process. While the server is still bootstrapping, it returns `503 Service Unavailable` with a structured error body so clients can poll with backoff. + +While the database is not yet ready, the following routes return `503` with the body: + +```json +{ + "status": "error", + "error": "ENSRainbow is still bootstrapping its database", + "errorCode": 503 +} +``` + +- `GET /v1/heal/{labelhash}` +- `GET /v1/labels/count` +- `GET /v1/config` + +Clients (including ENSIndexer) should poll `/ready` rather than `/health` before issuing heal requests. The official `ensrainbow-sdk` exposes `client.ready()` exactly for this purpose. + ## Health Check ```bash @@ -27,6 +51,24 @@ curl https://api.ensrainbow.io/health Response: `{"status":"ok"}` +## Readiness Check + +```bash +curl https://api.ensrainbow.io/ready +``` + +Ready response: `{"status":"ok"}` + +Not-ready response (HTTP 503): + +```json +{ + "status": "error", + "error": "ENSRainbow is still bootstrapping its database", + "errorCode": 503 +} +``` + ## Heal Label ### Basic Usage diff --git a/packages/ensrainbow-sdk/src/client.test.ts b/packages/ensrainbow-sdk/src/client.test.ts index 7133797b15..164c0c7e13 100644 --- a/packages/ensrainbow-sdk/src/client.test.ts +++ b/packages/ensrainbow-sdk/src/client.test.ts @@ -271,6 +271,36 @@ describe("EnsRainbowApiClient", () => { } satisfies EnsRainbow.HealthResponse); }); + describe("ready", () => { + it("should resolve when the server is ready (HTTP 200)", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + status: 200, + json: () => + Promise.resolve({ + status: "ok", + } satisfies EnsRainbow.ReadyResponse), + }); + + const response = await client.ready(); + + 
expect(mockFetch).toHaveBeenCalledWith(new URL("/ready", DEFAULT_ENSRAINBOW_URL)); + expect(response).toEqual({ + status: "ok", + } satisfies EnsRainbow.ReadyResponse); + }); + + it("should throw when the server is not ready yet (HTTP 503)", async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 503, + statusText: "Service Unavailable", + }); + + await expect(client.ready()).rejects.toThrow(/503/); + }); + }); + it("should return version information", async () => { mockFetch.mockResolvedValueOnce({ json: () => diff --git a/packages/ensrainbow-sdk/src/client.ts b/packages/ensrainbow-sdk/src/client.ts index 09bb88ee40..8263c67b92 100644 --- a/packages/ensrainbow-sdk/src/client.ts +++ b/packages/ensrainbow-sdk/src/client.ts @@ -33,6 +33,14 @@ export namespace EnsRainbow { health(): Promise; + /** + * Check whether the ENSRainbow service has finished bootstrapping and is ready to serve requests. + * + * Throws when the service is not ready (e.g. 503 while the database is still being downloaded + * or validated) so callers can retry. + */ + ready(): Promise; + /** * Get the version information of the ENSRainbow service * @@ -51,6 +59,23 @@ export namespace EnsRainbow { status: "ok"; } + /** + * Response returned by `GET /ready` when the ENSRainbow service is ready to serve requests. + */ + export interface ReadyResponse { + status: "ok"; + } + + /** + * Generic error shape used by endpoints that return 503 Service Unavailable while the + * database is still bootstrapping (downloading, extracting, or validating). 
+ */ + export interface ServiceUnavailableError { + status: typeof StatusCode.Error; + error: string; + errorCode: typeof ErrorCode.ServiceUnavailable; + } + export interface BaseHealResponse { status: Status; label?: Label | never; @@ -89,17 +114,29 @@ export namespace EnsRainbow { errorCode: typeof ErrorCode.BadRequest; } + export interface HealServiceUnavailableError + extends BaseHealResponse { + status: typeof StatusCode.Error; + label?: never; + error: string; + errorCode: typeof ErrorCode.ServiceUnavailable; + } + export type HealResponse = | HealSuccess | HealNotFoundError | HealServerError - | HealBadRequestError; + | HealBadRequestError + | HealServiceUnavailableError; export type HealError = Exclude; /** - * Server errors should not be cached. + * Server errors and transient bootstrap errors should not be cached. */ - export type CacheableHealResponse = Exclude; + export type CacheableHealResponse = Exclude< + HealResponse, + HealServerError | HealServiceUnavailableError + >; export interface BaseCountResponse { status: Status; @@ -128,7 +165,19 @@ export namespace EnsRainbow { errorCode: typeof ErrorCode.ServerError; } - export type CountResponse = CountSuccess | CountServerError; + export interface CountServiceUnavailableError + extends BaseCountResponse { + status: typeof StatusCode.Error; + count?: never; + timestamp?: never; + error: string; + errorCode: typeof ErrorCode.ServiceUnavailable; + } + + export type CountResponse = + | CountSuccess + | CountServerError + | CountServiceUnavailableError; /** * ENSRainbow version information. @@ -392,6 +441,31 @@ export class EnsRainbowApiClient implements EnsRainbow.ApiClient { return response.json() as Promise; } + /** + * Check whether the ENSRainbow service is ready (database is downloaded, validated, and open). 
+ * + * Unlike {@link EnsRainbowApiClient.health}, which is a pure liveness probe that succeeds as soon + * as the HTTP server is accepting requests, `ready()` only resolves once the service has finished + * bootstrapping its database. Clients that require a usable database (e.g. ENSIndexer) should + * poll this method instead of `health()` during startup. + * + * @throws if the server is not ready yet (HTTP 503) or the request otherwise fails — callers are + * expected to retry with backoff. + */ + async ready(): Promise { + const response = await fetch(new URL("/ready", this.options.endpointUrl)); + + if (!response.ok) { + throw new Error( + `ENSRainbow is not ready yet (HTTP ${response.status}${ + response.statusText ? ` ${response.statusText}` : "" + })`, + ); + } + + return response.json() as Promise; + } + /** * Get the public configuration of the ENSRainbow service. */ @@ -477,5 +551,9 @@ export const isHealError = ( export const isCacheableHealResponse = ( response: EnsRainbow.HealResponse, ): response is EnsRainbow.CacheableHealResponse => { - return response.status === StatusCode.Success || response.errorCode !== ErrorCode.ServerError; + if (response.status === StatusCode.Success) return true; + return ( + response.errorCode !== ErrorCode.ServerError && + response.errorCode !== ErrorCode.ServiceUnavailable + ); }; diff --git a/packages/ensrainbow-sdk/src/consts.ts b/packages/ensrainbow-sdk/src/consts.ts index 480d7753eb..6cb8590727 100644 --- a/packages/ensrainbow-sdk/src/consts.ts +++ b/packages/ensrainbow-sdk/src/consts.ts @@ -9,4 +9,5 @@ export const ErrorCode = { BadRequest: 400, NotFound: 404, ServerError: 500, + ServiceUnavailable: 503, } as const; From 5ec830f0d27455c49b92fb543a8b5e1af5c6c14f Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 20 Apr 2026 23:24:03 +0200 Subject: [PATCH 02/11] feat(ensrainbow): enhance readiness checks and API response handling - Updated the `/ready` endpoint to require both database attachment and public config 
population for a successful `200` response. - Adjusted API routes to return `ServiceUnavailableError` for `/v1/heal`, `/v1/labels/count`, and `/v1/config` during bootstrap. - Introduced a grace period for child process termination during shutdown. - Changed the `label-set-version` option type from string to number for better validation. --- .changeset/ready-endpoint-bg-bootstrap.md | 2 +- apps/ensrainbow/src/cli.ts | 2 +- apps/ensrainbow/src/commands/entrypoint-command.ts | 2 +- apps/ensrainbow/src/lib/api.ts | 7 ++++++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.changeset/ready-endpoint-bg-bootstrap.md b/.changeset/ready-endpoint-bg-bootstrap.md index 02e5585c2c..0fb7ef9e06 100644 --- a/.changeset/ready-endpoint-bg-bootstrap.md +++ b/.changeset/ready-endpoint-bg-bootstrap.md @@ -7,7 +7,7 @@ ENSRainbow now starts its HTTP server immediately and downloads/validates its database in the background, instead of blocking container startup behind a netcat placeholder. - **New `GET /ready` endpoint**: returns `200 { status: "ok" }` once the database is attached, or `503 Service Unavailable` while ENSRainbow is still bootstrapping. `/health` is now a pure liveness probe that succeeds as soon as the HTTP server is listening. -- **503 responses for API routes during bootstrap**: `/v1/heal`, `/v1/labels/count`, `/v1/config`, and `/v1/version` return a structured `ServiceUnavailableError` (`errorCode: 503`) until the database is ready. +- **503 responses for API routes during bootstrap**: `/v1/heal`, `/v1/labels/count`, and `/v1/config` return a structured `ServiceUnavailableError` (`errorCode: 503`) until the database is ready. - **New Docker entrypoint**: the container now runs `pnpm --filter ensrainbow run entrypoint` (implemented in Node via `tsx src/cli.ts entrypoint`), which replaces `scripts/entrypoint.sh` and the `netcat` workaround. 
- **SDK client**: added `EnsRainbowApiClient.ready()`, plus `EnsRainbow.ReadyResponse` / `EnsRainbow.ServiceUnavailableError` types and `ErrorCode.ServiceUnavailable`. - **ENSIndexer**: `waitForEnsRainbowToBeReady` now polls `/ready` (via `ensRainbowClient.ready()`) instead of `/health`, so it correctly waits for the database to finish bootstrapping. diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 99a2873ee5..0dcce5aeb9 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -165,7 +165,7 @@ export function createCLI(options: CLIOptions = {}) { }) .coerce("label-set-id", buildLabelSetId) .option("label-set-version", { - type: "string", + type: "number", description: "Label set version to download (falls back to LABEL_SET_VERSION env var)", default: process.env.LABEL_SET_VERSION, diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index 3351678130..42f29e87a7 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -45,7 +45,7 @@ export interface EntrypointCommandOptions { export interface EntrypointCommandHandle { /** * Resolves once the background bootstrap has completed successfully. Never rejects: on - * failure the process is terminated via `process.exit(1)` (or the caller's exit-like hook). + * failure the process is terminated via `process.exit(1)`. */ readonly bootstrapComplete: Promise; /** diff --git a/apps/ensrainbow/src/lib/api.ts b/apps/ensrainbow/src/lib/api.ts index bcb7952c48..4dd78a6534 100644 --- a/apps/ensrainbow/src/lib/api.ts +++ b/apps/ensrainbow/src/lib/api.ts @@ -126,7 +126,12 @@ export function createApi( }); api.get("/ready", (c: HonoContext) => { - if (!server.isReady()) { + // Require both the database to be attached AND the public config to be populated so that + // `/ready === 200` is a true gate for `/v1/heal`, `/v1/labels/count`, and `/v1/config`. 
+ // Without the publicConfigSupplier check there is a brief window inside the entrypoint + // bootstrap where attachDb() has flipped isReady() to true but cachedPublicConfig is still + // null, so /ready would report ok while /v1/config still returns 503. + if (!server.isReady() || publicConfigSupplier() === null) { return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); } const result: EnsRainbow.ReadyResponse = { status: "ok" }; From 243667f1f9040ecd37cb17ba8c28bd4c009dbbda Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 20 Apr 2026 23:25:58 +0200 Subject: [PATCH 03/11] feat(ensrainbow): implement graceful shutdown during bootstrap - Added support for graceful shutdown during the bootstrap process, allowing in-flight operations to be aborted and child processes to be terminated properly. - Introduced a new `BootstrapAbortedError` to handle shutdown scenarios during database operations. - Updated the entrypoint command to ensure that partially-opened LevelDB handles are closed promptly, preventing resource leaks. - Enhanced the database bootstrap pipeline to respect abort signals, ensuring a clean exit during shutdown. --- .changeset/ready-endpoint-bg-bootstrap.md | 1 + .../src/commands/entrypoint-command.ts | 137 ++++++++++++++++-- 2 files changed, 125 insertions(+), 13 deletions(-) diff --git a/.changeset/ready-endpoint-bg-bootstrap.md b/.changeset/ready-endpoint-bg-bootstrap.md index 0fb7ef9e06..4b8a802237 100644 --- a/.changeset/ready-endpoint-bg-bootstrap.md +++ b/.changeset/ready-endpoint-bg-bootstrap.md @@ -9,6 +9,7 @@ ENSRainbow now starts its HTTP server immediately and downloads/validates its da - **New `GET /ready` endpoint**: returns `200 { status: "ok" }` once the database is attached, or `503 Service Unavailable` while ENSRainbow is still bootstrapping. `/health` is now a pure liveness probe that succeeds as soon as the HTTP server is listening. 
- **503 responses for API routes during bootstrap**: `/v1/heal`, `/v1/labels/count`, and `/v1/config` return a structured `ServiceUnavailableError` (`errorCode: 503`) until the database is ready. - **New Docker entrypoint**: the container now runs `pnpm --filter ensrainbow run entrypoint` (implemented in Node via `tsx src/cli.ts entrypoint`), which replaces `scripts/entrypoint.sh` and the `netcat` workaround. +- **Graceful shutdown during bootstrap**: SIGTERM/SIGINT now abort an in-flight bootstrap. Spawned `download`/`tar` child processes are terminated (SIGTERM → SIGKILL after a 5s grace period) and any partially-opened LevelDB handle is closed before the HTTP server and DB-backed server shut down, so the container exits promptly without leaking child processes or LevelDB locks. - **SDK client**: added `EnsRainbowApiClient.ready()`, plus `EnsRainbow.ReadyResponse` / `EnsRainbow.ServiceUnavailableError` types and `ErrorCode.ServiceUnavailable`. - **ENSIndexer**: `waitForEnsRainbowToBeReady` now polls `/ready` (via `ensRainbowClient.ready()`) instead of `/health`, so it correctly waits for the database to finish bootstrapping. diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index 42f29e87a7..05e5a8685f 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -16,6 +16,19 @@ import { ENSRainbowDB } from "@/lib/database"; import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; import { logger } from "@/utils/logger"; +/** + * Grace period given to a spawned child process after SIGTERM before we escalate to SIGKILL + * during shutdown. 
+ */ +const CHILD_PROCESS_KILL_GRACE_MS = 5_000; + +class BootstrapAbortedError extends Error { + constructor() { + super("ENSRainbow bootstrap aborted due to shutdown"); + this.name = "BootstrapAbortedError"; + } +} + export interface EntrypointCommandOptions { port: number; dataDir: AbsolutePath; @@ -70,7 +83,9 @@ export const DB_READY_MARKER_FILENAME = "ensrainbow_db_ready"; * database archive, extracts it, validates it (lite), opens LevelDB, and attaches it to the * server. Any failure in this pipeline calls `process.exit(1)` so the orchestrator restarts * the container. - * 3. Wires SIGTERM/SIGINT handlers for graceful shutdown (HTTP server + DB). + * 3. Wires SIGTERM/SIGINT handlers for graceful shutdown. On shutdown the bootstrap is aborted + * (spawned children are killed and any partially-opened LevelDB handle is released) before + * the HTTP server and DB-backed server are closed, so we don't leak locks or prolong exit. */ export async function entrypointCommand( options: EntrypointCommandOptions, @@ -93,12 +108,29 @@ export async function entrypointCommand( port: options.port, }); + // Shared between close() and the bootstrap pipeline. `close()` aborts this to kill any + // in-flight spawned child processes and make the bootstrap bail out as soon as possible. + const bootstrapAborter = new AbortController(); + + // Resolves as soon as the bootstrap task has settled (success, failure, or abort). Used by + // close() to wait for cleanup of in-flight children and partially-opened DB handles before + // returning. Distinct from `bootstrapComplete`, which only resolves on successful bootstrap. 
+ let signalBootstrapSettled!: () => void; + const bootstrapSettled = new Promise((resolvePromise) => { + signalBootstrapSettled = resolvePromise; + }); + let alreadyClosed = false; const close = async () => { if (alreadyClosed) return; alreadyClosed = true; logger.info("Shutting down server..."); + bootstrapAborter.abort(); try { + // Wait for the bootstrap pipeline to settle (including spawned child cleanup and any + // partially-opened DB being closed) before shutting down the HTTP server and DB, so + // we don't leave child processes or LevelDB locks behind. + await bootstrapSettled; await httpServer.close(); await ensRainbowServer.close(); logger.info("Server shutdown complete"); @@ -116,7 +148,7 @@ export async function entrypointCommand( const bootstrapComplete = new Promise((resolvePromise) => { // Schedule the bootstrap on the next tick so the HTTP server can accept connections first. setTimeout(() => { - runDbBootstrap(options, ensRainbowServer) + runDbBootstrap(options, ensRainbowServer, bootstrapAborter.signal) .then((publicConfig) => { cachedPublicConfig = publicConfig; logger.info( @@ -125,8 +157,15 @@ export async function entrypointCommand( resolvePromise(); }) .catch((error) => { + if (error instanceof BootstrapAbortedError || bootstrapAborter.signal.aborted) { + logger.info("ENSRainbow database bootstrap aborted due to shutdown"); + return; + } logger.error(error, "ENSRainbow database bootstrap failed - exiting"); process.exit(1); + }) + .finally(() => { + signalBootstrapSettled(); }); }, 0); }); @@ -138,11 +177,16 @@ export async function entrypointCommand( * Full database bootstrap pipeline. Idempotent: if the marker file already exists and the * on-disk database passes lite validation, the download and extraction steps are skipped. * + * Honors `signal`: between steps we check for abort and, if a DB has been opened but not yet + * attached to the server, close it so the LevelDB lock is released promptly. 
Once the DB is + * attached to `ensRainbowServer`, its close is owned by `ENSRainbowServer.close()`. + * * @returns the `ENSRainbowPublicConfig` for the attached database. */ async function runDbBootstrap( options: EntrypointCommandOptions, ensRainbowServer: ENSRainbowServer, + signal: AbortSignal, ): Promise { const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion } = options; const downloadTempDir = options.downloadTempDir ?? join(dataDir, ".download-temp"); @@ -156,10 +200,18 @@ async function runDbBootstrap( `Found existing ENSRainbow marker at ${markerFile}; attempting to open existing database at ${dbSubdir}`, ); try { + throwIfAborted(signal); const db = await ENSRainbowDB.open(dbSubdir); + if (signal.aborted) { + await safeClose(db); + throw new BootstrapAbortedError(); + } await ensRainbowServer.attachDb(db); return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); } catch (error) { + if (error instanceof BootstrapAbortedError || signal.aborted) { + throw error; + } logger.warn( error, "Existing ENSRainbow database failed to open or validate; re-downloading from scratch", @@ -168,6 +220,7 @@ async function runDbBootstrap( } } + throwIfAborted(signal); await downloadAndExtractDatabase({ dataDir, dbSchemaVersion, @@ -175,10 +228,16 @@ async function runDbBootstrap( labelSetVersion, downloadTempDir, labelsetServerUrl: options.labelsetServerUrl, + signal, }); + throwIfAborted(signal); logger.info(`Opening newly extracted database at ${dbSubdir}`); const db = await ENSRainbowDB.open(dbSubdir); + if (signal.aborted) { + await safeClose(db); + throw new BootstrapAbortedError(); + } await ensRainbowServer.attachDb(db); @@ -188,6 +247,20 @@ async function runDbBootstrap( return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); } +function throwIfAborted(signal: AbortSignal): void { + if (signal.aborted) { + throw new BootstrapAbortedError(); + } +} + +async function safeClose(db: ENSRainbowDB): Promise { + try { + await 
db.close(); + } catch (error) { + logger.warn(error, "Failed to close partially-opened ENSRainbow database during shutdown"); + } +} + interface DownloadAndExtractParams { dataDir: string; dbSchemaVersion: DbSchemaVersion; @@ -195,10 +268,12 @@ interface DownloadAndExtractParams { labelSetVersion: number; downloadTempDir: string; labelsetServerUrl?: string | undefined; + signal: AbortSignal; } async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Promise { - const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion, downloadTempDir } = params; + const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion, downloadTempDir, signal } = + params; // Clean up any stale state from a previous aborted bootstrap attempt. rmSync(downloadTempDir, { recursive: true, force: true }); @@ -211,18 +286,14 @@ async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Pro await spawnChild( "bash", - [ - downloadScript, - String(dbSchemaVersion), - labelSetId, - String(labelSetVersion), - ], + [downloadScript, String(dbSchemaVersion), labelSetId, String(labelSetVersion)], { OUT_DIR: downloadTempDir, ...(params.labelsetServerUrl ? 
{ ENSRAINBOW_LABELSET_SERVER_URL: params.labelsetServerUrl } : {}), }, + signal, ); const archivePath = join( @@ -239,7 +310,12 @@ async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Pro logger.info(`Extracting ${archivePath} into ${dataDir}`); mkdirSync(dataDir, { recursive: true }); - await spawnChild("tar", ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], {}); + await spawnChild( + "tar", + ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], + {}, + signal, + ); rmSync(downloadTempDir, { recursive: true, force: true }); } @@ -261,15 +337,50 @@ function spawnChild( command: string, args: string[], extraEnv: Record, + signal: AbortSignal, ): Promise { return new Promise((resolvePromise, reject) => { + if (signal.aborted) { + reject(new BootstrapAbortedError()); + return; + } + const child = spawn(command, args, { stdio: "inherit", env: { ...process.env, ...extraEnv }, }); - child.on("error", reject); - child.on("exit", (code, signal) => { + // On abort: send SIGTERM for a graceful exit, and escalate to SIGKILL after a grace period + // if the child is still alive. The `unref`'d timeout ensures we don't hold the event loop + // open if the child exits cleanly before the grace period elapses. 
+ let killTimer: NodeJS.Timeout | undefined; + const onAbort = () => { + if (child.exitCode !== null || child.signalCode !== null) return; + child.kill("SIGTERM"); + killTimer = setTimeout(() => { + if (child.exitCode === null && child.signalCode === null) { + child.kill("SIGKILL"); + } + }, CHILD_PROCESS_KILL_GRACE_MS); + killTimer.unref(); + }; + signal.addEventListener("abort", onAbort, { once: true }); + + const cleanup = () => { + signal.removeEventListener("abort", onAbort); + if (killTimer) clearTimeout(killTimer); + }; + + child.on("error", (err) => { + cleanup(); + reject(err); + }); + child.on("exit", (code, exitSignal) => { + cleanup(); + if (signal.aborted) { + reject(new BootstrapAbortedError()); + return; + } if (code === 0) { resolvePromise(); return; @@ -277,7 +388,7 @@ function spawnChild( reject( new Error( `Command '${command} ${args.join(" ")}' exited with ${ - signal ? `signal ${signal}` : `code ${code}` + exitSignal ? `signal ${exitSignal}` : `code ${code}` }`, ), ); From 1f9d822e4d3801515c7b9157d602b45740c9aeda Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 20 Apr 2026 23:31:49 +0200 Subject: [PATCH 04/11] refactor(ensrainbow): implement closeHttpServer utility for graceful shutdown - Introduced a new utility function `closeHttpServer` to handle the closure of HTTP servers gracefully, ensuring all active connections are completed before shutdown. - Updated the entrypoint and server commands to utilize `closeHttpServer`, improving resource management during server shutdown. - Adjusted tests to reflect changes in the readiness check, ensuring consistent behavior during bootstrap. 
--- .../src/commands/entrypoint-command.test.ts | 7 ++--- .../src/commands/entrypoint-command.ts | 3 ++- .../ensrainbow/src/commands/server-command.ts | 3 ++- apps/ensrainbow/src/utils/http-server.ts | 26 +++++++++++++++++++ 4 files changed, 32 insertions(+), 7 deletions(-) create mode 100644 apps/ensrainbow/src/utils/http-server.ts diff --git a/apps/ensrainbow/src/commands/entrypoint-command.test.ts b/apps/ensrainbow/src/commands/entrypoint-command.test.ts index 45b4ec8467..9edd919af6 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.test.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.test.ts @@ -70,13 +70,10 @@ describe("entrypointCommand (existing DB on disk)", () => { expect(healthRes.status).toBe(200); const healthData = (await healthRes.json()) as EnsRainbow.HealthResponse; expect(healthData).toEqual({ status: "ok" }); - const readyRes = await fetch(`${endpoint}/ready`); - expect(readyRes.status).toBe(503); - await handle.bootstrapComplete; - const readyRes2 = await fetch(`${endpoint}/ready`); - expect(readyRes2.status).toBe(200); + const readyRes = await fetch(`${endpoint}/ready`); + expect(readyRes.status).toBe(200); const configRes = await fetch(`${endpoint}/v1/config`); expect(configRes.status).toBe(200); diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index 05e5a8685f..32ba8b84e4 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -14,6 +14,7 @@ import type { AbsolutePath, DbSchemaVersion } from "@/config/types"; import { createApi } from "@/lib/api"; import { ENSRainbowDB } from "@/lib/database"; import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; +import { closeHttpServer } from "@/utils/http-server"; import { logger } from "@/utils/logger"; /** @@ -131,7 +132,7 @@ export async function entrypointCommand( // partially-opened DB being closed) before shutting down the HTTP server and 
DB, so // we don't leave child processes or LevelDB locks behind. await bootstrapSettled; - await httpServer.close(); + await closeHttpServer(httpServer); await ensRainbowServer.close(); logger.info("Server shutdown complete"); } catch (error) { diff --git a/apps/ensrainbow/src/commands/server-command.ts b/apps/ensrainbow/src/commands/server-command.ts index 1300ce7f6f..8ea9889b7c 100644 --- a/apps/ensrainbow/src/commands/server-command.ts +++ b/apps/ensrainbow/src/commands/server-command.ts @@ -8,6 +8,7 @@ import { buildEnsRainbowPublicConfig } from "@/config/public"; import { createApi } from "@/lib/api"; import { ENSRainbowDB } from "@/lib/database"; import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; +import { closeHttpServer } from "@/utils/http-server"; import { logger } from "@/utils/logger"; export type ServerCommandOptions = ServeCommandConfig; @@ -41,7 +42,7 @@ export async function serverCommand(options: ServerCommandOptions): Promise { logger.info("Shutting down server..."); try { - await httpServer.close(); + await closeHttpServer(httpServer); await db.close(); logger.info("Server shutdown complete"); } catch (error) { diff --git a/apps/ensrainbow/src/utils/http-server.ts b/apps/ensrainbow/src/utils/http-server.ts new file mode 100644 index 0000000000..c2c686d659 --- /dev/null +++ b/apps/ensrainbow/src/utils/http-server.ts @@ -0,0 +1,26 @@ +import type { Server as HttpServer } from "node:http"; +import type { Http2Server, Http2SecureServer } from "node:http2"; + +/** + * Promisified wrapper around Node's callback-based `http.Server.close()`. + * + * `@hono/node-server`'s `serve()` returns a plain Node `http.Server | Http2Server | + * Http2SecureServer`, whose `close(callback?)` is NOT promise-returning (it returns `this`). + * Directly `await`ing `httpServer.close()` therefore resolves immediately against a + * non-thenable server object, so in-flight requests would be racing any subsequent teardown + * (DB close, etc.). 
This wrapper makes the await actually wait until all active connections + * have finished, or `close` has errored. + */ +export function closeHttpServer( + server: HttpServer | Http2Server | Http2SecureServer, +): Promise { + return new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error); + return; + } + resolve(); + }); + }); +} From d3f9c0d9da42d20bc4e919053428a634a13f7d89 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 20 Apr 2026 23:33:46 +0200 Subject: [PATCH 05/11] fix(ensrainbow): improve database handling during bootstrap failure - Enhanced error handling in the entrypoint command to ensure proper closure of existing database connections when a bootstrap operation fails. - Introduced logic to manage the state of the database connection, allowing for a clean re-download of the database if the initial opening fails. - Updated logging to provide clearer warnings during database operation failures, improving debugging and resource management. --- .../src/commands/entrypoint-command.ts | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index 32ba8b84e4..be9c0c2460 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -200,19 +200,42 @@ async function runDbBootstrap( logger.info( `Found existing ENSRainbow marker at ${markerFile}; attempting to open existing database at ${dbSubdir}`, ); + // Track the opened DB and whether ownership has transferred to the server so the catch + // block below can release the correct resources before falling back to re-download. 
+ let existingDb: ENSRainbowDB | undefined; + let existingDbAttached = false; try { throwIfAborted(signal); - const db = await ENSRainbowDB.open(dbSubdir); + existingDb = await ENSRainbowDB.open(dbSubdir); if (signal.aborted) { - await safeClose(db); + await safeClose(existingDb); throw new BootstrapAbortedError(); } - await ensRainbowServer.attachDb(db); + await ensRainbowServer.attachDb(existingDb); + existingDbAttached = true; return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); } catch (error) { if (error instanceof BootstrapAbortedError || signal.aborted) { throw error; } + // Release the DB handle (so the LevelDB LOCK is freed before we re-extract into the + // same path), and wipe the stale on-disk files so the tar re-extraction starts from a + // clean state. If attach succeeded, ownership has transferred to the server, so route + // the close through `ensRainbowServer.close()` which also resets its internal state so + // the re-downloaded DB can be re-attached below. + if (existingDbAttached) { + try { + await ensRainbowServer.close(); + } catch (closeError) { + logger.warn( + closeError, + "Failed to close server while falling back to re-download; continuing", + ); + } + } else if (existingDb !== undefined) { + await safeClose(existingDb); + } + rmSync(dbSubdir, { recursive: true, force: true }); logger.warn( error, "Existing ENSRainbow database failed to open or validate; re-downloading from scratch", From 74cf754e3f17cd0f150058ff0c774ea7ea571681 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 20 Apr 2026 23:38:09 +0200 Subject: [PATCH 06/11] fix(ensrainbow): improve database closure handling in the close method - Updated the `close` method to ensure readiness state is flipped before awaiting the database closure, preventing concurrent handlers from accessing a closing database. - Introduced a local reference to the database for safer closure management, enhancing error handling during shutdown scenarios. 
--- apps/ensrainbow/src/lib/server.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/apps/ensrainbow/src/lib/server.ts b/apps/ensrainbow/src/lib/server.ts index 775af0cf7c..a12eb4d219 100644 --- a/apps/ensrainbow/src/lib/server.ts +++ b/apps/ensrainbow/src/lib/server.ts @@ -241,12 +241,19 @@ export class ENSRainbowServer { /** * Closes the attached database (if any). Safe to call on a pending server. + * + * Flips readiness (`this.db`/`this._serverLabelSet` → `undefined`) BEFORE awaiting the + * underlying DB close so concurrent handlers checking `isReady()` or calling `requireDb()` + * see the "not ready" state immediately and don't obtain a handle to a DB that is being + * torn down. Handlers already past `requireDb()` still hold their local reference, but new + * ones get a clean `DbNotReadyError` (mapped to 503 at the API layer) instead of racing + * LevelDB mid-close. */ async close(): Promise { - if (this.db) { - await this.db.close(); - this.db = undefined; - this._serverLabelSet = undefined; - } + const capturedDb = this.db; + if (capturedDb === undefined) return; + this.db = undefined; + this._serverLabelSet = undefined; + await capturedDb.close(); } } From 89ac55f34bf25b72c6b481458d0af2fa0b8be561 Mon Sep 17 00:00:00 2001 From: djstrong Date: Mon, 20 Apr 2026 23:52:10 +0200 Subject: [PATCH 07/11] fix(ensrainbow): enhance shutdown handling and error management - Updated the entrypoint command to resolve the bootstrap promise on shutdown abort, improving error handling during database bootstrap. - Refactored the server command to ensure graceful shutdown by preventing multiple shutdown calls and handling errors more effectively. - Enhanced the `closeHttpServer` utility to treat already stopped servers as a no-op, improving robustness during shutdown scenarios. 
--- .../src/commands/entrypoint-command.ts | 6 ++-- .../ensrainbow/src/commands/server-command.ts | 32 +++++++++++++------ apps/ensrainbow/src/utils/http-server.ts | 6 ++++ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index be9c0c2460..849772bfcd 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -58,8 +58,9 @@ export interface EntrypointCommandOptions { */ export interface EntrypointCommandHandle { /** - * Resolves once the background bootstrap has completed successfully. Never rejects: on - * failure the process is terminated via `process.exit(1)`. + * Resolves once the background bootstrap has either completed successfully or been aborted + * during shutdown. Never rejects: on non-abort failure the process is terminated via + * `process.exit(1)`. */ readonly bootstrapComplete: Promise; /** @@ -160,6 +161,7 @@ export async function entrypointCommand( .catch((error) => { if (error instanceof BootstrapAbortedError || bootstrapAborter.signal.aborted) { logger.info("ENSRainbow database bootstrap aborted due to shutdown"); + resolvePromise(); return; } logger.error(error, "ENSRainbow database bootstrap failed - exiting"); diff --git a/apps/ensrainbow/src/commands/server-command.ts b/apps/ensrainbow/src/commands/server-command.ts index 8ea9889b7c..286b4f9efc 100644 --- a/apps/ensrainbow/src/commands/server-command.ts +++ b/apps/ensrainbow/src/commands/server-command.ts @@ -39,20 +39,32 @@ export async function serverCommand(options: ServerCommandOptions): Promise | undefined; const shutdown = async () => { - logger.info("Shutting down server..."); - try { - await closeHttpServer(httpServer); - await db.close(); - logger.info("Server shutdown complete"); - } catch (error) { - logger.error(error, "Error during shutdown:"); - throw error; + if (shutdownPromise) { + return 
shutdownPromise; } + + logger.info("Shutting down server..."); + shutdownPromise = (async () => { + try { + await closeHttpServer(httpServer); + await db.close(); + logger.info("Server shutdown complete"); + } catch (error) { + logger.error(error, "Error during shutdown:"); + } + })(); + + return shutdownPromise; }; - process.on("SIGTERM", shutdown); - process.on("SIGINT", shutdown); + process.on("SIGTERM", () => { + void shutdown(); + }); + process.on("SIGINT", () => { + void shutdown(); + }); } catch (error) { await db.close(); throw error; diff --git a/apps/ensrainbow/src/utils/http-server.ts b/apps/ensrainbow/src/utils/http-server.ts index c2c686d659..1ff3d011af 100644 --- a/apps/ensrainbow/src/utils/http-server.ts +++ b/apps/ensrainbow/src/utils/http-server.ts @@ -17,6 +17,12 @@ export function closeHttpServer( return new Promise((resolve, reject) => { server.close((error) => { if (error) { + // `server.close()` is idempotent from a lifecycle perspective. If shutdown races with + // another close path and the server is already stopped, treat this as a no-op. 
+ if ("code" in error && error.code === "ERR_SERVER_NOT_RUNNING") { + resolve(); + return; + } reject(error); return; } From a438a6acfa134a866c101d95fa4eaa08ddd498ff Mon Sep 17 00:00:00 2001 From: djstrong Date: Fri, 24 Apr 2026 10:25:52 +0200 Subject: [PATCH 08/11] fix(ensrainbow): enhance signal handling and database extraction cleanup --- .../src/commands/entrypoint-command.test.ts | 171 +++++++++++++++++- .../src/commands/entrypoint-command.ts | 18 +- .../src/commands/server-command.test.ts | 5 +- packages/ensrainbow-sdk/src/client.ts | 8 + 4 files changed, 196 insertions(+), 6 deletions(-) diff --git a/apps/ensrainbow/src/commands/entrypoint-command.test.ts b/apps/ensrainbow/src/commands/entrypoint-command.test.ts index 9edd919af6..51ddd74f50 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.test.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.test.ts @@ -1,21 +1,58 @@ import { existsSync } from "node:fs"; import { mkdir, rm, writeFile } from "node:fs/promises"; -import { join, resolve } from "node:path"; +import { EventEmitter } from "node:events"; +import { dirname, join, resolve } from "node:path"; import { buildLabelSetId, buildLabelSetVersion } from "@ensnode/ensnode-sdk"; import type { EnsRainbow } from "@ensnode/ensrainbow-sdk"; -import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { AbsolutePath, DbSchemaVersion } from "@/config/types"; import { DB_SCHEMA_VERSION, ENSRainbowDB } from "@/lib/database"; import { DbNotReadyError, ENSRainbowServer } from "@/lib/server"; +let closeHttpServerImpl: undefined | ((server: unknown) => Promise); +vi.mock("@/utils/http-server", async () => { + const actual = await vi.importActual("@/utils/http-server"); + return { + ...actual, + closeHttpServer: async (server: unknown) => { + if (closeHttpServerImpl) return closeHttpServerImpl(server); + return actual.closeHttpServer(server as never); + }, + 
}; +}); + import { DB_READY_MARKER_FILENAME, type EntrypointCommandHandle, + __TESTING__, entrypointCommand, } from "./entrypoint-command"; +let spawnImpl: + | undefined + | (( + command: string, + args: string[], + options: { stdio: "inherit"; env: Record }, + ) => any); + +vi.mock("node:child_process", () => { + return { + spawn: ( + command: string, + args: string[], + options: { stdio: "inherit"; env: Record }, + ) => { + if (!spawnImpl) { + throw new Error("spawnImpl not set in test"); + } + return spawnImpl(command, args, options); + }, + }; +}); + /** * These tests exercise the idempotent bootstrap path of the entrypoint command, where the marker * file and a valid on-disk database already exist. We do not exercise the actual download script @@ -86,6 +123,71 @@ describe("entrypointCommand (existing DB on disk)", () => { }); }); +describe("entrypointCommand (signal handlers)", () => { + const testDataDir = resolve(process.cwd(), "test-data-entrypoint-signals"); + const labelSetId = buildLabelSetId("entrypoint-signal-test"); + const labelSetVersion = buildLabelSetVersion(0); + const dbSubdir = join(testDataDir, `data-${labelSetId}_${labelSetVersion}`); + const markerFile = join(testDataDir, DB_READY_MARKER_FILENAME); + const port = 3227; + + beforeEach(async () => { + await rm(testDataDir, { recursive: true, force: true }); + await mkdir(testDataDir, { recursive: true }); + + const db = await ENSRainbowDB.create(dbSubdir); + await db.setPrecalculatedRainbowRecordCount(0); + await db.markIngestionFinished(); + await db.setLabelSetId(labelSetId); + await db.setHighestLabelSetVersion(labelSetVersion); + await db.close(); + + await writeFile(markerFile, ""); + }); + + afterEach(async () => { + closeHttpServerImpl = undefined; + await rm(testDataDir, { recursive: true, force: true }); + }); + + it("wraps SIGTERM/SIGINT handlers so shutdown failures don't become unhandled rejections", async () => { + closeHttpServerImpl = async () => { + throw new 
Error("closeHttpServer failed"); + }; + + let sigtermHandler: undefined | (() => void); + const onceSpy = vi + .spyOn(process, "once") + .mockImplementation(((event: string, listener: (...args: any[]) => void) => { + if (event === "SIGTERM") sigtermHandler = listener as () => void; + // Delegate to the original implementation so other listeners still work. + return EventEmitter.prototype.once.call(process, event, listener); + }) as typeof process.once); + + const unhandledRejection = vi.fn(); + process.once("unhandledRejection", unhandledRejection); + + const localHandle = await entrypointCommand({ + port, + dataDir: testDataDir as AbsolutePath, + dbSchemaVersion: DB_SCHEMA_VERSION as DbSchemaVersion, + labelSetId, + labelSetVersion, + // Leave registerSignalHandlers enabled (default true) + }); + + expect(sigtermHandler).toBeTypeOf("function"); + sigtermHandler?.(); + await new Promise((r) => setTimeout(r, 0)); + + expect(unhandledRejection).not.toHaveBeenCalled(); + + onceSpy.mockRestore(); + // Cleanup (may still throw due to closeHttpServerImpl); swallow for this test. 
+ await localHandle.close().catch(() => {}); + }); +}); + describe("ENSRainbowServer (pending state smoke test)", () => { it("createPending returns a server with isReady() === false and heal throwing DbNotReadyError", async () => { const server = ENSRainbowServer.createPending(); @@ -101,3 +203,68 @@ describe("ENSRainbowServer (pending state smoke test)", () => { }); }); +describe("downloadAndExtractDatabase (stale dbSubdir cleanup)", () => { + const testDataDir = resolve(process.cwd(), "test-data-entrypoint-extract"); + const downloadTempDir = join(testDataDir, ".download-temp"); + const dbSchemaVersion = DB_SCHEMA_VERSION as DbSchemaVersion; + const labelSetId = buildLabelSetId("entrypoint-extract-test"); + const labelSetVersion = buildLabelSetVersion(0); + const dbSubdir = join(testDataDir, `data-${labelSetId}_${labelSetVersion}`); + + beforeEach(async () => { + await rm(testDataDir, { recursive: true, force: true }); + await mkdir(testDataDir, { recursive: true }); + await mkdir(dbSubdir, { recursive: true }); + await writeFile(join(dbSubdir, "STALE_FILE"), "stale"); + }); + + afterEach(async () => { + await rm(testDataDir, { recursive: true, force: true }); + }); + + it("removes existing dbSubdir before spawning tar", async () => { + let tarSawDbSubdir = true; + spawnImpl = (command: string) => { + const child = new EventEmitter() as any; + child.exitCode = null; + child.signalCode = null; + child.kill = () => true; + + queueMicrotask(async () => { + try { + if (command === "bash") { + const archivePath = join( + downloadTempDir, + "databases", + String(dbSchemaVersion), + `${labelSetId}_${labelSetVersion}.tgz`, + ); + await mkdir(dirname(archivePath), { recursive: true }); + await writeFile(archivePath, "not-a-real-tarball"); + } else if (command === "tar") { + tarSawDbSubdir = existsSync(dbSubdir); + } + + child.exitCode = 0; + child.emit("exit", 0, null); + } catch (error) { + child.emit("error", error); + } + }); + + return child; + }; + + await 
__TESTING__.downloadAndExtractDatabase({ + dataDir: testDataDir, + dbSchemaVersion, + labelSetId, + labelSetVersion, + downloadTempDir, + signal: new AbortController().signal, + }); + + expect(tarSawDbSubdir).toBe(false); + }); +}); + diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index 849772bfcd..56f01af414 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -143,8 +143,14 @@ export async function entrypointCommand( }; if (options.registerSignalHandlers !== false) { - process.on("SIGTERM", close); - process.on("SIGINT", close); + const closeFromSignal = () => { + // Node does not await signal handlers; never allow shutdown errors to become + // unhandled promise rejections during process teardown. + void close().catch(() => {}); + }; + + process.once("SIGTERM", closeFromSignal); + process.once("SIGINT", closeFromSignal); } const bootstrapComplete = new Promise((resolvePromise) => { @@ -336,6 +342,10 @@ async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Pro logger.info(`Extracting ${archivePath} into ${dataDir}`); mkdirSync(dataDir, { recursive: true }); + // If a previous bootstrap attempt was aborted mid-extraction, tar won't remove already-written + // files and a partial database can remain permanently corrupt. Clear the target before extract. + const dbSubdir = join(dataDir, `data-${labelSetId}_${labelSetVersion}`); + rmSync(dbSubdir, { recursive: true, force: true }); await spawnChild( "tar", ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], @@ -346,6 +356,10 @@ async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Pro rmSync(downloadTempDir, { recursive: true, force: true }); } +export const __TESTING__ = { + downloadAndExtractDatabase, +}; + /** * Resolve the absolute path to `download-prebuilt-database.sh`. 
* diff --git a/apps/ensrainbow/src/commands/server-command.test.ts b/apps/ensrainbow/src/commands/server-command.test.ts index e91085e2a7..76e667421c 100644 --- a/apps/ensrainbow/src/commands/server-command.test.ts +++ b/apps/ensrainbow/src/commands/server-command.test.ts @@ -11,6 +11,7 @@ import { buildEnsRainbowPublicConfig } from "@/config/public"; import { createApi } from "@/lib/api"; import { ENSRainbowDB } from "@/lib/database"; import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; +import { closeHttpServer } from "@/utils/http-server"; describe("Server Command Tests", () => { let db: ENSRainbowDB; @@ -57,7 +58,7 @@ describe("Server Command Tests", () => { afterAll(async () => { // Cleanup try { - if (server) await server.close(); + if (server) await closeHttpServer(server); if (db) await db.close(); await fs.rm(TEST_DB_DIR, { recursive: true, force: true }); } catch (error) { @@ -219,7 +220,7 @@ describe("Server Command Tests", () => { afterAll(async () => { try { - if (pendingServer) await pendingServer.close(); + if (pendingServer) await closeHttpServer(pendingServer); await pendingEnsRainbowServer.close(); } catch (error) { console.error("Pending server cleanup failed:", error); diff --git a/packages/ensrainbow-sdk/src/client.ts b/packages/ensrainbow-sdk/src/client.ts index e8bb56bd0f..79b087b428 100644 --- a/packages/ensrainbow-sdk/src/client.ts +++ b/packages/ensrainbow-sdk/src/client.ts @@ -398,6 +398,14 @@ export class EnsRainbowApiClient implements EnsRainbow.ApiClient { async health(): Promise { const response = await fetch(new URL("/health", this.options.endpointUrl)); + if (!response.ok) { + throw new Error( + `ENSRainbow health check failed (HTTP ${response.status}${ + response.statusText ? 
` ${response.statusText}` : "" + })`, + ); + } + return response.json() as Promise; } From ac844cebd913d0f227529d189efaa6056fd9183f Mon Sep 17 00:00:00 2001 From: djstrong Date: Fri, 24 Apr 2026 10:46:48 +0200 Subject: [PATCH 09/11] fix(ensrainbow): improve logging and error handling during database operations - Updated logging in the entrypoint command to use the logger for better consistency and clarity. - Enhanced error handling in the database bootstrap process to ensure proper closure of database connections on abort signals. - Refactored the `close` method in the server class to prevent multiple shutdown calls and ensure safe database closure. - Cleaned up code formatting for improved readability in various functions. --- .../src/commands/entrypoint-command.test.ts | 63 ++++++++++--------- .../src/commands/entrypoint-command.ts | 51 +++++++++------ apps/ensrainbow/src/lib/server.ts | 2 +- apps/ensrainbow/src/utils/http-server.ts | 2 +- packages/ensrainbow-sdk/src/client.test.ts | 3 + packages/ensrainbow-sdk/src/client.ts | 22 ++++--- 6 files changed, 86 insertions(+), 57 deletions(-) diff --git a/apps/ensrainbow/src/commands/entrypoint-command.test.ts b/apps/ensrainbow/src/commands/entrypoint-command.test.ts index 51ddd74f50..985189e2d4 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.test.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.test.ts @@ -1,11 +1,12 @@ +import { EventEmitter } from "node:events"; import { existsSync } from "node:fs"; import { mkdir, rm, writeFile } from "node:fs/promises"; -import { EventEmitter } from "node:events"; import { dirname, join, resolve } from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + import { buildLabelSetId, buildLabelSetVersion } from "@ensnode/ensnode-sdk"; import type { EnsRainbow } from "@ensnode/ensrainbow-sdk"; -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { AbsolutePath, DbSchemaVersion } from 
"@/config/types"; import { DB_SCHEMA_VERSION, ENSRainbowDB } from "@/lib/database"; @@ -24,9 +25,9 @@ vi.mock("@/utils/http-server", async () => { }); import { + __TESTING__, DB_READY_MARKER_FILENAME, type EntrypointCommandHandle, - __TESTING__, entrypointCommand, } from "./entrypoint-command"; @@ -156,35 +157,42 @@ describe("entrypointCommand (signal handlers)", () => { }; let sigtermHandler: undefined | (() => void); - const onceSpy = vi - .spyOn(process, "once") - .mockImplementation(((event: string, listener: (...args: any[]) => void) => { - if (event === "SIGTERM") sigtermHandler = listener as () => void; - // Delegate to the original implementation so other listeners still work. - return EventEmitter.prototype.once.call(process, event, listener); - }) as typeof process.once); + const onceSpy = vi.spyOn(process, "once").mockImplementation((( + event: string, + listener: (...args: any[]) => void, + ) => { + if (event === "SIGTERM") sigtermHandler = listener as () => void; + // Delegate to the original implementation so other listeners still work. 
+ return EventEmitter.prototype.once.call(process, event, listener); + }) as typeof process.once); const unhandledRejection = vi.fn(); process.once("unhandledRejection", unhandledRejection); - const localHandle = await entrypointCommand({ - port, - dataDir: testDataDir as AbsolutePath, - dbSchemaVersion: DB_SCHEMA_VERSION as DbSchemaVersion, - labelSetId, - labelSetVersion, - // Leave registerSignalHandlers enabled (default true) - }); - - expect(sigtermHandler).toBeTypeOf("function"); - sigtermHandler?.(); - await new Promise((r) => setTimeout(r, 0)); - - expect(unhandledRejection).not.toHaveBeenCalled(); + let localHandle: EntrypointCommandHandle | undefined; + try { + localHandle = await entrypointCommand({ + port, + dataDir: testDataDir as AbsolutePath, + dbSchemaVersion: DB_SCHEMA_VERSION as DbSchemaVersion, + labelSetId, + labelSetVersion, + // Leave registerSignalHandlers enabled (default true) + }); - onceSpy.mockRestore(); - // Cleanup (may still throw due to closeHttpServerImpl); swallow for this test. - await localHandle.close().catch(() => {}); + expect(sigtermHandler).toBeTypeOf("function"); + sigtermHandler?.(); + // Ensure shutdown chain has settled before asserting on unhandled rejections. 
+ await localHandle.close().catch(() => {}); + + expect(unhandledRejection).not.toHaveBeenCalled(); + } finally { + process.removeListener("unhandledRejection", unhandledRejection); + onceSpy.mockRestore(); + if (localHandle) { + await localHandle.close().catch(() => {}); + } + } }); }); @@ -267,4 +275,3 @@ describe("downloadAndExtractDatabase (stale dbSubdir cleanup)", () => { expect(tarSawDbSubdir).toBe(false); }); }); - diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index 56f01af414..e57a43bf5e 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -92,8 +92,8 @@ export const DB_READY_MARKER_FILENAME = "ensrainbow_db_ready"; export async function entrypointCommand( options: EntrypointCommandOptions, ): Promise { - console.log("ENSRainbow running with config:"); - console.log(stringifyConfig(options, { pretty: true })); + logger.info("ENSRainbow running with config:"); + logger.info(stringifyConfig(options, { pretty: true })); logger.info( `ENSRainbow entrypoint starting HTTP server on port ${options.port} ` + @@ -266,17 +266,38 @@ async function runDbBootstrap( logger.info(`Opening newly extracted database at ${dbSubdir}`); const db = await ENSRainbowDB.open(dbSubdir); - if (signal.aborted) { - await safeClose(db); - throw new BootstrapAbortedError(); - } + let dbAttached = false; + try { + if (signal.aborted) { + throw new BootstrapAbortedError(); + } - await ensRainbowServer.attachDb(db); + await ensRainbowServer.attachDb(db); + dbAttached = true; + + if (signal.aborted) { + throw new BootstrapAbortedError(); + } - // Write marker after a successful attach so restarts can skip the download step. - await writeFile(markerFile, ""); + // Write marker after a successful attach so restarts can skip the download step. 
+ await writeFile(markerFile, ""); - return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); + return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); + } catch (error) { + if (!dbAttached) { + await safeClose(db); + } else if (error instanceof BootstrapAbortedError || signal.aborted) { + try { + await ensRainbowServer.close(); + } catch (closeError) { + logger.warn( + closeError, + "Failed to close server while aborting after DB attach; continuing", + ); + } + } + throw error; + } } function throwIfAborted(signal: AbortSignal): void { @@ -304,8 +325,7 @@ interface DownloadAndExtractParams { } async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Promise { - const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion, downloadTempDir, signal } = - params; + const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion, downloadTempDir, signal } = params; // Clean up any stale state from a previous aborted bootstrap attempt. rmSync(downloadTempDir, { recursive: true, force: true }); @@ -346,12 +366,7 @@ async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Pro // files and a partial database can remain permanently corrupt. Clear the target before extract. 
const dbSubdir = join(dataDir, `data-${labelSetId}_${labelSetVersion}`); rmSync(dbSubdir, { recursive: true, force: true }); - await spawnChild( - "tar", - ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], - {}, - signal, - ); + await spawnChild("tar", ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], {}, signal); rmSync(downloadTempDir, { recursive: true, force: true }); } diff --git a/apps/ensrainbow/src/lib/server.ts b/apps/ensrainbow/src/lib/server.ts index a12eb4d219..ccbee191a4 100644 --- a/apps/ensrainbow/src/lib/server.ts +++ b/apps/ensrainbow/src/lib/server.ts @@ -251,9 +251,9 @@ export class ENSRainbowServer { */ async close(): Promise { const capturedDb = this.db; - if (capturedDb === undefined) return; this.db = undefined; this._serverLabelSet = undefined; + if (capturedDb === undefined) return; await capturedDb.close(); } } diff --git a/apps/ensrainbow/src/utils/http-server.ts b/apps/ensrainbow/src/utils/http-server.ts index 1ff3d011af..cbb9628dcc 100644 --- a/apps/ensrainbow/src/utils/http-server.ts +++ b/apps/ensrainbow/src/utils/http-server.ts @@ -1,5 +1,5 @@ import type { Server as HttpServer } from "node:http"; -import type { Http2Server, Http2SecureServer } from "node:http2"; +import type { Http2SecureServer, Http2Server } from "node:http2"; /** * Promisified wrapper around Node's callback-based `http.Server.close()`. 
diff --git a/packages/ensrainbow-sdk/src/client.test.ts b/packages/ensrainbow-sdk/src/client.test.ts index bad905a03d..2096ead148 100644 --- a/packages/ensrainbow-sdk/src/client.test.ts +++ b/packages/ensrainbow-sdk/src/client.test.ts @@ -258,6 +258,9 @@ describe("EnsRainbowApiClient", () => { it("should return a positive health check", async () => { mockFetch.mockResolvedValueOnce({ + ok: true, + status: 200, + statusText: "OK", json: () => Promise.resolve({ status: "ok", diff --git a/packages/ensrainbow-sdk/src/client.ts b/packages/ensrainbow-sdk/src/client.ts index 79b087b428..7b906b9e85 100644 --- a/packages/ensrainbow-sdk/src/client.ts +++ b/packages/ensrainbow-sdk/src/client.ts @@ -166,10 +166,7 @@ export namespace EnsRainbow { errorCode: typeof ErrorCode.ServiceUnavailable; } - export type CountResponse = - | CountSuccess - | CountServerError - | CountServiceUnavailableError; + export type CountResponse = CountSuccess | CountServerError | CountServiceUnavailableError; /** * Complete public configuration object for ENSRainbow. @@ -417,17 +414,24 @@ export class EnsRainbowApiClient implements EnsRainbow.ApiClient { * bootstrapping its database. Clients that require a usable database (e.g. ENSIndexer) should * poll this method instead of `health()` during startup. * - * @throws if the server is not ready yet (HTTP 503) or the request otherwise fails — callers are - * expected to retry with backoff. + * @throws if the server is not ready yet (HTTP 503) or if the readiness check otherwise fails + * (e.g. misrouting/404, server error/500). Callers that treat this as a retryable probe should + * retry with backoff. */ async ready(): Promise { const response = await fetch(new URL("/ready", this.options.endpointUrl)); if (!response.ok) { + const statusSuffix = `HTTP ${response.status}${ + response.statusText ? 
` ${response.statusText}` : "" + }`; + + if (response.status === 503) { + throw new Error(`ENSRainbow readiness check: service not ready yet (${statusSuffix})`); + } + throw new Error( - `ENSRainbow is not ready yet (HTTP ${response.status}${ - response.statusText ? ` ${response.statusText}` : "" - })`, + `ENSRainbow readiness check failed (${statusSuffix}). This usually indicates a non-readiness issue (e.g. wrong base URL, misrouting, or a server error).`, ); } From 2b498f9bcdf95cac5b1cec54bb419fe352b53a9a Mon Sep 17 00:00:00 2001 From: djstrong Date: Fri, 24 Apr 2026 11:56:35 +0200 Subject: [PATCH 10/11] fix(ensrainbow): refine readiness checks and documentation updates - Updated comments and documentation to clarify the readiness checks and bootstrap process. - Enhanced the `/ready` endpoint to ensure it accurately reflects the state of both the database and public configuration. - Improved clarity in the `EntrypointCommandHandle` interface regarding bootstrap completion and error handling. - Adjusted the CLI options documentation to better describe the post-coercion values. - Minor adjustments to comments in the database bootstrap pipeline for better understanding. 
--- .../src/lib/ensrainbow/singleton.ts | 4 +- apps/ensrainbow/src/cli.ts | 6 +- .../src/commands/entrypoint-command.ts | 76 +++++-------------- apps/ensrainbow/src/lib/api.ts | 9 +-- apps/ensrainbow/src/lib/server.ts | 8 +- .../docs/ensrainbow/concepts/glossary.mdx | 4 +- .../src/content/docs/ensrainbow/index.mdx | 2 +- .../content/docs/ensrainbow/usage/index.mdx | 2 +- 8 files changed, 34 insertions(+), 77 deletions(-) diff --git a/apps/ensindexer/src/lib/ensrainbow/singleton.ts b/apps/ensindexer/src/lib/ensrainbow/singleton.ts index 11ac5a197f..bf7834497a 100644 --- a/apps/ensindexer/src/lib/ensrainbow/singleton.ts +++ b/apps/ensindexer/src/lib/ensrainbow/singleton.ts @@ -40,8 +40,8 @@ let waitForEnsRainbowToBeReadyPromise: Promise | undefined; * * Note: It may take 30+ minutes for the ENSRainbow instance to become ready in * a cold start scenario. We use retries with a fixed interval between attempts - * for the ENSRainbow health check to allow for ample time for ENSRainbow to - * become ready. + * for the ENSRainbow readiness check to allow for ample time for bootstrap to + * complete. * * @throws When ENSRainbow fails to become ready after all configured retry attempts. * This error will trigger termination of the ENSIndexer process. diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 0dcce5aeb9..904a0deecb 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -32,9 +32,9 @@ export interface CLIOptions { /** * yargs-parsed argument shape for the `entrypoint` command. * - * `label-set-id` and `label-set-version` are coerced to their branded types via - * `buildLabelSetId` / `buildLabelSetVersion`, so the CLI layer works with primitive types - * and hands branded values to {@link entrypointCommand}. + * This interface represents the post-coercion values consumed by the handler. + * Runtime coercion is performed by yargs via `buildLabelSetId` and + * `buildLabelSetVersion`. 
*/ interface EntrypointCommandCliArgs { port: number; diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index e57a43bf5e..9cd1839428 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -53,14 +53,12 @@ export interface EntrypointCommandOptions { } /** - * Handle returned by {@link entrypointCommand} so callers (tests, embeddings) can close the - * HTTP server and DB, and observe when the background bootstrap pipeline has finished. + * Handle returned by {@link entrypointCommand}. */ export interface EntrypointCommandHandle { /** - * Resolves once the background bootstrap has either completed successfully or been aborted - * during shutdown. Never rejects: on non-abort failure the process is terminated via - * `process.exit(1)`. + * Resolves when bootstrap finishes or is aborted by shutdown. + * Never rejects: non-abort failures terminate the process via `process.exit(1)`. */ readonly bootstrapComplete: Promise; /** @@ -77,17 +75,7 @@ export interface EntrypointCommandHandle { export const DB_READY_MARKER_FILENAME = "ensrainbow_db_ready"; /** - * Runs the full ENSRainbow container lifecycle: - * - * 1. Starts the Hono HTTP server immediately with an unattached database (so `/health` and - * `/ready` respond right away, and orchestrator health checks don't kill the container). - * 2. Schedules a background bootstrap via `setTimeout(..., 0)` that downloads the pre-built - * database archive, extracts it, validates it (lite), opens LevelDB, and attaches it to the - * server. Any failure in this pipeline calls `process.exit(1)` so the orchestrator restarts - * the container. - * 3. Wires SIGTERM/SIGINT handlers for graceful shutdown. 
On shutdown the bootstrap is aborted - * (spawned children are killed and any partially-opened LevelDB handle is released) before - * the HTTP server and DB-backed server are closed, so we don't leak locks or prolong exit. + * Starts HTTP immediately, bootstraps DB in the background, and wires graceful shutdown. */ export async function entrypointCommand( options: EntrypointCommandOptions, @@ -110,13 +98,10 @@ export async function entrypointCommand( port: options.port, }); - // Shared between close() and the bootstrap pipeline. `close()` aborts this to kill any - // in-flight spawned child processes and make the bootstrap bail out as soon as possible. + // Shared abort signal for `close()` and bootstrap work. const bootstrapAborter = new AbortController(); - // Resolves as soon as the bootstrap task has settled (success, failure, or abort). Used by - // close() to wait for cleanup of in-flight children and partially-opened DB handles before - // returning. Distinct from `bootstrapComplete`, which only resolves on successful bootstrap. + // Tracks bootstrap task settlement so `close()` can await cleanup. let signalBootstrapSettled!: () => void; const bootstrapSettled = new Promise((resolvePromise) => { signalBootstrapSettled = resolvePromise; @@ -129,9 +114,7 @@ export async function entrypointCommand( logger.info("Shutting down server..."); bootstrapAborter.abort(); try { - // Wait for the bootstrap pipeline to settle (including spawned child cleanup and any - // partially-opened DB being closed) before shutting down the HTTP server and DB, so - // we don't leave child processes or LevelDB locks behind. + // Wait for bootstrap cleanup before closing shared resources. 
await bootstrapSettled; await closeHttpServer(httpServer); await ensRainbowServer.close(); @@ -144,8 +127,7 @@ export async function entrypointCommand( if (options.registerSignalHandlers !== false) { const closeFromSignal = () => { - // Node does not await signal handlers; never allow shutdown errors to become - // unhandled promise rejections during process teardown. + // Node does not await signal handlers; swallow errors to avoid unhandled rejections. void close().catch(() => {}); }; @@ -154,7 +136,7 @@ export async function entrypointCommand( } const bootstrapComplete = new Promise((resolvePromise) => { - // Schedule the bootstrap on the next tick so the HTTP server can accept connections first. + // Defer bootstrap so the HTTP server starts accepting requests first. setTimeout(() => { runDbBootstrap(options, ensRainbowServer, bootstrapAborter.signal) .then((publicConfig) => { @@ -183,14 +165,10 @@ export async function entrypointCommand( } /** - * Full database bootstrap pipeline. Idempotent: if the marker file already exists and the - * on-disk database passes lite validation, the download and extraction steps are skipped. - * - * Honors `signal`: between steps we check for abort and, if a DB has been opened but not yet - * attached to the server, close it so the LevelDB lock is released promptly. Once the DB is - * attached to `ensRainbowServer`, its close is owned by `ENSRainbowServer.close()`. + * Idempotent DB bootstrap pipeline. * - * @returns the `ENSRainbowPublicConfig` for the attached database. + * If marker + DB are present, reuse them; otherwise download + extract. + * Returns the public config for the attached DB. 
*/ async function runDbBootstrap( options: EntrypointCommandOptions, @@ -208,8 +186,7 @@ async function runDbBootstrap( logger.info( `Found existing ENSRainbow marker at ${markerFile}; attempting to open existing database at ${dbSubdir}`, ); - // Track the opened DB and whether ownership has transferred to the server so the catch - // block below can release the correct resources before falling back to re-download. + // Track DB ownership so cleanup chooses the correct close path. let existingDb: ENSRainbowDB | undefined; let existingDbAttached = false; try { @@ -226,11 +203,7 @@ async function runDbBootstrap( if (error instanceof BootstrapAbortedError || signal.aborted) { throw error; } - // Release the DB handle (so the LevelDB LOCK is freed before we re-extract into the - // same path), and wipe the stale on-disk files so the tar re-extraction starts from a - // clean state. If attach succeeded, ownership has transferred to the server, so route - // the close through `ensRainbowServer.close()` which also resets its internal state so - // the re-downloaded DB can be re-attached below. + // Ensure LevelDB lock is released before fallback re-extract. if (existingDbAttached) { try { await ensRainbowServer.close(); @@ -248,7 +221,7 @@ async function runDbBootstrap( error, "Existing ENSRainbow database failed to open or validate; re-downloading from scratch", ); - // Fall through to re-download below. + // Fall through to re-download. } } @@ -279,7 +252,7 @@ async function runDbBootstrap( throw new BootstrapAbortedError(); } - // Write marker after a successful attach so restarts can skip the download step. + // Write marker only after a successful attach. 
await writeFile(markerFile, ""); return buildEnsRainbowPublicConfig(await buildDbConfig(ensRainbowServer)); @@ -327,7 +300,7 @@ interface DownloadAndExtractParams { async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Promise { const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion, downloadTempDir, signal } = params; - // Clean up any stale state from a previous aborted bootstrap attempt. + // Clean stale state from previous aborted attempts. rmSync(downloadTempDir, { recursive: true, force: true }); mkdirSync(downloadTempDir, { recursive: true }); @@ -362,8 +335,7 @@ async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Pro logger.info(`Extracting ${archivePath} into ${dataDir}`); mkdirSync(dataDir, { recursive: true }); - // If a previous bootstrap attempt was aborted mid-extraction, tar won't remove already-written - // files and a partial database can remain permanently corrupt. Clear the target before extract. + // Ensure extraction target is clean; tar does not delete stale partial files. const dbSubdir = join(dataDir, `data-${labelSetId}_${labelSetVersion}`); rmSync(dbSubdir, { recursive: true, force: true }); await spawnChild("tar", ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], {}, signal); @@ -376,15 +348,11 @@ export const __TESTING__ = { }; /** - * Resolve the absolute path to `download-prebuilt-database.sh`. - * - * The compiled command lives at `apps/ensrainbow/src/commands/entrypoint-command.ts` in source - * and at `dist/commands/entrypoint-command.js` at runtime; in both cases the sibling `scripts` - * directory is two levels up. + * Resolve absolute path to `download-prebuilt-database.sh`. */ function resolveDownloadScriptPath(): string { const here = dirname(fileURLToPath(import.meta.url)); - // From `src/commands/` or `dist/commands/` go up two levels to reach the app root. + // From `src/commands` or `dist/commands`, go up two levels to app root. 
return resolve(here, "..", "..", "scripts", "download-prebuilt-database.sh"); } @@ -405,9 +373,7 @@ function spawnChild( env: { ...process.env, ...extraEnv }, }); - // On abort: send SIGTERM for a graceful exit, and escalate to SIGKILL after a grace period - // if the child is still alive. The `unref`'d timeout ensures we don't hold the event loop - // open if the child exits cleanly before the grace period elapses. + // On abort: SIGTERM first, then SIGKILL after a grace period. let killTimer: NodeJS.Timeout | undefined; const onAbort = () => { if (child.exitCode !== null || child.signalCode !== null) return; diff --git a/apps/ensrainbow/src/lib/api.ts b/apps/ensrainbow/src/lib/api.ts index 4dd78a6534..53f9c966f9 100644 --- a/apps/ensrainbow/src/lib/api.ts +++ b/apps/ensrainbow/src/lib/api.ts @@ -111,8 +111,7 @@ export function createApi( const result = await server.heal(labelhash, clientLabelSet); return c.json(result, result.errorCode); } catch (error) { - // Handles the race where isReady() was true at route entry but the server transitioned - // out of ready (e.g. during shutdown). + // Handle readiness races during shutdown. if (error instanceof DbNotReadyError) { return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); } @@ -126,11 +125,7 @@ export function createApi( }); api.get("/ready", (c: HonoContext) => { - // Require both the database to be attached AND the public config to be populated so that - // `/ready === 200` is a true gate for `/v1/heal`, `/v1/labels/count`, and `/v1/config`. - // Without the publicConfigSupplier check there is a brief window inside the entrypoint - // bootstrap where attachDb() has flipped isReady() to true but cachedPublicConfig is still - // null, so /ready would report ok while /v1/config still returns 503. + // Require both DB attach and config publication to avoid a transient false-ready state. 
if (!server.isReady() || publicConfigSupplier() === null) { return c.json(buildServiceUnavailableBody(), ErrorCode.ServiceUnavailable); } diff --git a/apps/ensrainbow/src/lib/server.ts b/apps/ensrainbow/src/lib/server.ts index ccbee191a4..a3d032e2e5 100644 --- a/apps/ensrainbow/src/lib/server.ts +++ b/apps/ensrainbow/src/lib/server.ts @@ -242,12 +242,8 @@ export class ENSRainbowServer { /** * Closes the attached database (if any). Safe to call on a pending server. * - * Flips readiness (`this.db`/`this._serverLabelSet` → `undefined`) BEFORE awaiting the - * underlying DB close so concurrent handlers checking `isReady()` or calling `requireDb()` - * see the "not ready" state immediately and don't obtain a handle to a DB that is being - * torn down. Handlers already past `requireDb()` still hold their local reference, but new - * ones get a clean `DbNotReadyError` (mapped to 503 at the API layer) instead of racing - * LevelDB mid-close. + * Resets readiness before awaiting DB close so new handlers fail fast with + * `DbNotReadyError` instead of racing an in-progress teardown. */ async close(): Promise { const capturedDb = this.db; diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx index f8fdebc85e..1b23c19bb9 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx @@ -51,7 +51,7 @@ Non-negative integer that monotonically increases when new labelhash-to-label ma ## Healable Count -Total number of labels that can currently be healed by the running server. Exposed via `/count`. +Total number of labels that can currently be healed by the running server. Exposed via `/v1/labels/count`. **Example:** `7 892 001` @@ -61,7 +61,7 @@ High-level outcome of an API call – either `success` or `error`. ## Error Code -HTTP-style numeric code describing the error (`400`, `404`, `500`). 
+HTTP-style numeric code describing the error (`400`, `404`, `500`, `503`). ## Rainbow Table diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx index 83b73ca930..e59e09e090 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx @@ -92,7 +92,7 @@ Get type-safe integration with built-in error handling and caching. Perfect for /> ### Deploy with Docker -Run your own ENSRainbow instance for control, privacy, or high-volume usage. Starts quickly with test data. +Run your own ENSRainbow instance for control, privacy, or high-volume usage. The HTTP server starts immediately; poll `/ready` to gate traffic until the database bootstrap completes. Date: Fri, 24 Apr 2026 12:18:04 +0200 Subject: [PATCH 11/11] fix(ensrainbow): enhance public config readiness handling - Updated the PublicConfigBuilder to await readiness before fetching ENSRainbow config, improving the reliability of public configuration retrieval. - Added error handling to ensure that config fetching does not proceed if the readiness check fails. - Enhanced unit tests to cover new readiness logic and error scenarios, ensuring robust behavior during configuration retrieval. 
--- .../public-config-builder.test.ts | 40 +++++++++++++++++++ .../public-config-builder.ts | 9 ++++- .../lib/public-config-builder/singleton.ts | 7 +++- .../src/commands/entrypoint-command.ts | 26 +++++++++--- .../ensrainbow/src/commands/server-command.ts | 22 ++++++++-- apps/ensrainbow/src/lib/server.ts | 28 ++++++++----- packages/ensrainbow-sdk/src/client.test.ts | 10 +++++ 7 files changed, 122 insertions(+), 20 deletions(-) diff --git a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts index 56b396976c..145813202d 100644 --- a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts +++ b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts @@ -228,9 +228,49 @@ describe("PublicConfigBuilder", () => { expect(result).toBe(customConfig); expect(result.isSubgraphCompatible).toBe(false); }); + + it("awaits readiness before fetching ENSRainbow config", async () => { + const callOrder: string[] = []; + const ensRainbowClientMock = { + config: vi.fn().mockImplementation(async () => { + callOrder.push("config"); + return mockEnsRainbowConfig; + }), + } as unknown as EnsRainbow.ApiClient; + const waitForReady = vi.fn().mockImplementation(async () => { + callOrder.push("wait"); + }); + + setupStandardMocks(); + const mockPublicConfig = createMockPublicConfig(); + vi.mocked(validateEnsIndexerPublicConfig).mockReturnValue(mockPublicConfig); + + const builder = new PublicConfigBuilder(ensRainbowClientMock, waitForReady); + const result = await builder.getPublicConfig(); + + expect(waitForReady).toHaveBeenCalledTimes(1); + expect(ensRainbowClientMock.config).toHaveBeenCalledTimes(1); + expect(callOrder).toEqual(["wait", "config"]); + expect(result).toBe(mockPublicConfig); + }); }); describe("getPublicConfig() - error handling", () => { + it("throws when readiness check fails and does not call config()", async () => { + const 
readinessError = new Error("ENSRainbow not ready"); + const ensRainbowClientMock = { + config: vi.fn(), + } as unknown as EnsRainbow.ApiClient; + const waitForReady = vi.fn().mockRejectedValue(readinessError); + + const builder = new PublicConfigBuilder(ensRainbowClientMock, waitForReady); + + await expect(builder.getPublicConfig()).rejects.toThrow(readinessError); + expect(waitForReady).toHaveBeenCalledTimes(1); + expect(ensRainbowClientMock.config).not.toHaveBeenCalled(); + expect(validateEnsIndexerPublicConfig).not.toHaveBeenCalled(); + }); + it("throws when ENSRainbow client config() fails", async () => { // Arrange const ensRainbowError = new Error("ENSRainbow service unavailable"); diff --git a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts index a34a8465f0..a57e3002af 100644 --- a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts +++ b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts @@ -18,6 +18,7 @@ export class PublicConfigBuilder { * the ENSIndexer Public Config. 
*/ private ensRainbowClient: EnsRainbow.ApiClient; + private waitForEnsRainbowReady: () => Promise; /** * Immutable ENSIndexer Public Config @@ -30,8 +31,12 @@ export class PublicConfigBuilder { /** * @param ensRainbowClient ENSRainbow Client instance used to fetch ENSRainbow Public Config */ - constructor(ensRainbowClient: EnsRainbow.ApiClient) { + constructor( + ensRainbowClient: EnsRainbow.ApiClient, + waitForEnsRainbowReady: () => Promise = async () => {}, + ) { this.ensRainbowClient = ensRainbowClient; + this.waitForEnsRainbowReady = waitForEnsRainbowReady; } /** @@ -45,6 +50,8 @@ export class PublicConfigBuilder { */ async getPublicConfig(): Promise { if (typeof this.immutablePublicConfig === "undefined") { + await this.waitForEnsRainbowReady(); + const [versionInfo, ensRainbowPublicConfig] = await Promise.all([ this.getEnsIndexerVersionInfo(), this.ensRainbowClient.config(), diff --git a/apps/ensindexer/src/lib/public-config-builder/singleton.ts b/apps/ensindexer/src/lib/public-config-builder/singleton.ts index 606fa7ca85..5252be2910 100644 --- a/apps/ensindexer/src/lib/public-config-builder/singleton.ts +++ b/apps/ensindexer/src/lib/public-config-builder/singleton.ts @@ -1,4 +1,7 @@ -import { ensRainbowClient } from "@/lib/ensrainbow/singleton"; +import { ensRainbowClient, waitForEnsRainbowToBeReady } from "@/lib/ensrainbow/singleton"; import { PublicConfigBuilder } from "@/lib/public-config-builder/public-config-builder"; -export const publicConfigBuilder = new PublicConfigBuilder(ensRainbowClient); +export const publicConfigBuilder = new PublicConfigBuilder( + ensRainbowClient, + waitForEnsRainbowToBeReady, +); diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts index 9cd1839428..5470997e36 100644 --- a/apps/ensrainbow/src/commands/entrypoint-command.ts +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -113,16 +113,32 @@ export async function entrypointCommand( alreadyClosed = 
true; logger.info("Shutting down server..."); bootstrapAborter.abort(); + // Wait for bootstrap cleanup before closing shared resources. + await bootstrapSettled; + + let shutdownError: unknown; + try { - // Wait for bootstrap cleanup before closing shared resources. - await bootstrapSettled; await closeHttpServer(httpServer); + } catch (error) { + shutdownError = error; + logger.error(error, "Failed to close HTTP server during shutdown"); + } + + try { await ensRainbowServer.close(); - logger.info("Server shutdown complete"); } catch (error) { - logger.error(error, "Error during shutdown:"); - throw error; + if (shutdownError === undefined) { + shutdownError = error; + } + logger.error(error, "Failed to close ENSRainbow server/database during shutdown"); } + + if (shutdownError !== undefined) { + throw shutdownError; + } + + logger.info("Server shutdown complete"); }; if (options.registerSignalHandlers !== false) { diff --git a/apps/ensrainbow/src/commands/server-command.ts b/apps/ensrainbow/src/commands/server-command.ts index 286b4f9efc..02fa917d25 100644 --- a/apps/ensrainbow/src/commands/server-command.ts +++ b/apps/ensrainbow/src/commands/server-command.ts @@ -47,13 +47,29 @@ export async function serverCommand(options: ServerCommandOptions): Promise { + let hadShutdownError = false; + try { await closeHttpServer(httpServer); - await db.close(); - logger.info("Server shutdown complete"); } catch (error) { - logger.error(error, "Error during shutdown:"); + hadShutdownError = true; + logger.error(error, "Failed to close HTTP server during shutdown"); + } finally { + try { + await db.close(); + } catch (error) { + hadShutdownError = true; + logger.error(error, "Failed to close database during shutdown"); + } + } + + if (hadShutdownError) { + process.exitCode = 1; + logger.error("Server shutdown completed with errors"); + return; } + + logger.info("Server shutdown complete"); })(); return shutdownPromise; diff --git a/apps/ensrainbow/src/lib/server.ts 
b/apps/ensrainbow/src/lib/server.ts index a3d032e2e5..d36510ed2b 100644 --- a/apps/ensrainbow/src/lib/server.ts +++ b/apps/ensrainbow/src/lib/server.ts @@ -21,11 +21,7 @@ import { logger } from "@/utils/logger"; * @throws Error if the server is not ready or the record count cannot be read from the database. */ export async function buildDbConfig(server: ENSRainbowServer): Promise { - if (!server.isReady()) { - throw new Error( - "Cannot build DB config: ENSRainbowServer has no database attached yet (still bootstrapping)", - ); - } + const { serverLabelSet } = server.requireReady(); const countResult = await server.labelCount(); if (countResult.status === StatusCode.Error) { @@ -34,9 +30,8 @@ export async function buildDbConfig(server: ENSRainbowServer): Promise ); } - // isReady() was true so serverLabelSet is defined. return { - labelSet: server.serverLabelSet as EnsRainbowServerLabelSet, + labelSet: serverLabelSet, recordsCount: countResult.count, }; } @@ -134,6 +129,22 @@ export class ENSRainbowServer { return this.db; } + /** + * Returns both ready-state values or throws if the server is not ready. + * + * Centralizes the invariant established by {@link attachDb}: once `db` is attached, + * `_serverLabelSet` must also be present. + */ + public requireReady(): { db: ENSRainbowDB; serverLabelSet: EnsRainbowServerLabelSet } { + const db = this.requireDb(); + if (this._serverLabelSet === undefined) { + throw new Error( + "ENSRainbowServer invariant violation: database is attached but server label set is missing", + ); + } + return { db, serverLabelSet: this._serverLabelSet }; + } + /** * Determines if a versioned rainbow record should be treated as unhealable * based on the client's label set version requirements, ignoring the label set ID. 
@@ -153,8 +164,7 @@ export class ENSRainbowServer { labelHash: LabelHash, clientLabelSet: EnsRainbowClientLabelSet, ): Promise { - const db = this.requireDb(); - const serverLabelSet = this._serverLabelSet as EnsRainbowServerLabelSet; + const { db, serverLabelSet } = this.requireReady(); let labelHashBytes: ByteArray; try { diff --git a/packages/ensrainbow-sdk/src/client.test.ts b/packages/ensrainbow-sdk/src/client.test.ts index 2096ead148..ab01baa90f 100644 --- a/packages/ensrainbow-sdk/src/client.test.ts +++ b/packages/ensrainbow-sdk/src/client.test.ts @@ -414,4 +414,14 @@ describe("HealResponse cacheability", () => { expect(isCacheableHealResponse(response)).toBe(false); }); + + it("should consider HealServiceUnavailableError responses not cacheable", async () => { + const response: EnsRainbow.HealServiceUnavailableError = { + status: StatusCode.Error, + error: "ENSRainbow is still bootstrapping its database", + errorCode: ErrorCode.ServiceUnavailable, + }; + + expect(isCacheableHealResponse(response)).toBe(false); + }); });