From cb4af07c26f1a93c055f01004be7f590634c60f9 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 7 May 2026 20:44:58 -0600 Subject: [PATCH 01/24] Scaffold static block storage backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a slot-keyed durable archive (`StaticBlockStore`) for finalized blinded blocks, integrated into `migrate_database` as a second pass that runs alongside the existing cold-state migration. File format and manifest persistence remain `todo!()` — this is the wiring scaffold. - New `DBColumn::BeaconBlockSlot` reverse index (root → slot). - `HotColdDB::get_block_with` and `block_exists` fall through to the archive after a hot-KV miss. - Archival driven inside `migrate_database`: cold ops (BeaconBlockRoots + BeaconBlockSlot) commit atomically, hot deletes after split commit. - Skip-slot dedup seeded from `BeaconBlockRoots[current_split.slot - 1]`, with `Hash256::ZERO` for the genesis case. - Spec at `specs/static-blocks.md`. 
--- beacon_node/store/src/hot_cold_store.rs | 127 ++++++++++++++++++--- beacon_node/store/src/lib.rs | 9 ++ beacon_node/store/src/static_blocks.rs | 37 ++++++ specs/static-blocks.md | 146 ++++++++++++++++++++++++ 4 files changed, 306 insertions(+), 13 deletions(-) create mode 100644 beacon_node/store/src/static_blocks.rs create mode 100644 specs/static-blocks.md diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index e9b9de76e61..291fdadcf51 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -12,6 +12,7 @@ use crate::metadata::{ SCHEMA_VERSION_KEY, SPLIT_KEY, STATE_UPPER_LIMIT_NO_RETAIN, SchemaVersion, }; use crate::state_cache::{PutStateOutcome, StateCache}; +use crate::static_blocks::StaticBlockStore; use crate::{ BlobSidecarListFromRoot, DBColumn, DatabaseBlock, Error, ItemStore, KeyValueStoreOp, StoreItem, StoreOp, get_data_column_key, @@ -71,6 +72,10 @@ pub struct HotColdDB, Cold: ItemStore> { /// /// The hot database also contains all blocks. pub hot_db: Hot, + /// Optional append-only file-backed store for finalized blinded blocks. When `Some`, + /// reads fall through to it after missing in `hot_db`. When `None` (legacy mode), all + /// finalized blinded blocks remain in `hot_db` as today. + pub static_blocks: Option>, /// LRU cache of deserialized blocks and blobs. Updated whenever a block or blob is loaded. block_cache: Option>>, /// Cache of beacon states. 
@@ -236,6 +241,7 @@ impl HotColdDB, MemoryStore> { cold_db: MemoryStore::open(), blobs_db: MemoryStore::open(), hot_db: MemoryStore::open(), + static_blocks: None, block_cache: NonZeroUsize::new(config.block_cache_size) .map(BlockCache::new) .map(Mutex::new), @@ -290,6 +296,7 @@ impl HotColdDB, BeaconNodeBackend> { blobs_db: BeaconNodeBackend::open(&config, blobs_db_path)?, cold_db: BeaconNodeBackend::open(&config, cold_path)?, hot_db, + static_blocks: None, block_cache: NonZeroUsize::new(config.block_cache_size) .map(BlockCache::new) .map(Mutex::new), @@ -731,11 +738,38 @@ impl, Cold: ItemStore> HotColdDB block_root: &Hash256, decoder: impl FnOnce(&[u8]) -> Result, ssz::DecodeError>, ) -> Result>, Error> { - self.hot_db + // Hot KV first: covers both unfinalized blocks and (in legacy / pre-migration mode) + // all finalized blocks. After migration, finalized blinded bodies are absent here + // and we fall through to the static block store via the cold-KV reverse index. + if let Some(block_bytes) = self + .hot_db .get_bytes(DBColumn::BeaconBlock, block_root.as_slice())? - .map(|block_bytes| decoder(&block_bytes)) - .transpose() - .map_err(|e| e.into()) + { + return decoder(&block_bytes).map(Some).map_err(Into::into); + } + if let Some(static_blocks) = &self.static_blocks + && let Some(slot) = self.get_finalized_blinded_block_slot(block_root)? + && let Some(block_bytes) = static_blocks.get(slot)? + { + return decoder(&block_bytes).map(Some).map_err(Into::into); + } + Ok(None) + } + + /// Look up the slot of a finalized blinded block by its root, using the cold-KV reverse + /// index in [`DBColumn::BeaconBlockSlot`]. Returns `Ok(None)` if the root is unknown to + /// the cold KV (i.e. the block has not been sealed into a static file). + /// + /// Populated by [`Self::seal_era`]. + fn get_finalized_blinded_block_slot( + &self, + block_root: &Hash256, + ) -> Result, Error> { + Ok(self + .cold_db + .get_bytes(DBColumn::BeaconBlockSlot, block_root.as_slice())? 
+ .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?) } pub fn get_payload_envelope( @@ -938,8 +972,16 @@ impl, Cold: ItemStore> HotColdDB /// Determine whether a block exists in the database. pub fn block_exists(&self, block_root: &Hash256) -> Result { - self.hot_db - .key_exists(DBColumn::BeaconBlock, block_root.as_slice()) + if self + .hot_db + .key_exists(DBColumn::BeaconBlock, block_root.as_slice())? + { + return Ok(true); + } + if self.static_blocks.is_some() { + return Ok(self.get_finalized_blinded_block_slot(block_root)?.is_some()); + } + Ok(false) } /// Delete a block from the store and the block cache. @@ -3608,6 +3650,7 @@ pub fn migrate_database, Cold: ItemStore>( } let mut cold_db_block_ops = vec![]; + let mut hot_db_block_delete_ops = vec![]; // Iterate in descending order until the current split slot let state_roots: Vec<_> = @@ -3617,7 +3660,7 @@ pub fn migrate_database, Cold: ItemStore>( })?; // Then, iterate states in slot ascending order, as they are stored wrt previous states. - for (block_root, state_root, slot) in state_roots.into_iter().rev() { + for (block_root, state_root, slot) in state_roots.iter().rev() { // Store the slot to block root mapping. cold_db_block_ops.push(KeyValueStoreOp::PutKeyValue( DBColumn::BeaconBlockRoots, @@ -3628,7 +3671,7 @@ pub fn migrate_database, Cold: ItemStore>( // Do not try to store states if a restore point is yet to be stored, or will never be // stored (see `STATE_UPPER_LIMIT_NO_RETAIN`). Make an exception for the genesis state // which always needs to be copied from the hot DB to the freezer and should not be deleted. - if slot != 0 && slot < anchor_info.state_upper_limit { + if *slot != 0 && *slot < anchor_info.state_upper_limit { continue; } @@ -3637,7 +3680,7 @@ pub fn migrate_database, Cold: ItemStore>( // Only store the cold state if it's on a diff boundary. // Calling `store_cold_state_summary` instead of `store_cold_state` for those allows us // to skip loading many hot states. 
- if let StorageStrategy::ReplayFrom(from) = store.cold_storage_strategy(slot)? { + if let StorageStrategy::ReplayFrom(from) = store.cold_storage_strategy(*slot)? { // Store slot -> state_root and state_root -> slot mappings. debug!( strategy = "replay", @@ -3645,15 +3688,15 @@ pub fn migrate_database, Cold: ItemStore>( %slot, "Storing cold state" ); - store.store_cold_state_summary(&state_root, slot, &mut cold_db_state_ops)?; + store.store_cold_state_summary(state_root, *slot, &mut cold_db_state_ops)?; } else { // This is some state that we want to migrate to the freezer db. // There is no reason to cache this state. let state: BeaconState = store - .get_hot_state(&state_root, false)? - .ok_or(HotColdDBError::MissingStateToFreeze(state_root))?; + .get_hot_state(state_root, false)? + .ok_or(HotColdDBError::MissingStateToFreeze(*state_root))?; - store.store_cold_state(&state_root, &state, &mut cold_db_state_ops)?; + store.store_cold_state(state_root, &state, &mut cold_db_state_ops)?; } // Cold states are diffed with respect to each other, so we need to finish writing previous @@ -3661,6 +3704,57 @@ pub fn migrate_database, Cold: ItemStore>( store.cold_db.do_atomically(cold_db_state_ops)?; } + // Hand newly-finalized blinded blocks to the static archive. `RootsIterator` yields + // the same `block_root` for every skipped slot covered by that block, so we dedupe + // on the root. The seed handles the boundary case where the migration's first slot + // is a skip-slot extension of a block archived in a previous migration: reading + // `BeaconBlockRoots[current_split.slot - 1]` gives that block's root, which the + // dedup then matches and skips. `Hash256::ZERO` is a safe sentinel for the genesis + // case — no real block root collides with it. + if let Some(static_blocks) = &store.static_blocks { + // The slot in this range of slots might be a skipped slot. Read the previous block_root + // from the existing slot -> block_root index. 
+ let mut prev_block_root: Hash256 = if current_split.slot > 0 { + let prev_slot = current_split.slot - 1; + store + .get_cold_block_root(prev_slot)? + .ok_or(Error::MigrationError(format!( + "missing BeaconBlockRoots entry for slot {prev_slot}", + )))? + } else { + // For the genesis case set the prev_root to zero to trigger a write + Hash256::ZERO + }; + + for (block_root, _, slot) in state_roots.iter().rev() { + // Previous slot's root is the same, therefore this slot is a skipped slot + if *block_root == prev_block_root { + continue; + } + prev_block_root = *block_root; + + // The new-split block stays in hot KV — it isn't yet finalized below split. + if *slot >= finalized_state.slot() { + continue; + } + + let bytes = store + .hot_db + .get_bytes(DBColumn::BeaconBlock, block_root.as_slice())? + .ok_or(Error::BlockNotFound(*block_root))?; + static_blocks.put(*slot, &bytes)?; + cold_db_block_ops.push(KeyValueStoreOp::PutKeyValue( + DBColumn::BeaconBlockSlot, + block_root.as_slice().to_vec(), + slot.as_ssz_bytes(), + )); + hot_db_block_delete_ops.push(KeyValueStoreOp::DeleteKey( + DBColumn::BeaconBlock, + block_root.as_slice().to_vec(), + )); + } + } + // Warning: Critical section. We have to take care not to put any of the two databases in an // inconsistent state if the OS process dies at any point during the freezing // procedure. @@ -3714,6 +3808,13 @@ pub fn migrate_database, Cold: ItemStore>( finalized_state.clone(), )?; + // Reclaim hot-KV space for blinded bodies now durable in the static archive. Runs + // after split commit so a retried migration never tries to fetch a body that was + // already deleted. A crash between the split commit and this delete leaves the + // bodies in hot KV; reads still succeed via the static archive (the reverse-index + // entry, committed atomically with the split's cold-DB ops, points at it). 
+ store.hot_db.do_atomically(hot_db_block_delete_ops)?; + debug!( slot = %finalized_state.slot(), "Freezer migration complete" diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index bd8caa3ad5b..f2b4a54ded3 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -21,6 +21,7 @@ pub mod metadata; pub mod metrics; pub mod reconstruct; pub mod state_cache; +pub mod static_blocks; pub mod database; pub mod iter; @@ -29,6 +30,7 @@ pub use self::blob_sidecar_list_from_root::BlobSidecarListFromRoot; pub use self::config::StoreConfig; pub use self::hot_cold_store::{HotColdDB, HotStateSummary, Split}; pub use self::memory_store::MemoryStore; +pub use self::static_blocks::StaticBlockStore; pub use crate::metadata::BlobInfo; pub use errors::Error; pub use metadata::AnchorInfo; @@ -344,6 +346,12 @@ pub enum DBColumn { /// necessary to guarantee atomicity of the upgrade migration. #[strum(serialize = "bbx")] BeaconBlockRoots, + /// Mapping from block root to slot for blocks whose blinded bodies have been sealed + /// into static block files. Populated by the era-sealer; consulted by + /// `HotColdDB::get_finalized_blinded_block_slot` to resolve root-keyed reads against + /// the (slot-keyed) `StaticBlockStore`. + #[strum(serialize = "bbs")] + BeaconBlockSlot, /// DEPRECATED. This is the previous column for beacon block roots stored by "chunk index". /// /// Can be removed once schema v22 is buried by a hard fork. @@ -404,6 +412,7 @@ impl DBColumn { Self::OverflowLRUCache => 33, // DEPRECATED Self::BeaconMeta | Self::BeaconBlock + | Self::BeaconBlockSlot | Self::BeaconState | Self::BeaconBlob | Self::BeaconStateSummary diff --git a/beacon_node/store/src/static_blocks.rs b/beacon_node/store/src/static_blocks.rs new file mode 100644 index 00000000000..a94e83a7511 --- /dev/null +++ b/beacon_node/store/src/static_blocks.rs @@ -0,0 +1,37 @@ +//! Slot-keyed durable archive for finalized blinded blocks. +//! +//! 
`StaticBlockStore` is a black box from `HotColdDB`'s perspective: hand it block bytes, +//! ask it for them back by slot, ask it how far it has durably stored. Era boundaries, +//! file format, manifest layout, sealing, and rename semantics are entirely internal. +//! +//! Contract: +//! - `put(slot, bytes)` is durable on return. The caller is allowed to rely on this for +//! source-of-truth flips (e.g. writing a reverse-index entry, deleting from hot KV). + +use crate::Error; +use std::path::{Path, PathBuf}; +use types::Slot; + +#[derive(Debug)] +pub struct StaticBlockStore { + #[allow(dead_code)] + root_dir: PathBuf, +} + +impl StaticBlockStore { + /// Open the archive rooted at `path`. + pub fn open(_path: &Path) -> Result { + todo!() + } + + /// Read the block at `slot`, if present. + pub fn get(&self, _slot: Slot) -> Result>, Error> { + todo!() + } + + /// Durably store `bytes` at `slot`. Must not return `Ok` until the bytes are recoverable + /// after a crash. + pub fn put(&self, _slot: Slot, _bytes: &[u8]) -> Result<(), Error> { + todo!() + } +} diff --git a/specs/static-blocks.md b/specs/static-blocks.md new file mode 100644 index 00000000000..cb9737099bb --- /dev/null +++ b/specs/static-blocks.md @@ -0,0 +1,146 @@ +# Static Block Storage + +Static-file backend for finalized **blinded** `SignedBeaconBlock` archival. +Slot-indexed, append-only forward. Execution payloads, full blocks, and blobs +are out of scope. + +**Genesis sync only.** Checkpoint sync, historical block backfill, and +late activation on an existing populated node are incompatible and rejected +at startup. + +## API + +A field on `HotColdDB`. Not a `KeyValueStore`. No `Hash256` in the API; the +archive is purely slot-keyed. Eras, manifests, file rotation, fsync ordering, +atomic rename — all internal. 
+ +```rust +fn open(path: &Path) -> Result<Self>; +fn get(slot: Slot) -> Result<Option<Vec<u8>>>; // SSZ-encoded blinded block +fn put(slot: Slot, bytes: &[u8]) -> Result<()>; // durable on return +``` + +`put` durability on return is the only caller-visible contract; the source- +of-truth flip in `migrate_database` relies on it. + +## Interaction with existing DBs + +| Concern | Today | With static blocks | +| --------------------- | ---------------------------------------------------- | ------------------------------------------------------ | +| Blinded body by root | `hot_db[BeaconBlock][root]`, forever | `hot_db` until archived, then `static.get(slot)` | +| Slot → root | `cold_db[BeaconBlockRoots][slot]` | unchanged | +| Root → slot | not stored | **new**: `cold_db[BeaconBlockSlot][root]` (SSZ `Slot`) | +| Execution payload | `hot_db[ExecPayload][root]` / `[PayloadEnvelope]` | unchanged | +| Blobs / data columns | `blobs_db` | unchanged | +| Cold-DB block bodies | none (cold has only indices) | unchanged | +| Backfill | writes blinded bodies to `hot_db`, slot→root to cold | rejected at startup | + +## Read path + +`HotColdDB::get_block_with(root)`: +1. `hot_db[BeaconBlock][root]` — hits unfinalized blocks and blocks not yet + archived. +2. else `cold_db[BeaconBlockSlot][root] -> slot`, then `static.get(slot)`. +3. else `None`. + +`HotColdDB::block_exists` mirrors (1)+(2) without decoding. + +## Write path + +Block archival lives **inside `migrate_database`** as a second pass over the +already-collected `state_roots` vector. The migration's existing loop is +unchanged; a new loop after it walks the same range to drive archival. Both +loops contribute to the same `cold_db_block_ops` batch, so `BeaconBlockRoots` +and `BeaconBlockSlot` are committed atomically. + +``` +migrate_database(finalized_state): + state_roots = RootsIterator(finalized_state).take_while(slot >= current_split.slot) + + # Loop 1 (existing): BeaconBlockRoots puts + cold-state migration. 
+ for (block_root, state_root, slot) in state_roots ascending: + cold_db_block_ops.push(BeaconBlockRoots[slot] = block_root) + ...cold state ops... + + # Loop 2 (new, gated on static_blocks): archival. + if static_blocks: + # Seed from the slot just below the iteration to catch the boundary case + # where current_split.slot is itself a skip-slot extension of a block + # archived in a previous migration. + prev_block_root = cold_db[BeaconBlockRoots][current_split.slot - 1] + or Hash256::ZERO # genesis seed; never collides + for (block_root, _, slot) in state_roots ascending: + if block_root == prev_block_root: continue # skip-slot extension + prev_block_root = block_root + if slot >= finalized_state.slot(): continue # new-split block stays in hot + bytes = hot_db[BeaconBlock][block_root] # must be present + static_blocks.put(slot, bytes) # durable + cold_db_block_ops.push(BeaconBlockSlot[block_root] = slot) + hot_db_block_delete_ops.push(delete BeaconBlock[block_root]) + + # Atomic commit of cold ops (BeaconBlockRoots + BeaconBlockSlot together). + cold_db.do_atomically(cold_db_block_ops) + cold_db.sync() + + # Split commit. + ...write SPLIT_KEY, update in-memory split... + + # Reclaim hot-KV space. + hot_db.do_atomically(hot_db_block_delete_ops) +``` + +### Why the seed catches the boundary + +`RootsIterator` yields the same `block_root` for every slot covered by that +block, including skip-slot extensions. In ascending iteration the **first** +slot of each run is the block's real slot — *except* when the migration +starts inside a run (i.e. `current_split.slot` is itself a skip-slot +extension of a block archived in a previous migration). Reading +`BeaconBlockRoots[current_split.slot - 1]` returns that previous block's root, +the dedup match fires on the first iteration, and we correctly skip. + +If the previous-slot lookup is missing, the cold DB is inconsistent and the +migration aborts with `Error::MigrationError`. 
+ +### Crash semantics + +| Crash window | State after restart | Recovery | +| --------------------------------------------- | --------------------------------------------------------- | ---------------------------------------------- | +| During loops, before cold commit | Nothing committed. | Migration retried fresh. | +| Between cold commit and split commit | Reverse-index committed but split not advanced. | Migration retried; cold puts are idempotent, hot bodies still present. | +| Between split commit and hot delete | Split advanced, reverse-index committed, bodies linger in hot. | Reads still correct (hot returns the same bytes); leaked bodies stay in hot. | + +The last window is a bounded leak (~one migration's worth of bodies, ~32 +blocks) and a rare crash. No automatic recovery in v1; can be addressed later +by a startup scan if it matters in practice. + +## Modes of operation + +| Mode | Behavior | +| ------------------------------------- | ----------------------------------------------------------------------- | +| **Disabled** (default) | `static_blocks: None`. Byte-identical to current. | +| **Genesis sync + static enabled** | Archive grows from slot 0; bodies migrate out of `hot_db` per epoch. | +| **Checkpoint sync + static enabled** | Refused at startup. | +| **Late activation on existing node** | Refused at startup. | + +Late activation is unsupported because there is no persisted "lowest unarchived +slot" — the migration relies on `current_split.slot` as the watermark, so the +prefix below it would never be archived. Operator must reinitialize. + +## Schema + +- New `DBColumn::BeaconBlockSlot` (3-letter tag `bbs`). Key: 32-byte block + root. Value: SSZ-encoded `Slot` (8 bytes). Lives in `cold_db`. +- No changes to `AnchorInfo`, `BlobInfo`, `Split`, or any existing column. +- Schema version bump on the addition. + +## CLI + +- `--store-static-blocks` (default off). 
Mutually exclusive with + `--checkpoint-sync-url` and any block-backfill flag; node refuses to start + if both are set. Cannot be enabled on a node previously run without it. + +## Coexistence + +Additive. Default paths (no flag) are untouched. The `blobs_db` and the +era-blob backend (see `era-storage.md`) are independent of this. From 9546bd4d85b6da51b04da737c1af30d22038b1cf Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Thu, 7 May 2026 20:46:20 -0600 Subject: [PATCH 02/24] Add era blob storage spec Companion document describing the static-file backend for `BlobSidecar` archival via `.erb` files. Initialization via genesis sync or imported era files; checkpoint sync and P2P blob backfill rejected at startup. --- specs/era-storage.md | 284 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 specs/era-storage.md diff --git a/specs/era-storage.md b/specs/era-storage.md new file mode 100644 index 00000000000..854a9c1d65e --- /dev/null +++ b/specs/era-storage.md @@ -0,0 +1,284 @@ +# Era Blob Storage + +Static-file backend for `BlobSidecar` archival, using E2Store-compatible `.erb` +files. Slot-indexed, append-only forward, sealed in fixed-size eras. + +Stored data is blobs only. Column sidecars are derived on read. + +**Initialization is via genesis sync or import of an existing era set. 
+Checkpoint sync and P2P blob backfill are incompatible with this backend +and rejected at startup.** + +## Required APIs (active forks: Fulu, Gloas) + +``` +get_blobs(slot).into_columns ≡ + get_data_column_sidecars_from_block( + block, + [compute_cells_and_kzg_proofs(b) for b in blobs] + ) +``` +(consensus-specs/fulu/validator.md) + +### REST (beacon-APIs) + +| Endpoint | blobs_db | era backend | +| - | - | - | +| `GET /eth/v1/beacon/blobs/{block_id}?versioned_hashes=…` | `get_blobs(root)`, HTTP filters by hash | resolve slot → `era.get_blobs(slot)` | +| `GET /eth/v1/debug/beacon/data_column_sidecars/{block_id}?indices=…` | `get_data_columns(root)` | resolve slot → `era.get_blobs(slot).into_columns` | + +### P2P Req/Resp (Fulu, carried into Gloas) + +| Method | blobs_db | era backend | +| - | - | - | +| `BlobSidecarsByRange` | blobs_db per slot | `era.get_blobs(slot)` | +| `BlobSidecarsByRoot` | blobs_db per root | resolve root → slot → `BlobSidecarsByRange` | +| `DataColumnSidecarsByRange` | `BeaconDataColumn` per slot | `era.get_blobs(slot).into_columns` | +| `DataColumnSidecarsByRoot` | `BeaconDataColumn` per root | resolve root → slot → `DataColumnSidecarsByRange` | + +`era.get_blobs(slot)` returns the full per-slot list; HTTP / wire-layer +projection (`versioned_hashes`, `indices`, `columns`) happens above the +store. Blob wire methods are deprecated as of +`FULU_FORK_EPOCH + MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS`. + +## Constants + +| Name | Value | +| - | - | +| `SLOTS_PER_ERA` | `SLOTS_PER_HISTORICAL_ROOT` (`8192`) | +| `ERA_SEAL_DELAY` | `2 * SLOTS_PER_EPOCH` | + +## Custom types + +| Name | SSZ | +| - | - | +| `EraNumber` | `uint64` | +| `EraBlobPointers` | `{ back: Slot, forward: Option<Slot> }` | +| `Manifest` | `{ sealed_eras: List[EraNumber], anchor_era: EraNumber }` | + +## Layout + +``` +{datadir}/beacon/era/ + manifest + {network}-{era:05}-{root_prefix}.erb +``` + +## API + +The era store is **not** a `KeyValueStore`. 
It is a narrow, slot-indexed type +held as a field on `HotColdDB`: + +```rust +fn get_blobs(slot: Slot) -> Result<Option<BlobSidecarList<E>>>; +fn append_blobs(slot: Slot, blobs: BlobSidecarList<E>) -> Result<()>; // requires slot > forward_pointer +fn forward_pointer() -> Option<Slot>; +fn back_pointer() -> Slot; // set once at init, read-only +fn is_sealed(era: EraNumber) -> bool; +fn seal(era: EraNumber) -> Result<()>; +``` + +### Invariants + +- `back_pointer` is set at init and never changes: + - genesis sync → `back_pointer = 0` + - era-file init → `back_pointer = lowest slot in imported set` +- `forward_pointer` is `None` at genesis-sync init, or `= highest slot in + imported set` after era-file init. It only advances; `append_blobs` + requires `slot > forward_pointer`. +- Out-of-order writes return `Err(OutOfOrder)`. The store does not de-dupe. +- `get_blobs(slot)`: + - in-range, no blobs at slot → `Some([])` + - in-range, blobs present → `Some(list)` + - out-of-range → `None` + +### Gap fills + +When `append_blobs(slot, ...)` advances multiple slots ahead of +`forward_pointer`, intermediate slots are auto-filled with empties internally. +Required for the pre-Deneb → Deneb jump on first append. 
+ +## Helpers + +```python +def era_of(slot: Slot) -> EraNumber: + return slot // SLOTS_PER_ERA + +def era_range(e: EraNumber) -> (Slot, Slot): + return (e * SLOTS_PER_ERA, (e + 1) * SLOTS_PER_ERA) + +def can_seal(e, back, forward, finalized) -> bool: + start, end = era_range(e) + return (forward is not None + and back <= start and end <= forward + and end + ERA_SEAL_DELAY <= finalized) +``` + +## Triggers + +| Op | Caller | Source | +| - | - | - | +| `append_blobs` | new step on the migrator thread, after `migrate_database`, before `try_prune_blobs` | drains `blobs_db` for slots that became finalized this migration | +| `seal` | post `append_blobs`; also at startup over imported eras | when `can_seal(e)` holds | + +`append_blobs` fires only post-finality; the no-rewrite invariant of `.erb` +files is preserved against reorgs by construction. There is no backfill +trigger — historical data arrives only via era-file import at init. + +## Sealing + +For each `e` with `can_seal(e)`: +1. Write `.erb.tmp`, append `SlotIndex`, fsync. +2. Atomic rename to final filename. +3. Update `manifest`. +4. Delete overlay rows for `era_range(e)`. + +Crash mid-seal leaves a `*.tmp` discarded on restart. Sealing is idempotent. + +## Read + +`HotColdDB::get_blobs(block_root)` becomes: +1. Resolve `slot` from `block_root` (see Status quo — there is no slot index + for blobs today; one of the three options below is required). +2. If `era_of(slot)` is sealed → `era_store.get_blobs(slot)`. +3. Else → `blobs_db` as today. + +Root → slot resolution options (era-mode only): +- (a) extend the call sites to pass `slot` alongside `block_root` (most + callers already have it: block import, blob-by-range RPC). +- (b) maintain a `(root → era)` map in the era manifest, sealed eras only. +- (c) on miss, load the block header from the cold DB to recover its slot. + +Default plan: (a) where the caller has it cheaply, (b) as fallback for the +HTTP-by-root path. 
The era store itself stays purely slot-indexed. + +## Pruning + +Era and existing pruning interlock by capping the prune cursor: + +``` +prune_horizon = min(retention_horizon, lowest_unsealed_era_start) +``` + +Sealing must precede the prune cursor advancing into a given era. Pruning +itself is unchanged; only the cursor calculation gains the era clamp when era +mode is enabled. + +## Status quo + +### Storage + +- `blobs_db` is a separate physical DB next to `chain_db` and `freezer_db`, + same backend (LevelDB / Redb). + `beacon_node/store/src/hot_cold_store.rs:266-290`. +- `DBColumn::BeaconBlob` rows are **keyed by `block_root` only**; the value + is the entire `BlobSidecarList` for that block, SSZ-encoded as a single + row. `beacon_node/store/src/lib.rs:257`. +- No slot index for blobs anywhere. `get_blobs(block_root) -> BlobSidecarListFromRoot` + is the only read API. `beacon_node/store/src/hot_cold_store.rs:2625`. + +### Lifecycle + +- **Init.** `BlobInfo.oldest_blob_slot = max(anchor_slot, deneb_fork_slot)` in + `init_blob_info`. `hot_cold_store.rs:2854`. +- **Forward sync.** `put_blobs(block_root, blobs)` writes directly to + `blobs_db` per block, no batching. `hot_cold_store.rs:958`. +- **Backfill.** `import_historical_block_batch` builds `StoreOp::PutBlobs` + ops, commits via `blobs_db.do_atomically(blob_batch)` at line 256, then + CAS-updates `oldest_blob_slot` to the min slot seen. + `beacon_chain/src/historical_blocks.rs:159-294`. +- **Finalization migration.** `migrate_database` does **not** touch blobs. + Hot/cold split applies only to states and block roots. + `hot_cold_store.rs:3578-3726`. +- **Pruning.** `try_prune_blobs` runs on the migrator thread post-migrate. + Walks blocks backwards from `min(data_availability_boundary - margin, + split.epoch - 1)`, deletes blob rows by block_root, advances + `oldest_blob_slot` to `end_slot + 1`. `hot_cold_store.rs:3320-3483`. 
+ +### Implications for the era backend + +- **Blobs are root-keyed; routing by slot needs resolution.** Blob reads + today never compute a slot; era-mode introduces that need (see the Read + section above for the chosen approach). +- **`append_blobs` cannot live inside `migrate_database`** — that function + doesn't process blobs today. It hooks as a **new step on the migrator + thread**, after `migrate_database` returns and before `try_prune_blobs` + runs. +- **No backfill hook.** `import_historical_block_batch` is unused under era + mode; checkpoint sync and blob backfill are rejected at startup. +- **`prune_horizon` clamp** lives inside `try_prune_blobs`: when era mode is + on, intersect the existing horizon with `lowest_unsealed_era_start`. + Trivially additive. + +## Integration + +### `beacon_node/store` + +- `EraBlobStore` is a field on `HotColdDB`, gated by a runtime flag. **No new + `BeaconNodeBackend` variant.** +- `HotColdDB::get_blobs` adds the `era_of(slot)` check before falling through + to `blobs_db`. +- All other store paths unchanged. + +### Metadata + +- `AnchorInfo` unchanged. +- `BlobInfo` unchanged. +- New `BeaconMeta` entries: `EraBlobPointers { back, forward }`, `EraManifest`. +- Bump `SchemaVersion`. + +### CLI + +- `--store-era-blobs` (default off). Mutually exclusive with + `--checkpoint-sync-url` and any blob-backfill flag; node refuses to start + if both are set. +- `--era-import-dir ` — directory of `.era` (blocks + boundary state) + + `.erb` (blobs) files consumed at init. Required if not genesis-syncing. +- `lcli era blobs export` to produce `.erb` files from an archival node. + +## Initialization + +Two paths only; the backend refuses to start in any other configuration. + +### Genesis sync + +- `back_pointer = 0`, `forward_pointer = None`. +- Forward sync fills `blobs_db` from genesis. 
As eras finalize and pass + `ERA_SEAL_DELAY`, `append_blobs` drains them into the era backend; sealing + produces `.erb` files; the overlay rows are deleted. + +### Era-file import + +- User supplies `--era-import-dir` containing matched `.era` and `.erb` + files. +- At startup: + 1. `.era` consumer (existing `era-file` branch) loads blocks + boundary + state, bootstrapping the chain. + 2. Each `.erb` file is validated against the imported blocks: per + sidecar, check `kzg_commitment` against + `block.body.blob_kzg_commitments[index]` and run + `verify_blob_kzg_proof`. + 3. Validated `.erb` files are linked into `{datadir}/beacon/era/`; + manifest is updated; eras are marked sealed. +- `back_pointer = lowest slot in imported set`, + `forward_pointer = highest slot in imported set`. +- Forward sync continues from there. + +### Compatibility + +- **Checkpoint sync incompatible.** A checkpoint-synced node has a gap from + genesis to anchor that requires backfill — disabled here. Startup error. +- **P2P blob backfill incompatible.** Same reason. Startup error. +- **No in-place opt-in for existing nodes.** A populated default-backend + node must `lcli era blobs export` its data, also export `.era` blocks + + state, drop `chain_db` / `freezer_db` / `blobs_db`, and reinitialize via + era-file import. + +Non-canonical blobs never reach the era backend. `append_blobs` runs after +`prune_hot_db` (`migrate.rs:769-778`) deletes orphaned blobs in the same +migrator pass; era-file import only accepts validated canonical data. + +## Coexistence + +Era is additive. Default `blobs_db` paths are untouched. `.erb` output is +spec-compatible with Nimbus. 
From 58fdb613a053113e100f67d824a6f43fc99f60b0 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 06:50:51 +0200 Subject: [PATCH 03/24] Specify static block file format --- specs/static-blocks.md | 172 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 170 insertions(+), 2 deletions(-) diff --git a/specs/static-blocks.md b/specs/static-blocks.md index cb9737099bb..db57867e6bf 100644 --- a/specs/static-blocks.md +++ b/specs/static-blocks.md @@ -11,8 +11,8 @@ at startup. ## API A field on `HotColdDB`. Not a `KeyValueStore`. No `Hash256` in the API; the -archive is purely slot-keyed. Eras, manifests, file rotation, fsync ordering, -atomic rename — all internal. +archive is purely slot-keyed. File rotation, fsync ordering, and crash recovery +are internal. ```rust fn open(path: &Path) -> Result; @@ -23,6 +23,174 @@ fn put(slot: Slot, bytes: &[u8]) -> Result<()>; // durable on return `put` durability on return is the only caller-visible contract; the source- of-truth flip in `migrate_database` relies on it. +## Static file format + +Files live together in one directory: + +``` +static_blocks_00000 +static_blocks_00000.off +static_blocks_00001 +static_blocks_00001.off +static_blocks.conf +``` + +Mapping: + +``` +SLOTS_PER_FILE = 8192 +file_id = slot / SLOTS_PER_FILE +index = slot % SLOTS_PER_FILE +off_pos = index * 8 +``` + +The data file name uses `file_id` as a zero-padded decimal number. The slot +range is derived from the id and is not encoded in the name. + +Each data file starts with the e2store version record: + +``` +65 32 00 00 00 00 00 00 +``` + +Block records are appended after it: + +``` +type: [0x01, 0x00] +length: compressed_data.len() as u32, little-endian +reserved: u16 = 0 +data: snappy-framed(SSZ-encoded blinded SignedBeaconBlock bytes) +``` + +The `.off` file is fixed-size: `8192 * 8` bytes. Each entry is a little-endian +`u64` absolute byte offset into the matching data file. 
Offset `0` means no +block is present for that slot. Real block offsets are nonzero because the data +file starts with the version record. + +`static_blocks.conf` is global to the static block store and is fixed-size: + +``` +magic: [u8; 8] = b"LHSTBLK1" +highest_written_slot: u64 little-endian, u64::MAX means empty +current_data_len: u64 little-endian +``` + +`current_data_len` applies to the current file, derived from +`highest_written_slot / SLOTS_PER_FILE`. + +Config updates are atomic: + +1. Write the full config to `static_blocks.conf.tmp`. +2. Fsync `static_blocks.conf.tmp`. +3. Rename it over `static_blocks.conf`. +4. Fsync the directory. + +## `put` contract + +`put(slot, bytes)` requires: + +``` +highest_written_slot == None || slot > highest_written_slot +snappy_framed(bytes).len() <= u32::MAX +``` + +Skipped slots are allowed. They leave zero offsets in `.off`. + +Write sequence: + +1. Lock the writer. +2. Reject `slot <= highest_written_slot`. +3. Compute `file_id`, `index`, and `off_pos`. +4. Create or open `static_blocks_{file_id:05}`. +5. If the data file is new, write the e2store version record. +6. Create or open `static_blocks_{file_id:05}.off`. +7. If the `.off` file is new, initialize it to `8192 * 8` zero bytes. +8. Compress `bytes` with snappy-framed compression. +9. Append the compressed block record to the data file, remembering the offset + of its 8-byte record header. +10. Fsync the data file. +11. Write the offset as `u64` little-endian at `off_pos` in the `.off` file. +12. Fsync the `.off` file. +13. Atomically update `static_blocks.conf` with: + ``` + highest_written_slot = slot + current_data_len = data_file_len + ``` +14. Fsync the directory after the rename. + +A write is committed only when `static_blocks.conf` reflects it. + +On open, the store reads `static_blocks.conf`, truncates the current data file +to `current_data_len`, and clears offsets after `highest_written_slot` in the +current `.off` file. 
+ +Crash behavior: + +| Crash point | Restart behavior | +| - | - | +| Before `static_blocks.conf` update | Previous slot remains committed; appended data is truncated and offset tail is cleared. | +| During `static_blocks.conf.tmp` write | Previous `static_blocks.conf` remains the commit marker. | +| After `static_blocks.conf` rename | New slot is committed. | + +## `get` contract + +`get(slot)`: + +1. Compute `file_id`, `index`, and `off_pos`. +2. Open `static_blocks_{file_id:05}.off`. +3. Read the `u64` little-endian offset at `off_pos`. +4. If the offset is `0`, return `None`. +5. Open `static_blocks_{file_id:05}`. +6. Seek to the offset. +7. Read and validate the 8-byte block record header: + ``` + type == [0x01, 0x00] + reserved == 0 + ``` +8. Read `length` compressed bytes. +9. Snappy-decompress the bytes with the consensus maximum + `SignedBeaconBlock` SSZ size for the active fork as the output bound. +10. Return the decompressed SSZ bytes. + +If decompression exceeds the bound, return a corruption error. + +Missing files are treated as `None` only when the slot is beyond +`highest_written_slot`. Missing files for committed slots are corruption. + +## `open` contract + +In-memory state is minimal: + +``` +dir +highest_written_slot +mutex +``` + +Files are opened inside `put` and `get`; the store does not cache current file +handles in v1. + +`static_blocks.conf` uses `u64::MAX` as the empty-store sentinel for +`highest_written_slot`. + +`open(path)`: + +1. Create `path` if it does not exist. +2. If `static_blocks.conf` does not exist, create it with: + ``` + magic = b"LHSTBLK1" + highest_written_slot = u64::MAX + current_data_len = 0 + ``` +3. Read and validate `static_blocks.conf`. +4. If `highest_written_slot == u64::MAX`, initialize in-memory + `highest_written_slot = None` and return. +5. Derive the current file from `highest_written_slot / SLOTS_PER_FILE`. +6. Truncate the current data file to `current_data_len`. +7. 
Clear `.off` entries after `highest_written_slot` in the current `.off` + file by writing zeroes. +8. Initialize in-memory `highest_written_slot = Some(slot)`. + ## Interaction with existing DBs | Concern | Today | With static blocks | From 2c40f0f27b4624d1bceaee508e1d76e73ea1c669 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 07:25:08 +0200 Subject: [PATCH 04/24] Implement static block file store --- Cargo.lock | 1 + beacon_node/store/Cargo.toml | 1 + beacon_node/store/src/errors.rs | 8 + beacon_node/store/src/static_blocks.rs | 353 ++++++++++++++++++++++++- 4 files changed, 352 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 078f699f3c8..eba135b0152 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8485,6 +8485,7 @@ dependencies = [ "safe_arith", "serde", "smallvec", + "snap", "ssz_types", "state_processing", "strum", diff --git a/beacon_node/store/Cargo.toml b/beacon_node/store/Cargo.toml index 50028fe73ff..3e103c18636 100644 --- a/beacon_node/store/Cargo.toml +++ b/beacon_node/store/Cargo.toml @@ -27,6 +27,7 @@ redb = { version = "2.1.3", optional = true } safe_arith = { workspace = true } serde = { workspace = true } smallvec = { workspace = true } +snap = { workspace = true } ssz_types = { workspace = true } state_processing = { workspace = true } strum = { workspace = true } diff --git a/beacon_node/store/src/errors.rs b/beacon_node/store/src/errors.rs index a07cc838863..91953ccdb76 100644 --- a/beacon_node/store/src/errors.rs +++ b/beacon_node/store/src/errors.rs @@ -1,5 +1,6 @@ use crate::config::StoreConfigError; use crate::hot_cold_store::{HotColdDBError, StateSummaryIteratorError}; +use crate::static_blocks::StaticBlockStoreError; use crate::{DBColumn, hdiff}; #[cfg(feature = "leveldb")] use leveldb::error::Error as LevelDBError; @@ -14,6 +15,7 @@ pub enum Error { SszDecodeError(DecodeError), BeaconStateError(BeaconStateError), HotColdDBError(HotColdDBError), + 
StaticBlockStoreError(StaticBlockStoreError), DBError { message: String, }, @@ -129,6 +131,12 @@ impl From for Error { } } +impl From for Error { + fn from(e: StaticBlockStoreError) -> Error { + Error::StaticBlockStoreError(e) + } +} + impl From for Error { fn from(e: BeaconStateError) -> Error { Error::BeaconStateError(e) diff --git a/beacon_node/store/src/static_blocks.rs b/beacon_node/store/src/static_blocks.rs index a94e83a7511..0a95fae7870 100644 --- a/beacon_node/store/src/static_blocks.rs +++ b/beacon_node/store/src/static_blocks.rs @@ -1,37 +1,368 @@ //! Slot-keyed durable archive for finalized blinded blocks. //! //! `StaticBlockStore` is a black box from `HotColdDB`'s perspective: hand it block bytes, -//! ask it for them back by slot, ask it how far it has durably stored. Era boundaries, -//! file format, manifest layout, sealing, and rename semantics are entirely internal. +//! ask it for them back by slot. File mapping, recovery, and rename semantics are internal. //! //! Contract: //! - `put(slot, bytes)` is durable on return. The caller is allowed to rely on this for //! source-of-truth flips (e.g. writing a reverse-index entry, deleting from hot KV). +//! +//! See `specs/static-blocks.md` for the on-disk format. -use crate::Error; -use std::path::{Path, PathBuf}; +use snap::{read::FrameDecoder, write::FrameEncoder}; +use std::{ + fmt, + fs::{self, File, OpenOptions}, + io::{self, Read, Seek, SeekFrom, Write}, + path::{Path, PathBuf}, + sync::Mutex, +}; use types::Slot; +const SLOTS_PER_FILE: u64 = 8192; +const OFFSET_SIZE: u64 = 8; +const OFFSET_FILE_LEN: u64 = SLOTS_PER_FILE * OFFSET_SIZE; +const CONFIG_FILE: &str = "static_blocks.conf"; +const CONFIG_TMP_FILE: &str = "static_blocks.conf.tmp"; +const CONFIG_MAGIC: &[u8; 8] = b"LHSTBLK1"; +const CONFIG_LEN: usize = 24; +// Empty-store sentinel for `highest_written_slot` in `static_blocks.conf`. +const EMPTY_SLOT: u64 = u64::MAX; +// e2store version record. 
+const VERSION_RECORD: [u8; 8] = [0x65, 0x32, 0, 0, 0, 0, 0, 0]; +// CompressedSignedBeaconBlock e2store record type. +const BLOCK_RECORD_TYPE: [u8; 2] = [0x01, 0x00]; +const MAX_DECOMPRESSED_BLOCK_BYTES: u64 = 10 * 1024 * 1024; + #[derive(Debug)] pub struct StaticBlockStore { - #[allow(dead_code)] root_dir: PathBuf, + highest_written_slot: Mutex<Option<Slot>>, +} + +struct Config { + highest_written_slot: Option<Slot>, + current_data_len: u64, +} + +type StoreResult<T> = std::result::Result<T, StaticBlockStoreError>; + +#[derive(Debug)] +pub enum StaticBlockStoreError { + Io(io::Error), + Compression(io::Error), + Invalid(String), +} + +impl fmt::Display for StaticBlockStoreError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(e) => write!(f, "static block store io error: {e}"), + Self::Compression(e) => write!(f, "static block store compression error: {e}"), + Self::Invalid(message) => write!(f, "static block store invalid data: {message}"), + } + } +} + +impl From<io::Error> for StaticBlockStoreError { + fn from(e: io::Error) -> Self { + Self::Io(e) + } } impl StaticBlockStore { /// Open the archive rooted at `path`. - pub fn open(_path: &Path) -> Result<Self, Error> { - todo!() + pub fn open(path: &Path) -> StoreResult<Self> { + fs::create_dir_all(path)?; + + let store = Self { + root_dir: path.to_path_buf(), + highest_written_slot: Mutex::new(None), + }; + + if !store.config_path().exists() { + store.write_config(None, 0)?; + } + + let config = store.read_config()?; + if let Some(slot) = config.highest_written_slot { + store.heal_current_file(slot, config.current_data_len)?; + } + *store.lock_highest()? = config.highest_written_slot; + + Ok(store) } /// Read the block at `slot`, if present. - pub fn get(&self, _slot: Slot) -> Result<Option<Vec<u8>>, Error> { - todo!() + pub fn get(&self, slot: Slot) -> StoreResult<Option<Vec<u8>>> { + let Some(highest_written_slot) = *self.lock_highest()?
else { + return Ok(None); + }; + if slot > highest_written_slot { + return Ok(None); + } + + let file_id = file_id(slot); + let offset = self.read_offset(file_id, slot)?; + if offset == 0 { + return Ok(None); + } + + let data_path = self.data_path(file_id); + let mut data_file = File::open(&data_path)?; + data_file.seek(SeekFrom::Start(offset))?; + + let mut header = [0; 8]; + data_file.read_exact(&mut header)?; + if header[0..2] != BLOCK_RECORD_TYPE || header[6..8] != [0, 0] { + return Err(StaticBlockStoreError::Invalid( + "invalid static block record header".into(), + )); + } + + let len = u32::from_le_bytes([header[2], header[3], header[4], header[5]]) as usize; + let mut compressed = vec![0; len]; + data_file.read_exact(&mut compressed)?; + + decompress_block(&compressed) } /// Durably store `bytes` at `slot`. Must not return `Ok` until the bytes are recoverable /// after a crash. - pub fn put(&self, _slot: Slot, _bytes: &[u8]) -> Result<(), Error> { - todo!() + pub fn put(&self, slot: Slot, bytes: &[u8]) -> StoreResult<()> { + let mut highest_written_slot = self.lock_highest()?; + if highest_written_slot.is_some_and(|highest| slot <= highest) { + return Err(StaticBlockStoreError::Invalid( + "static block put out of order".into(), + )); + } + + let compressed = compress_block(bytes)?; + let compressed_len = u32::try_from(compressed.len()).map_err(|_| { + StaticBlockStoreError::Invalid("compressed static block too large".into()) + })?; + + let target_file_id = file_id(slot); + // Discard an uncommitted next-file tail after a crash. 
+ let reset_file = + (*highest_written_slot).map(|highest| file_id(highest)) != Some(target_file_id); + let off_pos = offset_position(slot); + let data_path = self.data_path(target_file_id); + let off_path = self.offset_path(target_file_id); + + let mut data_file = OpenOptions::new() + .read(true) + .append(true) + .create(true) + .open(&data_path)?; + if reset_file { + data_file.set_len(0)?; + } + + if data_file.metadata()?.len() == 0 { + data_file.write_all(&VERSION_RECORD)?; + } + + let offset = data_file.seek(SeekFrom::End(0))?; + write_block_record(&mut data_file, compressed_len, &compressed)?; + let data_len = data_file.seek(SeekFrom::End(0))?; + // Data and offset files must hit disk before the config commit marker. + data_file.sync_all()?; + + let mut off_file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(&off_path)?; + if reset_file { + off_file.set_len(0)?; + } + if off_file.metadata()?.len() < OFFSET_FILE_LEN { + off_file.set_len(OFFSET_FILE_LEN)?; + } + off_file.seek(SeekFrom::Start(off_pos))?; + off_file.write_all(&offset.to_le_bytes())?; + off_file.sync_all()?; + + // Atomic config update is the commit point. + self.write_config(Some(slot), data_len)?; + *highest_written_slot = Some(slot); + + Ok(()) } + + /// Truncate uncommitted data and clear uncommitted offsets after restart. 
+ fn heal_current_file(&self, slot: Slot, current_data_len: u64) -> StoreResult<()> { + let file_id = file_id(slot); + let data_path = self.data_path(file_id); + let data_file = OpenOptions::new().read(true).write(true).open(&data_path)?; + let data_len = data_file.metadata()?.len(); + if data_len < current_data_len { + return Err(StaticBlockStoreError::Invalid( + "static block data file shorter than committed length".into(), + )); + } + if data_len != current_data_len { + data_file.set_len(current_data_len)?; + data_file.sync_all()?; + } + + let off_path = self.offset_path(file_id); + let mut off_file = OpenOptions::new().read(true).write(true).open(&off_path)?; + let required_len = offset_position(slot) + OFFSET_SIZE; + let off_len = off_file.metadata()?.len(); + if off_len < required_len { + return Err(StaticBlockStoreError::Invalid( + "static block offset file shorter than committed slot".into(), + )); + } + if off_len < OFFSET_FILE_LEN { + off_file.set_len(OFFSET_FILE_LEN)?; + } + + let clear_start = required_len; + if clear_start < OFFSET_FILE_LEN { + // Remove offsets to entries beyond the committed slot. + off_file.seek(SeekFrom::Start(clear_start))?; + let zeroes = vec![0; (OFFSET_FILE_LEN - clear_start) as usize]; + off_file.write_all(&zeroes)?; + off_file.sync_all()?; + } + + Ok(()) + } + + /// Read the global commit marker. + fn read_config(&self) -> StoreResult<Config> { + let path = self.config_path(); + let bytes = fs::read(&path)?; + if bytes.len() != CONFIG_LEN || &bytes[0..8] != CONFIG_MAGIC { + return Err(StaticBlockStoreError::Invalid( + "invalid static block config".into(), + )); + } + + let highest = u64::from_le_bytes(bytes[8..16].try_into().expect("slice length checked")); + let current_data_len = + u64::from_le_bytes(bytes[16..24].try_into().expect("slice length checked")); + + Ok(Config { + highest_written_slot: (highest != EMPTY_SLOT).then(|| Slot::new(highest)), + current_data_len, + }) + } + + /// Atomically write the global commit marker.
+ fn write_config( + &self, + highest_written_slot: Option<Slot>, + current_data_len: u64, + ) -> StoreResult<()> { + let path = self.config_path(); + let tmp_path = self.root_dir.join(CONFIG_TMP_FILE); + let mut bytes = [0; CONFIG_LEN]; + bytes[0..8].copy_from_slice(CONFIG_MAGIC); + bytes[8..16].copy_from_slice( + &highest_written_slot + .map_or(EMPTY_SLOT, |slot| slot.as_u64()) + .to_le_bytes(), + ); + bytes[16..24].copy_from_slice(&current_data_len.to_le_bytes()); + + { + let mut tmp = File::create(&tmp_path)?; + tmp.write_all(&bytes)?; + tmp.sync_all()?; + } + + fs::rename(&tmp_path, &path)?; + sync_dir(&self.root_dir) + } + + /// Read the slot's absolute data-file offset. + fn read_offset(&self, file_id: u64, slot: Slot) -> StoreResult<u64> { + let off_path = self.offset_path(file_id); + let mut off_file = File::open(&off_path)?; + let mut bytes = [0; 8]; + off_file.seek(SeekFrom::Start(offset_position(slot)))?; + off_file.read_exact(&mut bytes)?; + Ok(u64::from_le_bytes(bytes)) + } + + /// Lock writer state. + fn lock_highest(&self) -> StoreResult<std::sync::MutexGuard<'_, Option<Slot>>> { + self.highest_written_slot + .lock() + .map_err(|_| StaticBlockStoreError::Invalid("static block mutex poisoned".into())) + } + + /// Path to the global config file. + fn config_path(&self) -> PathBuf { + self.root_dir.join(CONFIG_FILE) + } + + /// Path to a data file. + fn data_path(&self, file_id: u64) -> PathBuf { + self.root_dir.join(format!("static_blocks_{file_id:05}")) + } + + /// Path to a sidecar offset file. + fn offset_path(&self, file_id: u64) -> PathBuf { + self.root_dir + .join(format!("static_blocks_{file_id:05}.off")) + } +} + +/// File id containing `slot`. +fn file_id(slot: Slot) -> u64 { + slot.as_u64() / SLOTS_PER_FILE +} + +/// Byte position of `slot` in its `.off` file. +fn offset_position(slot: Slot) -> u64 { + (slot.as_u64() % SLOTS_PER_FILE) * OFFSET_SIZE +} + +/// Snappy-frame SSZ block bytes.
+fn compress_block(bytes: &[u8]) -> StoreResult<Vec<u8>> { + let mut encoder = FrameEncoder::new(Vec::new()); + encoder + .write_all(bytes) + .map_err(StaticBlockStoreError::Compression)?; + encoder + .flush() + .map_err(StaticBlockStoreError::Compression)?; + Ok(encoder.get_ref().clone()) +} + +/// Append one compressed block record. +fn write_block_record(file: &mut File, compressed_len: u32, compressed: &[u8]) -> StoreResult<()> { + file.write_all(&BLOCK_RECORD_TYPE)?; + file.write_all(&compressed_len.to_le_bytes())?; + file.write_all(&0u16.to_le_bytes())?; + file.write_all(compressed)?; + Ok(()) +} + +/// Decode one compressed block record payload. +fn decompress_block(bytes: &[u8]) -> StoreResult<Option<Vec<u8>>> { + let decoder = FrameDecoder::new(bytes); + let mut limited = decoder.take(MAX_DECOMPRESSED_BLOCK_BYTES + 1); + let mut decompressed = Vec::new(); + limited + .read_to_end(&mut decompressed) + .map_err(StaticBlockStoreError::Compression)?; + if decompressed.len() as u64 > MAX_DECOMPRESSED_BLOCK_BYTES { + return Err(StaticBlockStoreError::Invalid( + "static block exceeds decompressed size limit".into(), + )); + } + Ok(Some(decompressed)) +} + +/// Fsync directory entries after rename/create.
+fn sync_dir(path: &Path) -> StoreResult<()> { + let dir = File::open(path)?; + dir.sync_all()?; + Ok(()) } From ad2c38726b19a9b7de7c455eaebe5e1cf5be2736 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 16:27:45 +0200 Subject: [PATCH 05/24] Add static blob API --- TODO-static-block-storage.md | 42 ++++++++++++++++++ beacon_node/store/src/errors.rs | 8 ++++ beacon_node/store/src/hot_cold_store.rs | 34 +++++++++++++- beacon_node/store/src/lib.rs | 2 + beacon_node/store/src/static_blobs.rs | 59 +++++++++++++++++++++++++ 5 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 TODO-static-block-storage.md create mode 100644 beacon_node/store/src/static_blobs.rs diff --git a/TODO-static-block-storage.md b/TODO-static-block-storage.md new file mode 100644 index 00000000000..385cb13b59c --- /dev/null +++ b/TODO-static-block-storage.md @@ -0,0 +1,42 @@ +# Static Block Storage TODO + +Current spec: [`specs/static-blocks.md`](./specs/static-blocks.md) + +Implemented: +- static block file format spec +- `StaticBlockStore::open/get/put` +- snappy-framed block records +- fixed-size `.off` sidecar files +- global `static_blocks.conf` commit marker +- startup healing for interrupted writes + +Remaining: + +1. Wire startup/config. + - add CLI/config path for enabling static block storage + - initialize `HotColdDB::static_blocks` + - reject checkpoint sync, late activation, and historical backfill init modes + +2. Bump schema. + - `DBColumn::BeaconBlockSlot` was added + - update schema version in `beacon_node/store/src/metadata.rs` + +3. Verify static fallback reads. + - after `static_blocks.get(slot)`, decode and verify the block root matches the requested root + - treat mismatches as corruption + +4. Update invariants. + - archived finalized blocks no longer require hot-db block bodies + - root/slot indices must remain consistent with static storage + +5. Add tests. 
+ - archive/read happy path + - skip-slot dedup + - out-of-order put rejection + - crash windows around data, `.off`, and `.conf` + - wrong `BeaconBlockSlot` + - unsupported startup modes + +6. Decide decompression bound wiring. + - current implementation uses a local 10 MiB bound + - consider passing consensus `max_payload_size` or another store config value diff --git a/beacon_node/store/src/errors.rs b/beacon_node/store/src/errors.rs index 91953ccdb76..d198f446d3c 100644 --- a/beacon_node/store/src/errors.rs +++ b/beacon_node/store/src/errors.rs @@ -1,5 +1,6 @@ use crate::config::StoreConfigError; use crate::hot_cold_store::{HotColdDBError, StateSummaryIteratorError}; +use crate::static_blobs::StaticBlobStoreError; use crate::static_blocks::StaticBlockStoreError; use crate::{DBColumn, hdiff}; #[cfg(feature = "leveldb")] @@ -16,6 +17,7 @@ pub enum Error { BeaconStateError(BeaconStateError), HotColdDBError(HotColdDBError), StaticBlockStoreError(StaticBlockStoreError), + StaticBlobStoreError(StaticBlobStoreError), DBError { message: String, }, @@ -137,6 +139,12 @@ impl From for Error { } } +impl From for Error { + fn from(e: StaticBlobStoreError) -> Error { + Error::StaticBlobStoreError(e) + } +} + impl From for Error { fn from(e: BeaconStateError) -> Error { Error::BeaconStateError(e) diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 291fdadcf51..a32f2dd9c46 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -12,6 +12,7 @@ use crate::metadata::{ SCHEMA_VERSION_KEY, SPLIT_KEY, STATE_UPPER_LIMIT_NO_RETAIN, SchemaVersion, }; use crate::state_cache::{PutStateOutcome, StateCache}; +use crate::static_blobs::StaticBlobStore; use crate::static_blocks::StaticBlockStore; use crate::{ BlobSidecarListFromRoot, DBColumn, DatabaseBlock, Error, ItemStore, KeyValueStoreOp, StoreItem, @@ -76,6 +77,8 @@ pub struct HotColdDB, Cold: ItemStore> { /// reads fall through to it after 
missing in `hot_db`. When `None` (legacy mode), all /// finalized blinded blocks remain in `hot_db` as today. pub static_blocks: Option>, + /// Optional slot-keyed archive for finalized blob sidecars. + pub static_blobs: Option>, /// LRU cache of deserialized blocks and blobs. Updated whenever a block or blob is loaded. block_cache: Option>>, /// Cache of beacon states. @@ -242,6 +245,7 @@ impl HotColdDB, MemoryStore> { blobs_db: MemoryStore::open(), hot_db: MemoryStore::open(), static_blocks: None, + static_blobs: None, block_cache: NonZeroUsize::new(config.block_cache_size) .map(BlockCache::new) .map(Mutex::new), @@ -297,6 +301,7 @@ impl HotColdDB, BeaconNodeBackend> { cold_db: BeaconNodeBackend::open(&config, cold_path)?, hot_db, static_blocks: None, + static_blobs: None, block_cache: NonZeroUsize::new(config.block_cache_size) .map(BlockCache::new) .map(Mutex::new), @@ -2700,10 +2705,37 @@ impl, Cold: ItemStore> HotColdDB Ok(BlobSidecarListFromRoot::NoBlobs) } } - None => Ok(BlobSidecarListFromRoot::NoRoot), + None => self.get_static_blobs(block_root), } } + /// Fetch blobs from the slot-keyed static archive after a blob-db miss. + fn get_static_blobs(&self, block_root: &Hash256) -> Result, Error> { + let Some(static_blobs) = &self.static_blobs else { + return Ok(BlobSidecarListFromRoot::NoRoot); + }; + let Some(slot) = self.get_finalized_blinded_block_slot(block_root)? else { + return Ok(BlobSidecarListFromRoot::NoRoot); + }; + let Some(blobs_bytes) = static_blobs.get(slot)? 
else { + return Ok(BlobSidecarListFromRoot::NoBlobs); + }; + + let blobs: Vec>> = Vec::<_>::from_ssz_bytes(&blobs_bytes)?; + let Some(max_blobs_per_block) = blobs + .first() + .map(|blob| self.spec.max_blobs_per_block(blob.epoch())) + else { + return Ok(BlobSidecarListFromRoot::NoBlobs); + }; + + let blobs = BlobSidecarList::new(blobs, max_blobs_per_block as usize)?; + self.block_cache + .as_ref() + .inspect(|cache| cache.lock().put_blobs(*block_root, blobs.clone())); + Ok(BlobSidecarListFromRoot::Blobs(blobs)) + } + /// Fetch all keys in the data_column column with prefix `block_root` pub fn get_data_column_keys(&self, block_root: Hash256) -> Result, Error> { self.blobs_db diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index f2b4a54ded3..8d7c2a3c165 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -21,6 +21,7 @@ pub mod metadata; pub mod metrics; pub mod reconstruct; pub mod state_cache; +pub mod static_blobs; pub mod static_blocks; pub mod database; @@ -30,6 +31,7 @@ pub use self::blob_sidecar_list_from_root::BlobSidecarListFromRoot; pub use self::config::StoreConfig; pub use self::hot_cold_store::{HotColdDB, HotStateSummary, Split}; pub use self::memory_store::MemoryStore; +pub use self::static_blobs::StaticBlobStore; pub use self::static_blocks::StaticBlockStore; pub use crate::metadata::BlobInfo; pub use errors::Error; diff --git a/beacon_node/store/src/static_blobs.rs b/beacon_node/store/src/static_blobs.rs new file mode 100644 index 00000000000..c221a9c299e --- /dev/null +++ b/beacon_node/store/src/static_blobs.rs @@ -0,0 +1,59 @@ +//! Slot-keyed archive API for finalized blob sidecars. +//! +//! This is the minimal surface needed to test HotColdDB integration. The file +//! backend is intentionally not implemented yet. 
+ +use std::{ + fmt, io, + path::{Path, PathBuf}, +}; +use types::Slot; + +#[derive(Debug)] +pub struct StaticBlobStore { + root_dir: PathBuf, +} + +#[derive(Debug)] +pub enum StaticBlobStoreError { + Io(io::Error), + Unsupported(&'static str), +} + +impl fmt::Display for StaticBlobStoreError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(e) => write!(f, "static blob store io error: {e}"), + Self::Unsupported(message) => { + write!(f, "static blob store unsupported operation: {message}") + } + } + } +} + +impl From<io::Error> for StaticBlobStoreError { + fn from(e: io::Error) -> Self { + Self::Io(e) + } +} + +impl StaticBlobStore { + /// Open the archive rooted at `path`. + pub fn open(path: &Path) -> Result<Self, StaticBlobStoreError> { + Ok(Self { + root_dir: path.to_path_buf(), + }) + } + + /// Read SSZ-encoded blob sidecars for `slot`, if present. + pub fn get(&self, _slot: Slot) -> Result<Option<Vec<u8>>, StaticBlobStoreError> { + let _ = &self.root_dir; + Err(StaticBlobStoreError::Unsupported("get")) + } + + /// Store SSZ-encoded blob sidecars at `slot`. + pub fn put(&self, _slot: Slot, _bytes: &[u8]) -> Result<(), StaticBlobStoreError> { + let _ = &self.root_dir; + Err(StaticBlobStoreError::Unsupported("put")) + } +} From af6e99ba670e3ff78e9e79f5e24661d8ff728153 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 16:28:58 +0200 Subject: [PATCH 06/24] Fix static block lint --- beacon_node/store/src/static_blocks.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beacon_node/store/src/static_blocks.rs b/beacon_node/store/src/static_blocks.rs index 0a95fae7870..c0f5cfda458 100644 --- a/beacon_node/store/src/static_blocks.rs +++ b/beacon_node/store/src/static_blocks.rs @@ -144,8 +144,7 @@ impl StaticBlockStore { let target_file_id = file_id(slot); // Discard an uncommitted next-file tail after a crash.
- let reset_file = - (*highest_written_slot).map(|highest| file_id(highest)) != Some(target_file_id); + let reset_file = (*highest_written_slot).map(file_id) != Some(target_file_id); let off_pos = offset_position(slot); let data_path = self.data_path(target_file_id); let off_path = self.offset_path(target_file_id); @@ -173,6 +172,7 @@ impl StaticBlockStore { .read(true) .write(true) .create(true) + .truncate(false) .open(&off_path)?; if reset_file { off_file.set_len(0)?; From 85b66542b7e8cbcd9a63648a3eea4c39a5a82252 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 19:20:27 +0200 Subject: [PATCH 07/24] ColdStore trait + StaticColdStore generalization Replaces the byte-keyed Cold: ItemStore bound on HotColdDB with a slot-typed ColdStore trait: get/put_batch/exists/iter_from for slot-keyed columns plus get_index/put_index_batch over a tight DBColumnColdIndex enum (BlockSlot, ColdStateSummary). KV backends (BeaconNodeBackend, MemoryStore) implement it by translating slot/root keys into the existing KeyValueStore byte API. StaticBlockStore generalised to StaticColdStore: one type, columns dispatched on each call. Per-column subdirectory; per-column settings (record_type, compression, max_decompressed) come from a build-time column_config table on first creation and are persisted in each column's conf so future builds with different defaults stay compatible. Conf magic bumped to LHSTBLK2. Removes prune_historic_states + the lighthouse db prune-states CLI: the mode they produce ("cold blocks present, cold states absent") isn't in the startup-path table in specs/static-cold-backend.md and the spec doesn't support runtime mode transitions. full_state_pruning_enabled goes with it. Other: store_cold_state* helpers take separate slot-keyed and root-index buffers; migration writes slot-keyed cold data first, root indices after, so a crash leaves no dangling indices. 
--- beacon_node/beacon_chain/src/beacon_chain.rs | 2 +- .../src/beacon_fork_choice_store.rs | 10 +- beacon_node/beacon_chain/src/builder.rs | 12 +- .../overflow_lru_cache.rs | 2 +- .../beacon_chain/src/historical_blocks.rs | 22 +- beacon_node/beacon_chain/src/migrate.rs | 6 +- .../beacon_chain/src/persisted_custody.rs | 6 +- beacon_node/beacon_chain/src/test_utils.rs | 6 +- beacon_node/beacon_chain/tests/store_tests.rs | 71 +-- beacon_node/client/src/builder.rs | 6 +- beacon_node/network/src/persisted_dht.rs | 8 +- beacon_node/store/src/database/interface.rs | 75 ++- beacon_node/store/src/errors.rs | 10 +- beacon_node/store/src/forwards_iter.rs | 29 +- beacon_node/store/src/hot_cold_store.rs | 399 ++++------------ beacon_node/store/src/invariants.rs | 77 ++- beacon_node/store/src/iter.rs | 46 +- beacon_node/store/src/lib.rs | 46 +- beacon_node/store/src/memory_store.rs | 80 +++- beacon_node/store/src/metadata.rs | 5 - beacon_node/store/src/reconstruct.rs | 19 +- beacon_node/store/src/static_blocks.rs | 440 ++++++++++++------ database_manager/src/cli.rs | 16 - database_manager/src/lib.rs | 99 +--- specs/static-cold-backend.md | 61 +++ 25 files changed, 786 insertions(+), 767 deletions(-) create mode 100644 specs/static-cold-backend.md diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index f618cf63217..8e996f5ef3b 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -320,7 +320,7 @@ pub enum StateSkipConfig { pub trait BeaconChainTypes: Send + Sync + 'static { type HotStore: store::ItemStore; - type ColdStore: store::ItemStore; + type ColdStore: store::ColdStore; type SlotClock: slot_clock::SlotClock; type EthSpec: types::EthSpec; } diff --git a/beacon_node/beacon_chain/src/beacon_fork_choice_store.rs b/beacon_node/beacon_chain/src/beacon_fork_choice_store.rs index 95fde28f5b2..6d63aa6136e 100644 --- 
a/beacon_node/beacon_chain/src/beacon_fork_choice_store.rs +++ b/beacon_node/beacon_chain/src/beacon_fork_choice_store.rs @@ -14,7 +14,7 @@ use ssz_derive::{Decode, Encode}; use std::collections::BTreeSet; use std::marker::PhantomData; use std::sync::Arc; -use store::{Error as StoreError, HotColdDB, ItemStore}; +use store::{ColdStore, Error as StoreError, HotColdDB, ItemStore}; use superstruct::superstruct; use types::{ AbstractExecPayload, BeaconBlockRef, BeaconState, BeaconStateError, Checkpoint, Epoch, EthSpec, @@ -129,8 +129,8 @@ impl BalancesCache { /// Implements `fork_choice::ForkChoiceStore` in order to provide a persistent backing to the /// `fork_choice::ForkChoice` struct. #[derive(Debug, Educe)] -#[educe(PartialEq(bound(E: EthSpec, Hot: ItemStore, Cold: ItemStore)))] -pub struct BeaconForkChoiceStore, Cold: ItemStore> { +#[educe(PartialEq(bound(E: EthSpec, Hot: ItemStore, Cold: ColdStore)))] +pub struct BeaconForkChoiceStore, Cold: ColdStore> { #[educe(PartialEq(ignore))] store: Arc>, balances_cache: BalancesCache, @@ -151,7 +151,7 @@ impl BeaconForkChoiceStore where E: EthSpec, Hot: ItemStore, - Cold: ItemStore, + Cold: ColdStore, { /// Initialize `Self` from some `anchor` checkpoint which may or may not be the genesis state. 
/// @@ -268,7 +268,7 @@ impl ForkChoiceStore for BeaconForkChoiceStore where E: EthSpec, Hot: ItemStore, - Cold: ItemStore, + Cold: ColdStore, { type Error = Error; diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index d70561db9ba..5de28f43f20 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -40,7 +40,7 @@ use state_processing::per_slot_processing; use std::marker::PhantomData; use std::sync::Arc; use std::time::Duration; -use store::{Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp}; +use store::{ColdStore, DBColumn, Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp}; use task_executor::{ShutdownReason, TaskExecutor}; use tracing::{debug, error, info, warn}; use tree_hash::TreeHash; @@ -60,7 +60,7 @@ impl BeaconChainTypes for Witness where THotStore: ItemStore + 'static, - TColdStore: ItemStore + 'static, + TColdStore: ColdStore + 'static, TSlotClock: SlotClock + 'static, E: EthSpec + 'static, { @@ -115,7 +115,7 @@ impl BeaconChainBuilder> where THotStore: ItemStore + 'static, - TColdStore: ItemStore + 'static, + TColdStore: ColdStore + 'static, TSlotClock: SlotClock + 'static, E: EthSpec + 'static, { @@ -340,7 +340,7 @@ where .map_err(|e| format!("Failed to store genesis block: {:?}", e))?; store .store_frozen_block_root_at_skip_slots(Slot::new(0), Slot::new(1), beacon_block_root) - .and_then(|ops| store.cold_db.do_atomically(ops)) + .and_then(|items| store.cold_db.put_batch(DBColumn::BeaconBlockRoots, items)) .map_err(|e| format!("Failed to store genesis block root: {e:?}"))?; // Store the genesis block under the `ZERO_HASH` key. 
@@ -558,7 +558,7 @@ where .map_err(|e| format!("Error writing frozen block roots: {e:?}"))?; store .cold_db - .do_atomically(block_root_batch) + .put_batch(DBColumn::BeaconBlockRoots, block_root_batch) .map_err(|e| format!("Error writing frozen block roots: {e:?}"))?; debug!( from = %weak_subj_block.slot(), @@ -1152,7 +1152,7 @@ impl BeaconChainBuilder> where THotStore: ItemStore + 'static, - TColdStore: ItemStore + 'static, + TColdStore: ColdStore + 'static, E: EthSpec + 'static, { /// Sets the `BeaconChain` slot clock to `TestingSlotClock`. diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 7d1bba2de98..e5158bcc8cb 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -861,7 +861,7 @@ mod test { where E: EthSpec, Hot: ItemStore, - Cold: ItemStore, + Cold: ColdStore, { let chain = &harness.chain; let head = chain.head_snapshot(); diff --git a/beacon_node/beacon_chain/src/historical_blocks.rs b/beacon_node/beacon_chain/src/historical_blocks.rs index bfda52558e4..9c156394bd0 100644 --- a/beacon_node/beacon_chain/src/historical_blocks.rs +++ b/beacon_node/beacon_chain/src/historical_blocks.rs @@ -10,7 +10,9 @@ use std::borrow::Cow; use std::iter; use std::time::Duration; use store::metadata::DataColumnInfo; -use store::{AnchorInfo, BlobInfo, DBColumn, Error as StoreError, KeyValueStore, KeyValueStoreOp}; +use store::{ + AnchorInfo, BlobInfo, ColdStore, DBColumn, Error as StoreError, KeyValueStore, KeyValueStoreOp, +}; use strum::IntoStaticStr; use tracing::{debug, debug_span, instrument}; use types::{Hash256, Slot}; @@ -108,7 +110,7 @@ impl BeaconChain { let mut new_oldest_data_column_slot = data_column_info.oldest_data_column_slot; let mut blob_batch = Vec::::new(); - let mut cold_batch = 
Vec::with_capacity(blocks_to_import.len()); + let mut cold_batch: Vec<(Slot, Vec)> = Vec::with_capacity(blocks_to_import.len()); let mut hot_batch = Vec::with_capacity(blocks_to_import.len()); let mut signed_blocks = Vec::with_capacity(blocks_to_import.len()); @@ -174,11 +176,7 @@ impl BeaconChain { // Store block roots, including at all skip slots in the freezer DB. for slot in (block.slot().as_u64()..prev_block_slot.as_u64()).rev() { debug!(%slot, ?block_root, "Storing frozen block to root mapping"); - cold_batch.push(KeyValueStoreOp::PutKeyValue( - DBColumn::BeaconBlockRoots, - slot.to_be_bytes().to_vec(), - block_root.as_slice().to_vec(), - )); + cold_batch.push((Slot::new(slot), block_root.as_slice().to_vec())); } prev_block_slot = block.slot(); @@ -191,11 +189,7 @@ impl BeaconChain { if expected_block_root == self.genesis_block_root { let genesis_slot = self.spec.genesis_slot; for slot in genesis_slot.as_u64()..prev_block_slot.as_u64() { - cold_batch.push(KeyValueStoreOp::PutKeyValue( - DBColumn::BeaconBlockRoots, - slot.to_be_bytes().to_vec(), - self.genesis_block_root.as_slice().to_vec(), - )); + cold_batch.push((Slot::new(slot), self.genesis_block_root.as_slice().to_vec())); } prev_block_slot = genesis_slot; expected_block_root = Hash256::zero(); @@ -261,7 +255,9 @@ impl BeaconChain { } { let _span = debug_span!("backfill_write_cold_db").entered(); - self.store.cold_db.do_atomically(cold_batch)?; + self.store + .cold_db + .put_batch(DBColumn::BeaconBlockRoots, cold_batch)?; } let mut anchor_and_blob_batch = Vec::with_capacity(3); diff --git a/beacon_node/beacon_chain/src/migrate.rs b/beacon_node/beacon_chain/src/migrate.rs index 3c17c1ebba4..f86ee3b7cda 100644 --- a/beacon_node/beacon_chain/src/migrate.rs +++ b/beacon_node/beacon_chain/src/migrate.rs @@ -7,7 +7,7 @@ use std::sync::{Arc, mpsc}; use std::thread; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use store::hot_cold_store::{HotColdDBError, migrate_database}; -use store::{Error, ItemStore, 
Split, StoreOp}; +use store::{ColdStore, Error, ItemStore, Split, StoreOp}; pub use store::{HotColdDB, MemoryStore}; use tracing::{debug, error, info, warn}; use types::{BeaconState, BeaconStateHash, Checkpoint, Epoch, EthSpec, Hash256, Slot}; @@ -30,7 +30,7 @@ pub const DEFAULT_EPOCHS_PER_MIGRATION: u64 = 1; /// The background migrator runs a thread to perform pruning and migrate state from the hot /// to the cold database. -pub struct BackgroundMigrator, Cold: ItemStore> { +pub struct BackgroundMigrator, Cold: ColdStore> { db: Arc>, /// Record of when the last migration ran, for enforcing `epochs_per_migration`. prev_migration: Arc>, @@ -135,7 +135,7 @@ pub struct FinalizationNotification { pub prev_migration: Arc>, } -impl, Cold: ItemStore> BackgroundMigrator { +impl, Cold: ColdStore> BackgroundMigrator { /// Create a new `BackgroundMigrator` and spawn its thread if necessary. pub fn new(db: Arc>, config: MigratorConfig) -> Self { // Estimate last migration run from DB split slot. diff --git a/beacon_node/beacon_chain/src/persisted_custody.rs b/beacon_node/beacon_chain/src/persisted_custody.rs index ba221c67b5f..e13ab83319a 100644 --- a/beacon_node/beacon_chain/src/persisted_custody.rs +++ b/beacon_node/beacon_chain/src/persisted_custody.rs @@ -1,7 +1,7 @@ use crate::custody_context::CustodyContextSsz; use ssz::{Decode, Encode}; use std::sync::Arc; -use store::{DBColumn, Error as StoreError, HotColdDB, ItemStore, StoreItem}; +use store::{ColdStore, DBColumn, Error as StoreError, HotColdDB, ItemStore, StoreItem}; use types::{EthSpec, Hash256}; /// 32-byte key for accessing the `CustodyContext`. All zero because `CustodyContext` has its own column. 
@@ -9,7 +9,7 @@ pub const CUSTODY_DB_KEY: Hash256 = Hash256::ZERO; pub struct PersistedCustody(pub CustodyContextSsz); -pub fn load_custody_context, Cold: ItemStore>( +pub fn load_custody_context, Cold: ColdStore>( store: Arc>, ) -> Option { let res: Result, _> = @@ -22,7 +22,7 @@ pub fn load_custody_context, Cold: ItemStore>( } /// Attempt to persist the custody context object to `self.store`. -pub fn persist_custody_context, Cold: ItemStore>( +pub fn persist_custody_context, Cold: ColdStore>( store: Arc>, custody_context: CustodyContextSsz, ) -> Result<(), store::Error> { diff --git a/beacon_node/beacon_chain/src/test_utils.rs b/beacon_node/beacon_chain/src/test_utils.rs index ca55811a706..287e51436fb 100644 --- a/beacon_node/beacon_chain/src/test_utils.rs +++ b/beacon_node/beacon_chain/src/test_utils.rs @@ -66,7 +66,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, LazyLock}; use std::time::Duration; use store::database::interface::BeaconNodeBackend; -use store::{HotColdDB, ItemStore, MemoryStore, config::StoreConfig}; +use store::{ColdStore, HotColdDB, ItemStore, MemoryStore, config::StoreConfig}; use task_executor::TaskExecutor; use task_executor::{ShutdownReason, test_utils::TestRuntime}; use tracing::debug; @@ -400,7 +400,7 @@ impl Builder> where E: EthSpec, Hot: ItemStore, - Cold: ItemStore, + Cold: ColdStore, { pub fn new(eth_spec_instance: E) -> Self { let runtime = TestRuntime::default(); @@ -761,7 +761,7 @@ impl BeaconChainHarness> where E: EthSpec, Hot: ItemStore, - Cold: ItemStore, + Cold: ColdStore, { pub fn builder(eth_spec_instance: E) -> Builder> { create_test_tracing_subscriber(); diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 1576092c814..06a77e9047f 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -44,7 +44,7 @@ use std::str::FromStr; use std::sync::{Arc, LazyLock}; use std::time::Duration; 
use store::database::interface::BeaconNodeBackend; -use store::metadata::{CURRENT_SCHEMA_VERSION, STATE_UPPER_LIMIT_NO_RETAIN, SchemaVersion}; +use store::metadata::{CURRENT_SCHEMA_VERSION, SchemaVersion}; use store::{ BlobInfo, DBColumn, HotColdDB, StoreConfig, hdiff::HierarchyConfig, @@ -5018,75 +5018,6 @@ fn check_data_column_existence( } } -#[tokio::test] -async fn prune_historic_states() { - let num_blocks_produced = E::slots_per_epoch() * 5; - let db_path = tempdir().unwrap(); - let store = get_store(&db_path); - let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - let genesis_state_root = harness.chain.genesis_state_root; - - let genesis_state = harness - .chain - .get_state(&genesis_state_root, None, CACHE_STATE_IN_TESTS) - .unwrap() - .unwrap(); - - harness - .extend_chain( - num_blocks_produced as usize, - BlockStrategy::OnCanonicalHead, - AttestationStrategy::AllValidators, - ) - .await; - - // Check historical states are present. - let first_epoch_state_roots = harness - .chain - .forwards_iter_state_roots(Slot::new(0)) - .unwrap() - .take(E::slots_per_epoch() as usize) - .map(Result::unwrap) - .collect::>(); - for &(state_root, slot) in &first_epoch_state_roots { - assert!( - store - .get_state(&state_root, Some(slot), CACHE_STATE_IN_TESTS) - .unwrap() - .is_some() - ); - } - - store - .prune_historic_states(genesis_state_root, &genesis_state) - .unwrap(); - - // Check that anchor info is updated. - let anchor_info = store.get_anchor_info(); - assert_eq!(anchor_info.state_lower_limit, 0); - assert_eq!(anchor_info.state_upper_limit, STATE_UPPER_LIMIT_NO_RETAIN); - - // Ensure all epoch 0 states other than the genesis have been pruned. - for &(state_root, slot) in &first_epoch_state_roots { - assert_eq!( - store - .get_state(&state_root, Some(slot), CACHE_STATE_IN_TESTS) - .unwrap() - .is_some(), - slot == 0 - ); - } - - // Run for another two epochs. 
- let additional_blocks_produced = 2 * E::slots_per_epoch(); - harness - .extend_slots(additional_blocks_produced as usize) - .await; - - check_finalization(&harness, num_blocks_produced + additional_blocks_produced); - check_split_slot(&harness, store); -} - // Test the function `get_ancestor_state_root` for slots prior to the split where we only have // sparse summaries stored. #[tokio::test] diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index 9dfb8304bc8..6d5dacd6707 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -98,7 +98,7 @@ where TSlotClock: SlotClock + Clone + 'static, E: EthSpec + 'static, THotStore: ItemStore + 'static, - TColdStore: ItemStore + 'static, + TColdStore: store::ColdStore + 'static, { /// Instantiates a new, empty builder. /// @@ -811,7 +811,7 @@ where TSlotClock: SlotClock + Clone + 'static, E: EthSpec + 'static, THotStore: ItemStore + 'static, - TColdStore: ItemStore + 'static, + TColdStore: store::ColdStore + 'static, { /// Consumes the internal `BeaconChainBuilder`, attaching the resulting `BeaconChain` to self. #[instrument(skip_all)] @@ -885,7 +885,7 @@ impl ClientBuilder + 'static, - TColdStore: ItemStore + 'static, + TColdStore: store::ColdStore + 'static, { /// Specifies that the slot clock should read the time from the computers system clock. pub fn system_time_slot_clock(mut self) -> Result { diff --git a/beacon_node/network/src/persisted_dht.rs b/beacon_node/network/src/persisted_dht.rs index 113b3cdd323..f3195c7c21a 100644 --- a/beacon_node/network/src/persisted_dht.rs +++ b/beacon_node/network/src/persisted_dht.rs @@ -1,12 +1,12 @@ use lighthouse_network::Enr; use std::sync::Arc; -use store::{DBColumn, Error as StoreError, HotColdDB, ItemStore, StoreItem}; +use store::{ColdStore, DBColumn, Error as StoreError, HotColdDB, ItemStore, StoreItem}; use types::{EthSpec, Hash256}; /// 32-byte key for accessing the `DhtEnrs`. 
All zero because `DhtEnrs` has its own column. pub const DHT_DB_KEY: Hash256 = Hash256::ZERO; -pub fn load_dht, Cold: ItemStore>( +pub fn load_dht, Cold: ColdStore>( store: Arc>, ) -> Vec { // Load DHT from store @@ -20,7 +20,7 @@ pub fn load_dht, Cold: ItemStore>( } /// Attempt to persist the ENR's in the DHT to `self.store`. -pub fn persist_dht, Cold: ItemStore>( +pub fn persist_dht, Cold: ColdStore>( store: Arc>, enrs: Vec, ) -> Result<(), store::Error> { @@ -28,7 +28,7 @@ pub fn persist_dht, Cold: ItemStore>( } /// Attempts to clear any DHT entries. -pub fn clear_dht, Cold: ItemStore>( +pub fn clear_dht, Cold: ColdStore>( store: Arc>, ) -> Result<(), store::Error> { store.hot_db.delete::(&DHT_DB_KEY) diff --git a/beacon_node/store/src/database/interface.rs b/beacon_node/store/src/database/interface.rs index 5646f1179c8..88bf2758172 100644 --- a/beacon_node/store/src/database/interface.rs +++ b/beacon_node/store/src/database/interface.rs @@ -2,11 +2,16 @@ use crate::database::leveldb_impl; #[cfg(feature = "redb")] use crate::database::redb_impl; -use crate::{ColumnIter, ColumnKeyIter, DBColumn, Error, ItemStore, Key, KeyValueStore, metrics}; +use crate::{ + ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnColdIndex, Error, ItemStore, Key, + KeyValueStore, SlotIter, metrics, +}; use crate::{KeyValueStoreOp, StoreConfig, config::DatabaseBackend}; +use ssz::{Decode, Encode}; use std::collections::HashSet; use std::path::Path; use types::EthSpec; +use types::{Hash256, Slot}; pub enum BeaconNodeBackend { #[cfg(feature = "leveldb")] @@ -17,6 +22,74 @@ pub enum BeaconNodeBackend { impl ItemStore for BeaconNodeBackend {} +impl ColdStore for BeaconNodeBackend { + fn get(&self, column: DBColumn, slot: Slot) -> Result>, Error> { + KeyValueStore::get_bytes(self, column, &slot.as_u64().to_be_bytes()) + } + + fn put_batch(&self, column: DBColumn, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + let ops = items + .into_iter() + .map(|(slot, value)| { + 
crate::KeyValueStoreOp::PutKeyValue( + column, + slot.as_u64().to_be_bytes().to_vec(), + value, + ) + }) + .collect(); + KeyValueStore::do_atomically(self, ops) + } + + fn exists(&self, column: DBColumn, slot: Slot) -> Result { + KeyValueStore::key_exists(self, column, &slot.as_u64().to_be_bytes()) + } + + fn iter_from(&self, column: DBColumn, from: Slot) -> SlotIter<'_> { + Box::new( + KeyValueStore::iter_column_from::>(self, column, &from.as_u64().to_be_bytes()) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = + key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), + ) + } + + fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error> { + Ok( + KeyValueStore::get_bytes(self, column.db_column(), root.as_slice())? + .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?, + ) + } + + fn put_index_batch( + &self, + column: DBColumnColdIndex, + items: Vec<(Hash256, Slot)>, + ) -> Result<(), Error> { + let col = column.db_column(); + let ops = items + .into_iter() + .map(|(root, slot)| { + crate::KeyValueStoreOp::PutKeyValue( + col, + root.as_slice().to_vec(), + slot.as_ssz_bytes(), + ) + }) + .collect(); + KeyValueStore::do_atomically(self, ops) + } + + fn sync(&self) -> Result<(), Error> { + KeyValueStore::sync(self) + } +} + impl KeyValueStore for BeaconNodeBackend { fn get_bytes(&self, column: DBColumn, key: &[u8]) -> Result>, Error> { match self { diff --git a/beacon_node/store/src/errors.rs b/beacon_node/store/src/errors.rs index d198f446d3c..e479e6cf791 100644 --- a/beacon_node/store/src/errors.rs +++ b/beacon_node/store/src/errors.rs @@ -1,7 +1,7 @@ use crate::config::StoreConfigError; use crate::hot_cold_store::{HotColdDBError, StateSummaryIteratorError}; use crate::static_blobs::StaticBlobStoreError; -use crate::static_blocks::StaticBlockStoreError; +use crate::static_blocks::StaticColdStoreError; use crate::{DBColumn, hdiff}; #[cfg(feature = 
"leveldb")] use leveldb::error::Error as LevelDBError; @@ -16,7 +16,7 @@ pub enum Error { SszDecodeError(DecodeError), BeaconStateError(BeaconStateError), HotColdDBError(HotColdDBError), - StaticBlockStoreError(StaticBlockStoreError), + StaticColdStoreError(StaticColdStoreError), StaticBlobStoreError(StaticBlobStoreError), DBError { message: String, @@ -133,9 +133,9 @@ impl From for Error { } } -impl From for Error { - fn from(e: StaticBlockStoreError) -> Error { - Error::StaticBlockStoreError(e) +impl From for Error { + fn from(e: StaticColdStoreError) -> Error { + Error::StaticColdStoreError(e) } } diff --git a/beacon_node/store/src/forwards_iter.rs b/beacon_node/store/src/forwards_iter.rs index 255b7d8eac8..4c994192b0a 100644 --- a/beacon_node/store/src/forwards_iter.rs +++ b/beacon_node/store/src/forwards_iter.rs @@ -1,6 +1,6 @@ use crate::errors::{Error, Result}; use crate::iter::{BlockRootsIterator, StateRootsIterator}; -use crate::{ColumnIter, DBColumn, HotColdDB, ItemStore}; +use crate::{ColdStore, DBColumn, HotColdDB, ItemStore, SlotIter}; use itertools::process_results; use std::marker::PhantomData; use types::{BeaconState, EthSpec, Hash256, Slot}; @@ -9,7 +9,7 @@ pub type HybridForwardsBlockRootsIterator<'a, E, Hot, Cold> = pub type HybridForwardsStateRootsIterator<'a, E, Hot, Cold> = HybridForwardsIterator<'a, E, Hot, Cold>; -impl, Cold: ItemStore> HotColdDB { +impl, Cold: ColdStore> HotColdDB { fn simple_forwards_iterator( &self, column: DBColumn, @@ -116,15 +116,15 @@ impl, Cold: ItemStore> HotColdDB } /// Forwards root iterator that makes use of a slot -> root mapping in the freezer DB. 
-pub struct FrozenForwardsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { - inner: ColumnIter<'a, Vec>, +pub struct FrozenForwardsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> { + inner: SlotIter<'a>, column: DBColumn, next_slot: Slot, end_slot: Slot, _phantom: PhantomData<(E, Hot, Cold)>, } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> FrozenForwardsIterator<'a, E, Hot, Cold> { /// `end_slot` is EXCLUSIVE here. @@ -137,9 +137,8 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> if column != DBColumn::BeaconBlockRoots && column != DBColumn::BeaconStateRoots { return Err(Error::ForwardsIterInvalidColumn(column)); } - let start = start_slot.as_u64().to_be_bytes(); Ok(Self { - inner: store.cold_db.iter_column_from(column, &start), + inner: store.cold_db.iter_from(column, start_slot), column, next_slot: start_slot, end_slot, @@ -148,7 +147,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> } } -impl, Cold: ItemStore> Iterator +impl, Cold: ColdStore> Iterator for FrozenForwardsIterator<'_, E, Hot, Cold> { type Item = Result<(Hash256, Slot)>; @@ -160,13 +159,7 @@ impl, Cold: ItemStore> Iterator self.inner .as_mut() .next()? - .and_then(|(slot_bytes, root_bytes)| { - let slot = slot_bytes - .clone() - .try_into() - .map(u64::from_be_bytes) - .map(Slot::new) - .map_err(|_| Error::InvalidBytes)?; + .and_then(|(slot, root_bytes)| { if root_bytes.len() != std::mem::size_of::() { return Err(Error::InvalidBytes); } @@ -199,7 +192,7 @@ impl Iterator for SimpleForwardsIterator { } /// Fusion of the above two approaches to forwards iteration. Fast and efficient. 
-pub enum HybridForwardsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { +pub enum HybridForwardsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> { PreFinalization { iter: Box>, store: &'a HotColdDB, @@ -220,7 +213,7 @@ pub enum HybridForwardsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemSto Finished, } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> HybridForwardsIterator<'a, E, Hot, Cold> { /// Construct a new hybrid iterator. @@ -349,7 +342,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> } } -impl, Cold: ItemStore> Iterator +impl, Cold: ColdStore> Iterator for HybridForwardsIterator<'_, E, Hot, Cold> { type Item = Result<(Hash256, Slot)>; diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index a32f2dd9c46..e117709527b 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -12,11 +12,9 @@ use crate::metadata::{ SCHEMA_VERSION_KEY, SPLIT_KEY, STATE_UPPER_LIMIT_NO_RETAIN, SchemaVersion, }; use crate::state_cache::{PutStateOutcome, StateCache}; -use crate::static_blobs::StaticBlobStore; -use crate::static_blocks::StaticBlockStore; use crate::{ - BlobSidecarListFromRoot, DBColumn, DatabaseBlock, Error, ItemStore, KeyValueStoreOp, StoreItem, - StoreOp, get_data_column_key, + BlobSidecarListFromRoot, ColdStore, DBColumn, DBColumnColdIndex, DatabaseBlock, Error, + ItemStore, KeyValueStoreOp, StoreItem, StoreOp, get_data_column_key, metrics::{self, COLD_METRIC, HOT_METRIC}, parse_data_column_key, }; @@ -51,7 +49,7 @@ use zstd::{Decoder, Encoder}; /// Stores vector fields like the `block_roots` and `state_roots` separately, and only stores /// intermittent "restore point" states pre-finalization. #[derive(Debug)] -pub struct HotColdDB, Cold: ItemStore> { +pub struct HotColdDB, Cold: ColdStore> { /// The slot and state root at the point where the database is split between hot and cold. 
/// /// States with slots less than `split.slot` are in the cold DB, while states with slots @@ -68,17 +66,11 @@ pub struct HotColdDB, Cold: ItemStore> { /// Cold database containing compact historical data. pub cold_db: Cold, /// Database containing blobs. If None, store falls back to use `cold_db`. - pub blobs_db: Cold, + pub blobs_db: Hot, /// Hot database containing duplicated but quick-to-access recent data. /// /// The hot database also contains all blocks. pub hot_db: Hot, - /// Optional append-only file-backed store for finalized blinded blocks. When `Some`, - /// reads fall through to it after missing in `hot_db`. When `None` (legacy mode), all - /// finalized blinded blocks remain in `hot_db` as today. - pub static_blocks: Option>, - /// Optional slot-keyed archive for finalized blob sidecars. - pub static_blobs: Option>, /// LRU cache of deserialized blocks and blobs. Updated whenever a block or blob is loaded. block_cache: Option>>, /// Cache of beacon states. @@ -244,8 +236,6 @@ impl HotColdDB, MemoryStore> { cold_db: MemoryStore::open(), blobs_db: MemoryStore::open(), hot_db: MemoryStore::open(), - static_blocks: None, - static_blobs: None, block_cache: NonZeroUsize::new(config.block_cache_size) .map(BlockCache::new) .map(Mutex::new), @@ -300,8 +290,6 @@ impl HotColdDB, BeaconNodeBackend> { blobs_db: BeaconNodeBackend::open(&config, blobs_db_path)?, cold_db: BeaconNodeBackend::open(&config, cold_path)?, hot_db, - static_blocks: None, - static_blobs: None, block_cache: NonZeroUsize::new(config.block_cache_size) .map(BlockCache::new) .map(Mutex::new), @@ -463,7 +451,7 @@ impl HotColdDB, BeaconNodeBackend> { } } -impl, Cold: ItemStore> HotColdDB { +impl, Cold: ColdStore> HotColdDB { fn cold_storage_strategy(&self, slot: Slot) -> Result { // The start slot for the freezer HDiff is always 0 Ok(self.hierarchy.storage_strategy(slot, Slot::new(0))?) 
@@ -743,38 +731,11 @@ impl, Cold: ItemStore> HotColdDB block_root: &Hash256, decoder: impl FnOnce(&[u8]) -> Result, ssz::DecodeError>, ) -> Result>, Error> { - // Hot KV first: covers both unfinalized blocks and (in legacy / pre-migration mode) - // all finalized blocks. After migration, finalized blinded bodies are absent here - // and we fall through to the static block store via the cold-KV reverse index. - if let Some(block_bytes) = self - .hot_db + self.hot_db .get_bytes(DBColumn::BeaconBlock, block_root.as_slice())? - { - return decoder(&block_bytes).map(Some).map_err(Into::into); - } - if let Some(static_blocks) = &self.static_blocks - && let Some(slot) = self.get_finalized_blinded_block_slot(block_root)? - && let Some(block_bytes) = static_blocks.get(slot)? - { - return decoder(&block_bytes).map(Some).map_err(Into::into); - } - Ok(None) - } - - /// Look up the slot of a finalized blinded block by its root, using the cold-KV reverse - /// index in [`DBColumn::BeaconBlockSlot`]. Returns `Ok(None)` if the root is unknown to - /// the cold KV (i.e. the block has not been sealed into a static file). - /// - /// Populated by [`Self::seal_era`]. - fn get_finalized_blinded_block_slot( - &self, - block_root: &Hash256, - ) -> Result, Error> { - Ok(self - .cold_db - .get_bytes(DBColumn::BeaconBlockSlot, block_root.as_slice())? - .map(|bytes| Slot::from_ssz_bytes(&bytes)) - .transpose()?) + .map(|block_bytes| decoder(&block_bytes)) + .transpose() + .map_err(Into::into) } pub fn get_payload_envelope( @@ -977,16 +938,8 @@ impl, Cold: ItemStore> HotColdDB /// Determine whether a block exists in the database. pub fn block_exists(&self, block_root: &Hash256) -> Result { - if self - .hot_db - .key_exists(DBColumn::BeaconBlock, block_root.as_slice())? 
- { - return Ok(true); - } - if self.static_blocks.is_some() { - return Ok(self.get_finalized_blinded_block_slot(block_root)?.is_some()); - } - Ok(false) + self.hot_db + .key_exists(DBColumn::BeaconBlock, block_root.as_slice()) } /// Delete a block from the store and the block cache. @@ -1122,11 +1075,16 @@ impl, Cold: ItemStore> HotColdDB /// Store a state in the store. pub fn put_state(&self, state_root: &Hash256, state: &BeaconState) -> Result<(), Error> { - let mut ops: Vec = Vec::new(); if state.slot() < self.get_split_slot() { - self.store_cold_state(state_root, state, &mut ops)?; - self.cold_db.do_atomically(ops) + let mut cold_items = Vec::new(); + let mut summary_index: Vec<(Hash256, Slot)> = Vec::new(); + self.store_cold_state(state_root, state, &mut cold_items, &mut summary_index)?; + // Cold bulk first; the index entry trails so a crash leaves no dangling pointer. + self.commit_cold_items(cold_items)?; + self.cold_db + .put_index_batch(DBColumnColdIndex::ColdStateSummary, summary_index) } else { + let mut ops: Vec = Vec::new(); self.store_hot_state(state_root, state, &mut ops)?; self.hot_db.do_atomically(ops) } @@ -2122,16 +2080,39 @@ impl, Cold: ItemStore> HotColdDB ) } + /// Group `cold_items` by column and write each column to the cold backend. + /// + /// Used to commit pre-finalization cold writes ahead of the matching hot-DB index puts + /// (BeaconColdStateSummary, BeaconBlockSlot). Order matters for crash safety: cold data + /// must be durable before any hot index entry that references it. 
+ pub fn commit_cold_items( + &self, + cold_items: Vec<(DBColumn, Slot, Vec)>, + ) -> Result<(), Error> { + let mut groups: HashMap)>> = HashMap::new(); + for (col, slot, value) in cold_items { + groups.entry(col).or_default().push((slot, value)); + } + for (col, batch) in groups { + self.cold_db.put_batch(col, batch)?; + } + Ok(()) + } + pub fn store_cold_state_summary( &self, state_root: &Hash256, slot: Slot, - ops: &mut Vec, + cold_items: &mut Vec<(DBColumn, Slot, Vec)>, + summary_index: &mut Vec<(Hash256, Slot)>, ) -> Result<(), Error> { - ops.push(ColdStateSummary { slot }.as_kv_store_op(*state_root)); - ops.push(KeyValueStoreOp::PutKeyValue( + // BeaconColdStateSummary is a state_root → slot index owned by the cold backend. + // Slot-keyed bulk data must be durable before we commit the index entry; the + // caller is responsible for the ordering. + summary_index.push((*state_root, slot)); + cold_items.push(( DBColumn::BeaconStateRoots, - slot.as_u64().to_be_bytes().to_vec(), + slot, state_root.as_slice().to_vec(), )); Ok(()) @@ -2142,9 +2123,10 @@ impl, Cold: ItemStore> HotColdDB &self, state_root: &Hash256, state: &BeaconState, - ops: &mut Vec, + cold_items: &mut Vec<(DBColumn, Slot, Vec)>, + summary_index: &mut Vec<(Hash256, Slot)>, ) -> Result<(), Error> { - self.store_cold_state_summary(state_root, state.slot(), ops)?; + self.store_cold_state_summary(state_root, state.slot(), cold_items, summary_index)?; let slot = state.slot(); match self.cold_storage_strategy(slot)? 
{ @@ -2163,7 +2145,7 @@ impl, Cold: ItemStore> HotColdDB %slot, "Storing cold state" ); - self.store_cold_state_as_snapshot(state, ops)?; + self.store_cold_state_as_snapshot(state, cold_items)?; } StorageStrategy::DiffFrom(from) => { debug!( @@ -2172,7 +2154,7 @@ impl, Cold: ItemStore> HotColdDB %slot, "Storing cold state" ); - self.store_cold_state_as_diff(state, from, ops)?; + self.store_cold_state_as_diff(state, from, cold_items)?; } } Ok(()) @@ -2181,7 +2163,7 @@ impl, Cold: ItemStore> HotColdDB pub fn store_cold_state_as_snapshot( &self, state: &BeaconState, - ops: &mut Vec, + cold_items: &mut Vec<(DBColumn, Slot, Vec)>, ) -> Result<(), Error> { let bytes = state.as_ssz_bytes(); let compressed_value = { @@ -2194,19 +2176,16 @@ impl, Cold: ItemStore> HotColdDB out }; - ops.push(KeyValueStoreOp::PutKeyValue( + cold_items.push(( DBColumn::BeaconStateSnapshot, - state.slot().as_u64().to_be_bytes().to_vec(), + state.slot(), compressed_value, )); Ok(()) } fn load_cold_state_bytes_as_snapshot(&self, slot: Slot) -> Result>, Error> { - match self - .cold_db - .get_bytes(DBColumn::BeaconStateSnapshot, &slot.as_u64().to_be_bytes())? - { + match self.cold_db.get(DBColumn::BeaconStateSnapshot, slot)? { Some(bytes) => { let _timer = metrics::start_timer(&metrics::STORE_BEACON_STATE_FREEZER_DECOMPRESS_TIME); @@ -2297,7 +2276,7 @@ impl, Cold: ItemStore> HotColdDB &self, state: &BeaconState, from_slot: Slot, - ops: &mut Vec, + cold_items: &mut Vec<(DBColumn, Slot, Vec)>, ) -> Result<(), Error> { // Load diff base state bytes. 
let (_, base_buffer) = { @@ -2320,11 +2299,7 @@ impl, Cold: ItemStore> HotColdDB diff_bytes.len() as f64, ); - ops.push(KeyValueStoreOp::PutKeyValue( - DBColumn::BeaconStateDiff, - state.slot().as_u64().to_be_bytes().to_vec(), - diff_bytes, - )); + cold_items.push((DBColumn::BeaconStateDiff, state.slot(), diff_bytes)); Ok(()) } @@ -2446,7 +2421,7 @@ impl, Cold: ItemStore> HotColdDB let bytes = { let _t = metrics::start_timer_vec(&metrics::BEACON_HDIFF_READ_TIME, COLD_METRIC); self.cold_db - .get_bytes(DBColumn::BeaconStateDiff, &slot.as_u64().to_be_bytes())? + .get(DBColumn::BeaconStateDiff, slot)? .ok_or(HotColdDBError::MissingHDiff(slot))? }; let hdiff = { @@ -2705,37 +2680,10 @@ impl, Cold: ItemStore> HotColdDB Ok(BlobSidecarListFromRoot::NoBlobs) } } - None => self.get_static_blobs(block_root), + None => Ok(BlobSidecarListFromRoot::NoRoot), } } - /// Fetch blobs from the slot-keyed static archive after a blob-db miss. - fn get_static_blobs(&self, block_root: &Hash256) -> Result, Error> { - let Some(static_blobs) = &self.static_blobs else { - return Ok(BlobSidecarListFromRoot::NoRoot); - }; - let Some(slot) = self.get_finalized_blinded_block_slot(block_root)? else { - return Ok(BlobSidecarListFromRoot::NoRoot); - }; - let Some(blobs_bytes) = static_blobs.get(slot)? 
else { - return Ok(BlobSidecarListFromRoot::NoBlobs); - }; - - let blobs: Vec>> = Vec::<_>::from_ssz_bytes(&blobs_bytes)?; - let Some(max_blobs_per_block) = blobs - .first() - .map(|blob| self.spec.max_blobs_per_block(blob.epoch())) - else { - return Ok(BlobSidecarListFromRoot::NoBlobs); - }; - - let blobs = BlobSidecarList::new(blobs, max_blobs_per_block as usize)?; - self.block_cache - .as_ref() - .inspect(|cache| cache.lock().put_blobs(*block_root, blobs.clone())); - Ok(BlobSidecarListFromRoot::Blobs(blobs)) - } - /// Fetch all keys in the data_column column with prefix `block_root` pub fn get_data_column_keys(&self, block_root: Hash256) -> Result, Error> { self.blobs_db @@ -3145,10 +3093,8 @@ impl, Cold: ItemStore> HotColdDB /// Load a frozen state's slot, given its root. pub fn load_cold_state_slot(&self, state_root: &Hash256) -> Result, Error> { - Ok(self - .cold_db - .get(state_root)? - .map(|s: ColdStateSummary| s.slot)) + self.cold_db + .get_index(DBColumnColdIndex::ColdStateSummary, *state_root) } /// Load a hot state's summary, given its root. @@ -3187,23 +3133,6 @@ impl, Cold: ItemStore> HotColdDB Ok(()) } - /// Run a compaction pass on the freezer DB to free up space used by deleted states. - pub fn compact_freezer(&self) -> Result<(), Error> { - let columns = vec![ - DBColumn::BeaconColdStateSummary, - DBColumn::BeaconStateSnapshot, - DBColumn::BeaconStateDiff, - DBColumn::BeaconStateRoots, - ]; - - for column in columns { - info!(?column, "Starting compaction"); - self.cold_db.compact_column(column)?; - info!(?column, "Finishing compaction"); - } - Ok(()) - } - /// Return `true` if compaction on finalization/pruning is enabled. 
pub fn compact_on_prune(&self) -> bool { self.config.compact_on_prune @@ -3233,16 +3162,12 @@ impl, Cold: ItemStore> HotColdDB start_slot: Slot, end_slot: Slot, block_root: Hash256, - ) -> Result, Error> { - let mut ops = vec![]; + ) -> Result)>, Error> { + let mut items = vec![]; for slot in start_slot.as_u64()..end_slot.as_u64() { - ops.push(KeyValueStoreOp::PutKeyValue( - DBColumn::BeaconBlockRoots, - slot.to_be_bytes().to_vec(), - block_root.as_slice().to_vec(), - )); + items.push((Slot::new(slot), block_root.as_slice().to_vec())); } - Ok(ops) + Ok(items) } /// Return a single block root from the cold DB. @@ -3251,7 +3176,7 @@ impl, Cold: ItemStore> HotColdDB pub fn get_cold_block_root(&self, slot: Slot) -> Result, Error> { Ok(self .cold_db - .get_bytes(DBColumn::BeaconBlockRoots, &slot.as_u64().to_be_bytes())? + .get(DBColumn::BeaconBlockRoots, slot)? .map(|bytes| Hash256::from_ssz_bytes(&bytes)) .transpose()?) } @@ -3264,7 +3189,7 @@ impl, Cold: ItemStore> HotColdDB pub fn get_cold_state_root(&self, slot: Slot) -> Result, Error> { Ok(self .cold_db - .get_bytes(DBColumn::BeaconStateRoots, &slot.as_u64().to_be_bytes())? + .get(DBColumn::BeaconStateRoots, slot)? .map(|bytes| Hash256::from_ssz_bytes(&bytes)) .transpose()?) } @@ -3557,67 +3482,6 @@ impl, Cold: ItemStore> HotColdDB Ok(()) } - /// Delete *all* states from the freezer database and update the anchor accordingly. - /// - /// WARNING: this method deletes the genesis state and replaces it with the provided - /// `genesis_state`. This is to support its use in schema migrations where the storage scheme of - /// the genesis state may be modified. It is the responsibility of the caller to ensure that the - /// genesis state is correct, else a corrupt database will be created. - pub fn prune_historic_states( - &self, - genesis_state_root: Hash256, - genesis_state: &BeaconState, - ) -> Result<(), Error> { - // Update the anchor to use the dummy state upper limit and disable historic state storage. 
- let old_anchor = self.get_anchor_info(); - let new_anchor = AnchorInfo { - state_upper_limit: STATE_UPPER_LIMIT_NO_RETAIN, - state_lower_limit: Slot::new(0), - ..old_anchor.clone() - }; - - // Commit the anchor change immediately: if the cold database ops fail they can always be - // retried, and we can't do them atomically with this change anyway. - self.compare_and_set_anchor_info_with_write(old_anchor, new_anchor)?; - - // Stage freezer data for deletion. Do not bother loading and deserializing values as this - // wastes time and is less schema-agnostic. My hope is that this method will be useful for - // migrating to the tree-states schema (delete everything in the freezer then start afresh). - let mut cold_ops = vec![]; - - let columns = vec![ - DBColumn::BeaconColdStateSummary, - DBColumn::BeaconStateSnapshot, - DBColumn::BeaconStateDiff, - DBColumn::BeaconStateRoots, - ]; - - for column in columns { - for res in self.cold_db.iter_column_keys::>(column) { - let key = res?; - cold_ops.push(KeyValueStoreOp::DeleteKey(column, key)); - } - } - let delete_ops = cold_ops.len(); - - // If we just deleted the genesis state, re-store it using the current* schema. - if self.get_split_slot() > 0 { - info!( - state_root = ?genesis_state_root, - "Re-storing genesis state" - ); - self.store_cold_state(&genesis_state_root, genesis_state, &mut cold_ops)?; - } - - info!(delete_ops, "Deleting historic states"); - self.cold_db.do_atomically(cold_ops)?; - - // In order to reclaim space, we need to compact the freezer DB as well. - self.compact_freezer()?; - - Ok(()) - } - fn update_blob_or_data_column_info( &self, start_epoch: Epoch, @@ -3649,7 +3513,7 @@ impl, Cold: ItemStore> HotColdDB /// This function previously did a combination of freezer migration alongside pruning. Now it is /// *just* responsible for copying relevant data to the freezer, while pruning is implemented /// in `prune_hot_db`. 
-pub fn migrate_database, Cold: ItemStore>( +pub fn migrate_database, Cold: ColdStore>( store: Arc>, finalized_state_root: Hash256, finalized_block_root: Hash256, @@ -3681,8 +3545,11 @@ pub fn migrate_database, Cold: ItemStore>( return Err(HotColdDBError::FreezeSlotUnaligned(finalized_state.slot()).into()); } - let mut cold_db_block_ops = vec![]; - let mut hot_db_block_delete_ops = vec![]; + // Block-side cold puts (BeaconBlockRoots), accumulated across all states in this batch. + let mut cold_block_root_items: Vec<(Slot, Vec)> = vec![]; + // Cold-DB root index for state summaries (state_root -> slot). + // Committed after the slot-keyed cold data so a crash leaves no dangling indices. + let mut cold_state_summary_index: Vec<(Hash256, Slot)> = vec![]; // Iterate in descending order until the current split slot let state_roots: Vec<_> = @@ -3694,11 +3561,7 @@ pub fn migrate_database, Cold: ItemStore>( // Then, iterate states in slot ascending order, as they are stored wrt previous states. for (block_root, state_root, slot) in state_roots.iter().rev() { // Store the slot to block root mapping. - cold_db_block_ops.push(KeyValueStoreOp::PutKeyValue( - DBColumn::BeaconBlockRoots, - slot.as_u64().to_be_bytes().to_vec(), - block_root.as_slice().to_vec(), - )); + cold_block_root_items.push((*slot, block_root.as_slice().to_vec())); // Do not try to store states if a restore point is yet to be stored, or will never be // stored (see `STATE_UPPER_LIMIT_NO_RETAIN`). Make an exception for the genesis state @@ -3707,7 +3570,7 @@ pub fn migrate_database, Cold: ItemStore>( continue; } - let mut cold_db_state_ops = vec![]; + let mut cold_state_items: Vec<(DBColumn, Slot, Vec)> = vec![]; // Only store the cold state if it's on a diff boundary. 
// Calling `store_cold_state_summary` instead of `store_cold_state` for those allows us @@ -3720,7 +3583,12 @@ pub fn migrate_database, Cold: ItemStore>( %slot, "Storing cold state" ); - store.store_cold_state_summary(state_root, *slot, &mut cold_db_state_ops)?; + store.store_cold_state_summary( + state_root, + *slot, + &mut cold_state_items, + &mut cold_state_summary_index, + )?; } else { // This is some state that we want to migrate to the freezer db. // There is no reason to cache this state. @@ -3728,63 +3596,18 @@ pub fn migrate_database, Cold: ItemStore>( .get_hot_state(state_root, false)? .ok_or(HotColdDBError::MissingStateToFreeze(*state_root))?; - store.store_cold_state(state_root, &state, &mut cold_db_state_ops)?; + store.store_cold_state( + state_root, + &state, + &mut cold_state_items, + &mut cold_state_summary_index, + )?; } // Cold states are diffed with respect to each other, so we need to finish writing previous - // states before storing new ones. - store.cold_db.do_atomically(cold_db_state_ops)?; - } - - // Hand newly-finalized blinded blocks to the static archive. `RootsIterator` yields - // the same `block_root` for every skipped slot covered by that block, so we dedupe - // on the root. The seed handles the boundary case where the migration's first slot - // is a skip-slot extension of a block archived in a previous migration: reading - // `BeaconBlockRoots[current_split.slot - 1]` gives that block's root, which the - // dedup then matches and skips. `Hash256::ZERO` is a safe sentinel for the genesis - // case — no real block root collides with it. - if let Some(static_blocks) = &store.static_blocks { - // The slot in this range of slots might by a skipped slot. Read the previous block_root - // from the existing slot -> block_root index. - let mut prev_block_root: Hash256 = if current_split.slot > 0 { - let prev_slot = current_split.slot - 1; - store - .get_cold_block_root(prev_slot)? 
- .ok_or(Error::MigrationError(format!( - "missing BeaconBlockRoots entry for slot {prev_slot}", - )))? - } else { - // For the genesis case set the prev_root to zero to trigger a write - Hash256::ZERO - }; - - for (block_root, _, slot) in state_roots.iter().rev() { - // Previous slot's root is the same, therefore this slot is a skipped slot - if *block_root == prev_block_root { - continue; - } - prev_block_root = *block_root; - - // The new-split block stays in hot KV — it isn't yet finalized below split. - if *slot >= finalized_state.slot() { - continue; - } - - let bytes = store - .hot_db - .get_bytes(DBColumn::BeaconBlock, block_root.as_slice())? - .ok_or(Error::BlockNotFound(*block_root))?; - static_blocks.put(*slot, &bytes)?; - cold_db_block_ops.push(KeyValueStoreOp::PutKeyValue( - DBColumn::BeaconBlockSlot, - block_root.as_slice().to_vec(), - slot.as_ssz_bytes(), - )); - hot_db_block_delete_ops.push(KeyValueStoreOp::DeleteKey( - DBColumn::BeaconBlock, - block_root.as_slice().to_vec(), - )); - } + // slot-keyed cold data before staging new entries. Index commits ride along to the end of + // the migration so all root indices land after every cold-bulk write is durable. + store.commit_cold_items(cold_state_items)?; } // Warning: Critical section. We have to take care not to put any of the two databases in an @@ -3795,8 +3618,17 @@ pub fn migrate_database, Cold: ItemStore>( // potentially re-doing the migration to copy data to the freezer, for consistency. If we crash // after writing all new block & state data to the freezer but before updating the split, then // in the worst case we will restart with the old split and re-run the migration. - store.cold_db.do_atomically(cold_db_block_ops)?; + // + // Slot-keyed cold data lands first; the BeaconColdStateSummary root index is committed after, + // so a mid-migration crash leaves cold data without dangling indices. 
+ store + .cold_db + .put_batch(DBColumn::BeaconBlockRoots, cold_block_root_items)?; store.cold_db.sync()?; + store.cold_db.put_index_batch( + DBColumnColdIndex::ColdStateSummary, + cold_state_summary_index, + )?; let new_split = { let mut split_guard = store.split.write(); let latest_split = *split_guard; @@ -3840,13 +3672,6 @@ pub fn migrate_database, Cold: ItemStore>( finalized_state.clone(), )?; - // Reclaim hot-KV space for blinded bodies now durable in the static archive. Runs - // after split commit so a retried migration never tries to fetch a body that was - // already deleted. A crash between the split commit and this delete leaves the - // bodies in hot KV; reads still succeed via the static archive (the reverse-index - // entry, committed atomically with the split's cold-DB ops, points at it). - store.hot_db.do_atomically(hot_db_block_delete_ops)?; - debug!( slot = %finalized_state.slot(), "Freezer migration complete" @@ -3919,7 +3744,7 @@ pub enum StateSummaryIteratorError { /// Return the ancestor state root of a state beyond SlotsPerHistoricalRoot using the roots iterator /// and the store -pub fn get_ancestor_state_root<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore>( +pub fn get_ancestor_state_root<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore>( store: &'a HotColdDB, from_state: &'a BeaconState, target_slot: Slot, @@ -4126,7 +3951,7 @@ impl StoreItem for HotStateSummary { impl HotStateSummary { /// Construct a new summary of the given state. - pub fn new, Cold: ItemStore>( + pub fn new, Cold: ColdStore>( store: &HotColdDB, state_root: Hash256, state: &BeaconState, @@ -4174,26 +3999,6 @@ impl HotStateSummary { } } -/// Struct for summarising a state in the freezer database. 
-#[derive(Debug, Clone, Copy, Default, Encode, Decode)] -pub(crate) struct ColdStateSummary { - pub slot: Slot, -} - -impl StoreItem for ColdStateSummary { - fn db_column() -> DBColumn { - DBColumn::BeaconColdStateSummary - } - - fn as_store_bytes(&self) -> Vec { - self.as_ssz_bytes() - } - - fn from_store_bytes(bytes: &[u8]) -> Result { - Ok(Self::from_ssz_bytes(bytes)?) - } -} - #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct BytesKey { pub key: Vec, diff --git a/beacon_node/store/src/invariants.rs b/beacon_node/store/src/invariants.rs index d251fb8800a..d82be578655 100644 --- a/beacon_node/store/src/invariants.rs +++ b/beacon_node/store/src/invariants.rs @@ -6,8 +6,8 @@ //! See the `check_invariants` and `check_database_invariants` methods for the full list. use crate::hdiff::StorageStrategy; -use crate::hot_cold_store::{ColdStateSummary, HotStateSummary}; -use crate::{DBColumn, Error, ItemStore}; +use crate::hot_cold_store::HotStateSummary; +use crate::{ColdStore, DBColumn, Error, ItemStore}; use crate::{HotColdDB, Split}; use serde::Serialize; use ssz::Decode; @@ -242,7 +242,7 @@ pub enum InvariantViolation { ColdStateBaseSummaryMissing { slot: Slot, base_slot: Slot }, } -impl, Cold: ItemStore> HotColdDB { +impl, Cold: ColdStore> HotColdDB { /// Run all database invariant checks. 
/// /// The `ctx` parameter provides data from the beacon chain layer (fork choice, state cache, @@ -581,10 +581,7 @@ impl, Cold: ItemStore> HotColdDB for slot_val in anchor_info.oldest_block_slot.as_u64()..split.slot.as_u64() { let slot = Slot::new(slot_val); - let slot_bytes = slot_val.to_be_bytes(); - let block_root_bytes = self - .cold_db - .get_bytes(DBColumn::BeaconBlockRoots, &slot_bytes)?; + let block_root_bytes = self.cold_db.get(DBColumn::BeaconBlockRoots, slot)?; let Some(root_bytes) = block_root_bytes else { result.add_violation(InvariantViolation::ColdBlockRootMissing { @@ -635,11 +632,7 @@ impl, Cold: ItemStore> HotColdDB if slot <= anchor_info.state_lower_limit || slot >= cmp::min(split.slot, anchor_info.state_upper_limit) { - let slot_bytes = slot_val.to_be_bytes(); - let Some(root_bytes) = self - .cold_db - .get_bytes(DBColumn::BeaconStateRoots, &slot_bytes)? - else { + let Some(root_bytes) = self.cold_db.get(DBColumn::BeaconStateRoots, slot)? else { result.add_violation(InvariantViolation::ColdStateRootMissing { slot, state_lower_limit: anchor_info.state_lower_limit, @@ -660,7 +653,7 @@ impl, Cold: ItemStore> HotColdDB match self .cold_db - .get_bytes(DBColumn::BeaconColdStateSummary, state_root.as_slice())? + .get_index(crate::DBColumnColdIndex::ColdStateSummary, state_root)? 
{ None => { result.add_violation(InvariantViolation::ColdStateRootMissingSummary { @@ -668,13 +661,12 @@ impl, Cold: ItemStore> HotColdDB state_root, }); } - Some(summary_bytes) => { - let summary = ColdStateSummary::from_ssz_bytes(&summary_bytes)?; - if summary.slot != slot { + Some(summary_slot) => { + if summary_slot != slot { result.add_violation(InvariantViolation::ColdStateRootSlotMismatch { slot, state_root, - summary_slot: summary.slot, + summary_slot, }); } } @@ -698,49 +690,56 @@ impl, Cold: ItemStore> HotColdDB fn check_cold_state_diff_consistency(&self) -> Result { let mut result = InvariantCheckResult::new(); + // Iterate cold states by slot via the slot-keyed BeaconStateRoots index. The + // root-keyed `BeaconColdStateSummary` index is owned by the cold backend and is + // not directly iterable through `ColdStore`, so the pivot point is the slot. + let split = self.get_split_info(); + let anchor_info = self.get_anchor_info(); let mut summary_slots = HashSet::new(); let mut base_slot_refs = Vec::new(); - for res in self - .cold_db - .iter_column::(DBColumn::BeaconColdStateSummary) - { - let (state_root, value) = res?; - let summary = ColdStateSummary::from_ssz_bytes(&value)?; + for slot_val in 0..split.slot.as_u64() { + let slot = Slot::new(slot_val); + if !(slot <= anchor_info.state_lower_limit + || slot >= cmp::min(split.slot, anchor_info.state_upper_limit)) + { + continue; + } - summary_slots.insert(summary.slot); + let Some(root_bytes) = self.cold_db.get(DBColumn::BeaconStateRoots, slot)? else { + continue; + }; + if root_bytes.len() != 32 { + continue; + } + let state_root = Hash256::from_slice(&root_bytes); - let slot_bytes = summary.slot.as_u64().to_be_bytes(); + // Summary presence is already checked by invariant 11; here we just need the + // hierarchy classification, which is a pure function of the slot. + summary_slots.insert(slot); - match self - .hierarchy - .storage_strategy(summary.slot, Slot::new(0))? 
- { + match self.hierarchy.storage_strategy(slot, Slot::new(0))? { StorageStrategy::Snapshot => { - let has_snapshot = self - .cold_db - .key_exists(DBColumn::BeaconStateSnapshot, &slot_bytes)?; + let has_snapshot = self.cold_db.exists(DBColumn::BeaconStateSnapshot, slot)?; if !has_snapshot { result.add_violation(InvariantViolation::ColdStateMissingSnapshot { state_root, - slot: summary.slot, + slot, }); } } StorageStrategy::DiffFrom(base_slot) => { - let has_diff = self - .cold_db - .key_exists(DBColumn::BeaconStateDiff, &slot_bytes)?; + let has_diff = self.cold_db.exists(DBColumn::BeaconStateDiff, slot)?; if !has_diff { result.add_violation(InvariantViolation::ColdStateMissingDiff { state_root, - slot: summary.slot, + slot, }); } - base_slot_refs.push((summary.slot, base_slot)); + base_slot_refs.push((slot, base_slot)); } StorageStrategy::ReplayFrom(base_slot) => { - base_slot_refs.push((summary.slot, base_slot)); + base_slot_refs.push((slot, base_slot)); } } } diff --git a/beacon_node/store/src/iter.rs b/beacon_node/store/src/iter.rs index 0cb803d1ed7..07aad13d21c 100644 --- a/beacon_node/store/src/iter.rs +++ b/beacon_node/store/src/iter.rs @@ -1,5 +1,5 @@ use crate::errors::HandleUnavailable; -use crate::{Error, HotColdDB, ItemStore}; +use crate::{ColdStore, Error, HotColdDB, ItemStore}; use std::borrow::Cow; use std::marker::PhantomData; use typenum::Unsigned; @@ -13,12 +13,12 @@ use types::{ /// /// It is assumed that all ancestors for this object are stored in the database. If this is not the /// case, the iterator will start returning `None` prior to genesis. -pub trait AncestorIter<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore, I: Iterator> { +pub trait AncestorIter<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore, I: Iterator> { /// Returns an iterator over the roots of the ancestors of `self`. 
fn try_iter_ancestor_roots(&self, store: &'a HotColdDB) -> Option; } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> AncestorIter<'a, E, Hot, Cold, BlockRootsIterator<'a, E, Hot, Cold>> for SignedBeaconBlock { /// Iterates across all available prior block roots of `self`, starting at the most recent and ending @@ -37,7 +37,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> } } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> AncestorIter<'a, E, Hot, Cold, StateRootsIterator<'a, E, Hot, Cold>> for BeaconState { /// Iterates across all available prior state roots of `self`, starting at the most recent and ending @@ -51,11 +51,11 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> } } -pub struct StateRootsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { +pub struct StateRootsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> { inner: RootsIterator<'a, E, Hot, Cold>, } -impl, Cold: ItemStore> Clone +impl, Cold: ColdStore> Clone for StateRootsIterator<'_, E, Hot, Cold> { fn clone(&self) -> Self { @@ -65,7 +65,7 @@ impl, Cold: ItemStore> Clone } } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> StateRootsIterator<'a, E, Hot, Cold> { +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> StateRootsIterator<'a, E, Hot, Cold> { pub fn new(store: &'a HotColdDB, beacon_state: &'a BeaconState) -> Self { Self { inner: RootsIterator::new(store, beacon_state), @@ -79,7 +79,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> StateRootsIterator<' } } -impl, Cold: ItemStore> Iterator +impl, Cold: ColdStore> Iterator for StateRootsIterator<'_, E, Hot, Cold> { type Item = Result<(Hash256, Slot), Error>; @@ -99,11 +99,11 @@ impl, Cold: ItemStore> Iterator /// exhausted. /// /// Returns `None` for roots prior to genesis or when there is an error reading from `Store`. 
-pub struct BlockRootsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { +pub struct BlockRootsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> { inner: RootsIterator<'a, E, Hot, Cold>, } -impl, Cold: ItemStore> Clone +impl, Cold: ColdStore> Clone for BlockRootsIterator<'_, E, Hot, Cold> { fn clone(&self) -> Self { @@ -113,7 +113,7 @@ impl, Cold: ItemStore> Clone } } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> BlockRootsIterator<'a, E, Hot, Cold> { +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> BlockRootsIterator<'a, E, Hot, Cold> { /// Create a new iterator over all block roots in the given `beacon_state` and prior states. pub fn new(store: &'a HotColdDB, beacon_state: &'a BeaconState) -> Self { Self { @@ -138,7 +138,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> BlockRootsIterator<' } } -impl, Cold: ItemStore> Iterator +impl, Cold: ColdStore> Iterator for BlockRootsIterator<'_, E, Hot, Cold> { type Item = Result<(Hash256, Slot), Error>; @@ -151,13 +151,13 @@ impl, Cold: ItemStore> Iterator } /// Iterator over state and block roots that backtracks using the vectors from a `BeaconState`. 
-pub struct RootsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { +pub struct RootsIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> { store: &'a HotColdDB, beacon_state: Cow<'a, BeaconState>, slot: Slot, } -impl, Cold: ItemStore> Clone for RootsIterator<'_, E, Hot, Cold> { +impl, Cold: ColdStore> Clone for RootsIterator<'_, E, Hot, Cold> { fn clone(&self) -> Self { Self { store: self.store, @@ -167,7 +167,7 @@ impl, Cold: ItemStore> Clone for RootsIterator< } } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> RootsIterator<'a, E, Hot, Cold> { +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> RootsIterator<'a, E, Hot, Cold> { pub fn new(store: &'a HotColdDB, beacon_state: &'a BeaconState) -> Self { Self { store, @@ -234,7 +234,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> RootsIterator<'a, E, } } -impl, Cold: ItemStore> Iterator +impl, Cold: ColdStore> Iterator for RootsIterator<'_, E, Hot, Cold> { /// (block_root, state_root, slot) @@ -246,13 +246,13 @@ impl, Cold: ItemStore> Iterator } /// Block iterator that uses the `parent_root` of each block to backtrack. 
-pub struct ParentRootBlockIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { +pub struct ParentRootBlockIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> { store: &'a HotColdDB, next_block_root: Hash256, _phantom: PhantomData, } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> ParentRootBlockIterator<'a, E, Hot, Cold> { pub fn new(store: &'a HotColdDB, start_block_root: Hash256) -> Self { @@ -283,7 +283,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> } } -impl, Cold: ItemStore> Iterator +impl, Cold: ColdStore> Iterator for ParentRootBlockIterator<'_, E, Hot, Cold> { type Item = Result<(Hash256, SignedBeaconBlock>), Error>; @@ -295,11 +295,11 @@ impl, Cold: ItemStore> Iterator #[derive(Clone)] /// Extends `BlockRootsIterator`, returning `SignedBeaconBlock` instances, instead of their roots. -pub struct BlockIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> { +pub struct BlockIterator<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> { roots: BlockRootsIterator<'a, E, Hot, Cold>, } -impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> BlockIterator<'a, E, Hot, Cold> { +impl<'a, E: EthSpec, Hot: ItemStore, Cold: ColdStore> BlockIterator<'a, E, Hot, Cold> { /// Create a new iterator over all blocks in the given `beacon_state` and prior states. pub fn new(store: &'a HotColdDB, beacon_state: &'a BeaconState) -> Self { Self { @@ -324,7 +324,7 @@ impl<'a, E: EthSpec, Hot: ItemStore, Cold: ItemStore> BlockIterator<'a, E, } } -impl, Cold: ItemStore> Iterator +impl, Cold: ColdStore> Iterator for BlockIterator<'_, E, Hot, Cold> { type Item = Result>, Error>; @@ -338,7 +338,7 @@ impl, Cold: ItemStore> Iterator /// /// Return `Err(HistoryUnavailable)` in the case where no more backtrack states are available /// due to weak subjectivity sync. 
-fn next_historical_root_backtrack_state, Cold: ItemStore>( +fn next_historical_root_backtrack_state, Cold: ColdStore>( store: &HotColdDB, current_state: &BeaconState, ) -> Result, Error> { diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 8d7c2a3c165..88f5a77a785 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -32,7 +32,7 @@ pub use self::config::StoreConfig; pub use self::hot_cold_store::{HotColdDB, HotStateSummary, Split}; pub use self::memory_store::MemoryStore; pub use self::static_blobs::StaticBlobStore; -pub use self::static_blocks::StaticBlockStore; +pub use self::static_blocks::StaticColdStore; pub use crate::metadata::BlobInfo; pub use errors::Error; pub use metadata::AnchorInfo; @@ -109,6 +109,48 @@ pub trait KeyValueStore: Sync + Send + Sized + 'static { ) -> Result<(), Error>; } +pub type SlotIter<'a> = Box), Error>> + 'a>; + +/// Root-keyed indices owned by the cold backend. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum DBColumnColdIndex { + /// `block_root -> slot` for finalized blocks. + BlockSlot, + /// `state_root -> slot` for cold state summaries. + ColdStateSummary, +} + +impl DBColumnColdIndex { + pub fn db_column(self) -> DBColumn { + match self { + Self::BlockSlot => DBColumn::BeaconBlockSlot, + Self::ColdStateSummary => DBColumn::BeaconColdStateSummary, + } + } +} + +pub trait ColdStore: Sync + Send + Sized + 'static { + // Slot-keyed bulk data. + fn get(&self, column: DBColumn, slot: Slot) -> Result>, Error>; + + fn put_batch(&self, column: DBColumn, items: Vec<(Slot, Vec)>) -> Result<(), Error>; + + fn exists(&self, column: DBColumn, slot: Slot) -> Result; + + fn iter_from(&self, column: DBColumn, from: Slot) -> SlotIter<'_>; + + // Root-keyed indices owned by the cold backend. 
+ fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error>; + + fn put_index_batch( + &self, + column: DBColumnColdIndex, + items: Vec<(Hash256, Slot)>, + ) -> Result<(), Error>; + + fn sync(&self) -> Result<(), Error>; +} + pub trait Key: Sized + 'static { fn from_bytes(key: &[u8]) -> Result; } @@ -246,7 +288,7 @@ pub enum StoreOp<'a, E: EthSpec> { } /// A unique column identifier. -#[derive(Debug, Clone, Copy, PartialEq, IntoStaticStr, EnumString, EnumIter)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, IntoStaticStr, EnumString, EnumIter)] pub enum DBColumn { /// For data related to the database itself. #[strum(serialize = "bma")] diff --git a/beacon_node/store/src/memory_store.rs b/beacon_node/store/src/memory_store.rs index 6baef61c9d8..d7e9eafbaec 100644 --- a/beacon_node/store/src/memory_store.rs +++ b/beacon_node/store/src/memory_store.rs @@ -1,10 +1,12 @@ use crate::{ - ColumnIter, ColumnKeyIter, DBColumn, Error, ItemStore, Key, KeyValueStore, KeyValueStoreOp, - errors::Error as DBError, get_key_for_col, hot_cold_store::BytesKey, + ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnColdIndex, Error, ItemStore, Key, + KeyValueStore, KeyValueStoreOp, SlotIter, get_key_for_col, hot_cold_store::BytesKey, }; use parking_lot::RwLock; +use ssz::{Decode, Encode}; use std::collections::{BTreeMap, HashSet}; use std::marker::PhantomData; +use types::Hash256; use types::*; type DBMap = BTreeMap>; @@ -93,11 +95,13 @@ impl KeyValueStore for MemoryStore { .filter_map(|(k, _)| k.remove_column_variable(column).map(|k| k.to_vec())) .collect::>(); Box::new(keys.into_iter().filter_map(move |key| { - self.get_bytes(column, &key).transpose().map(|res| { - let k = K::from_bytes(&key)?; - let v = res?; - Ok((k, v)) - }) + KeyValueStore::get_bytes(self, column, &key) + .transpose() + .map(|res| { + let k = K::from_bytes(&key)?; + let v = res?; + Ok((k, v)) + }) })) } @@ -124,7 +128,7 @@ impl KeyValueStore for MemoryStore { 
Box::new(keys.into_iter().map(move |key| K::from_bytes(&key))) } - fn delete_batch(&self, col: DBColumn, ops: HashSet<&[u8]>) -> Result<(), DBError> { + fn delete_batch(&self, col: DBColumn, ops: HashSet<&[u8]>) -> Result<(), Error> { for op in ops { let column_key = get_key_for_col(col, op); self.db.write().remove(&BytesKey::from_vec(column_key)); @@ -149,3 +153,63 @@ impl KeyValueStore for MemoryStore { } impl ItemStore for MemoryStore {} + +impl ColdStore for MemoryStore { + fn get(&self, column: DBColumn, slot: Slot) -> Result>, Error> { + KeyValueStore::get_bytes(self, column, &slot.as_u64().to_be_bytes()) + } + + fn put_batch(&self, column: DBColumn, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + let ops = items + .into_iter() + .map(|(slot, value)| { + KeyValueStoreOp::PutKeyValue(column, slot.as_u64().to_be_bytes().to_vec(), value) + }) + .collect(); + KeyValueStore::do_atomically(self, ops) + } + + fn exists(&self, column: DBColumn, slot: Slot) -> Result { + KeyValueStore::key_exists(self, column, &slot.as_u64().to_be_bytes()) + } + + fn iter_from(&self, column: DBColumn, from: Slot) -> SlotIter<'_> { + Box::new( + KeyValueStore::iter_column_from::>(self, column, &from.as_u64().to_be_bytes()) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = + key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), + ) + } + + fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error> { + Ok( + KeyValueStore::get_bytes(self, column.db_column(), root.as_slice())? 
+ .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?, + ) + } + + fn put_index_batch( + &self, + column: DBColumnColdIndex, + items: Vec<(Hash256, Slot)>, + ) -> Result<(), Error> { + let col = column.db_column(); + let ops = items + .into_iter() + .map(|(root, slot)| { + KeyValueStoreOp::PutKeyValue(col, root.as_slice().to_vec(), slot.as_ssz_bytes()) + }) + .collect(); + KeyValueStore::do_atomically(self, ops) + } + + fn sync(&self) -> Result<(), Error> { + KeyValueStore::sync(self) + } +} diff --git a/beacon_node/store/src/metadata.rs b/beacon_node/store/src/metadata.rs index 215cdb2b64d..cf16c010817 100644 --- a/beacon_node/store/src/metadata.rs +++ b/beacon_node/store/src/metadata.rs @@ -141,11 +141,6 @@ impl AnchorInfo { self.state_lower_limit == 0 && self.state_upper_limit >= split_slot } - /// Return true if no historic states other than genesis *will ever be stored*. - pub fn full_state_pruning_enabled(&self) -> bool { - self.state_lower_limit == 0 && self.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN - } - /// Compute the correct `AnchorInfo` for an archive node created from the current node. 
/// /// This method ensures that the `anchor_slot` which is used for the hot database's diff grid is diff --git a/beacon_node/store/src/reconstruct.rs b/beacon_node/store/src/reconstruct.rs index 04a519af020..6415c275374 100644 --- a/beacon_node/store/src/reconstruct.rs +++ b/beacon_node/store/src/reconstruct.rs @@ -2,7 +2,7 @@ use crate::forwards_iter::FrozenForwardsIterator; use crate::hot_cold_store::{HotColdDB, HotColdDBError}; use crate::metrics; -use crate::{DBColumn, Error, ItemStore}; +use crate::{ColdStore, DBColumn, DBColumnColdIndex, Error, ItemStore}; use itertools::{Itertools, process_results}; use state_processing::{ BlockSignatureStrategy, ConsensusContext, VerifyBlockRoot, per_block_processing, @@ -10,13 +10,13 @@ use state_processing::{ }; use std::sync::Arc; use tracing::{debug, info}; -use types::{EthSpec, Slot}; +use types::{EthSpec, Hash256, Slot}; impl HotColdDB where E: EthSpec, Hot: ItemStore, - Cold: ItemStore, + Cold: ColdStore, { pub fn reconstruct_historic_states( self: &Arc, @@ -129,7 +129,8 @@ where state.build_caches(&self.spec)?; process_results(block_root_iter, |iter| -> Result<(), Error> { - let mut io_batch = vec![]; + let mut cold_items: Vec<(DBColumn, Slot, Vec)> = vec![]; + let mut summary_index: Vec<(Hash256, Slot)> = vec![]; let mut prev_state_root = None; for ((prev_block_root, _), (block_root, slot)) in iter.tuple_windows() { @@ -172,7 +173,7 @@ where .or_else(|_| state.update_tree_hash_cache())?; // Stage state for storage in freezer DB. - self.store_cold_state(&state_root, &state, &mut io_batch)?; + self.store_cold_state(&state_root, &state, &mut cold_items, &mut summary_index)?; let batch_complete = slot + 1 == to_slot; @@ -181,7 +182,13 @@ where // - The diff/snapshot for this slot is required for future slots, or // - The reconstruction batch is complete (we are about to return). if self.hierarchy.should_commit_immediately(slot)? 
|| batch_complete { - self.cold_db.do_atomically(std::mem::take(&mut io_batch))?; + // Slot-keyed cold bulk first, root index after — a mid-flush crash leaves + // cold data with no dangling index entry. + self.commit_cold_items(std::mem::take(&mut cold_items))?; + self.cold_db.put_index_batch( + DBColumnColdIndex::ColdStateSummary, + std::mem::take(&mut summary_index), + )?; if batch_complete { // Perform one last integrity check on the state reached. diff --git a/beacon_node/store/src/static_blocks.rs b/beacon_node/store/src/static_blocks.rs index c0f5cfda458..06f7a9e1039 100644 --- a/beacon_node/store/src/static_blocks.rs +++ b/beacon_node/store/src/static_blocks.rs @@ -1,21 +1,34 @@ -//! Slot-keyed durable archive for finalized blinded blocks. +//! Slot-keyed durable archive for finalized cold-DB columns. //! -//! `StaticBlockStore` is a black box from `HotColdDB`'s perspective: hand it block bytes, -//! ask it for them back by slot. File mapping, recovery, and rename semantics are internal. +//! `StaticColdStore` is a black box from `HotColdDB`'s perspective: hand it +//! `(column, slot, bytes)`, ask it for them back by `(column, slot)`. File +//! mapping, recovery, and rename semantics are internal. +//! +//! Each column gets its own subdirectory under the store root, so the on-disk +//! format of a single column is the original single-column layout — +//! `static_blocks_{file_id:05}` data files, matching `.off` sidecars, and a +//! `static_blocks.conf` commit marker — just rooted at `//`. +//! +//! Per-column behaviour (compression, record-type tag, max decompressed size) +//! lives in `column_config`. Columns absent from that table are rejected. //! //! Contract: -//! - `put(slot, bytes)` is durable on return. The caller is allowed to rely on this for -//! source-of-truth flips (e.g. writing a reverse-index entry, deleting from hot KV). +//! - `put(column, slot, bytes)` is durable on return. +//! 
- Slots within a column must arrive strictly ascending; columns are +//! independent. //! //! See `specs/static-blocks.md` for the on-disk format. +use crate::DBColumn; +use parking_lot::Mutex; use snap::{read::FrameDecoder, write::FrameEncoder}; use std::{ + collections::HashMap, fmt, fs::{self, File, OpenOptions}, io::{self, Read, Seek, SeekFrom, Write}, path::{Path, PathBuf}, - sync::Mutex, + sync::Arc, }; use types::Slot; @@ -24,78 +37,195 @@ const OFFSET_SIZE: u64 = 8; const OFFSET_FILE_LEN: u64 = SLOTS_PER_FILE * OFFSET_SIZE; const CONFIG_FILE: &str = "static_blocks.conf"; const CONFIG_TMP_FILE: &str = "static_blocks.conf.tmp"; -const CONFIG_MAGIC: &[u8; 8] = b"LHSTBLK1"; -const CONFIG_LEN: usize = 24; -// Empty-store sentinel for `highest_written_slot` in `static_blocks.conf`. +const CONFIG_MAGIC: &[u8; 8] = b"LHSTBLK2"; +const CONFIG_LEN: usize = 36; +/// Empty-store sentinel for `highest_written_slot` in the per-column config. const EMPTY_SLOT: u64 = u64::MAX; -// e2store version record. +/// e2store version record, written once at the start of each data file. const VERSION_RECORD: [u8; 8] = [0x65, 0x32, 0, 0, 0, 0, 0, 0]; -// CompressedSignedBeaconBlock e2store record type. -const BLOCK_RECORD_TYPE: [u8; 2] = [0x01, 0x00]; -const MAX_DECOMPRESSED_BLOCK_BYTES: u64 = 10 * 1024 * 1024; -#[derive(Debug)] -pub struct StaticBlockStore { - root_dir: PathBuf, - highest_written_slot: Mutex>, +const COMPRESSION_NONE: u8 = 0; +const COMPRESSION_SNAPPY: u8 = 1; + +/// Per-column configuration. On first creation of a column the values come +/// from `column_config`; thereafter they are persisted in the column file-set +/// `static_blocks.conf` and the on-disk values win over current-build defaults. +#[derive(Debug, Clone, Copy)] +struct ColumnConfig { + /// On-disk subdirectory name under the store root. Stable across builds. + subdir: &'static str, + /// e2store record type tag for this column. 
+ record_type: [u8; 2], + /// Whether values are snappy-framed before write. + compression: bool, + /// Upper bound on a single decoded record's size in bytes. + max_decompressed: u64, } -struct Config { - highest_written_slot: Option, - current_data_len: u64, +/// Static-cold backing for a single cold column. Returns `None` for columns +/// that don't live in the static archive. +fn column_config(column: DBColumn) -> Option { + match column { + DBColumn::BeaconBlock => Some(ColumnConfig { + subdir: "blk", + record_type: [0x01, 0x00], + compression: true, + max_decompressed: 10 * 1024 * 1024, + }), + DBColumn::BeaconBlockRoots => Some(ColumnConfig { + subdir: "bbr", + record_type: [0x02, 0x00], + compression: false, + max_decompressed: 64, + }), + DBColumn::BeaconStateRoots => Some(ColumnConfig { + subdir: "bsr", + record_type: [0x03, 0x00], + compression: false, + max_decompressed: 64, + }), + DBColumn::BeaconStateSnapshot => Some(ColumnConfig { + subdir: "bss", + record_type: [0x04, 0x00], + compression: false, + max_decompressed: 1024 * 1024 * 1024, + }), + DBColumn::BeaconStateDiff => Some(ColumnConfig { + subdir: "bsd", + record_type: [0x05, 0x00], + compression: true, + max_decompressed: 1024 * 1024 * 1024, + }), + _ => None, + } } -type StoreResult = std::result::Result; +#[derive(Debug)] +pub struct StaticColdStore { + root_dir: PathBuf, + /// Lazily-opened per-column file sets. The outer mutex guards creation; + /// reads/writes within a column take that column's own lock. 
+ columns: Mutex>>, +} + +type StoreResult = std::result::Result; #[derive(Debug)] -pub enum StaticBlockStoreError { +pub enum StaticColdStoreError { Io(io::Error), Compression(io::Error), Invalid(String), + UnsupportedColumn(DBColumn), } -impl fmt::Display for StaticBlockStoreError { +impl fmt::Display for StaticColdStoreError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Io(e) => write!(f, "static block store io error: {e}"), - Self::Compression(e) => write!(f, "static block store compression error: {e}"), - Self::Invalid(message) => write!(f, "static block store invalid data: {message}"), + Self::Io(e) => write!(f, "static cold store io error: {e}"), + Self::Compression(e) => write!(f, "static cold store compression error: {e}"), + Self::Invalid(message) => write!(f, "static cold store invalid data: {message}"), + Self::UnsupportedColumn(c) => { + write!(f, "static cold store does not back column {c:?}") + } } } } -impl From for StaticBlockStoreError { +impl From for StaticColdStoreError { fn from(e: io::Error) -> Self { Self::Io(e) } } -impl StaticBlockStore { - /// Open the archive rooted at `path`. +impl StaticColdStore { + /// Open the archive rooted at `path`. Per-column subdirectories are + /// created lazily on first access. pub fn open(path: &Path) -> StoreResult { fs::create_dir_all(path)?; - - let store = Self { + Ok(Self { root_dir: path.to_path_buf(), - highest_written_slot: Mutex::new(None), - }; + columns: Mutex::new(HashMap::new()), + }) + } - if !store.config_path().exists() { - store.write_config(None, 0)?; + /// Read the value at `(column, slot)`, if present. + pub fn get(&self, column: DBColumn, slot: Slot) -> StoreResult>> { + self.open_column(column)?.get(slot) + } + + /// Durably store `bytes` at `(column, slot)`. Slots within a column must + /// arrive strictly ascending. 
+ pub fn put(&self, column: DBColumn, slot: Slot, bytes: &[u8]) -> StoreResult<()> { + self.open_column(column)?.put(slot, bytes) + } + + /// Resolve (and lazily create) the file set for `column`. + fn open_column(&self, column: DBColumn) -> StoreResult> { + let mut columns = self.columns.lock(); + if let Some(handle) = columns.get(&column) { + return Ok(handle.clone()); } + let cfg = column_config(column).ok_or(StaticColdStoreError::UnsupportedColumn(column))?; + let handle = Arc::new(Column::open(self.root_dir.join(cfg.subdir), cfg)?); + columns.insert(column, handle.clone()); + Ok(handle) + } +} - let config = store.read_config()?; - if let Some(slot) = config.highest_written_slot { - store.heal_current_file(slot, config.current_data_len)?; +/// Single-column slot-keyed file set. Owns one subdirectory of data + `.off` + +/// config files. +#[derive(Debug)] +struct Column { + root_dir: PathBuf, + config: ColumnConfig, + highest_written_slot: Mutex>, +} + +struct ColumnConfigOnDisk { + highest_written_slot: Option, + current_data_len: u64, + record_type: [u8; 2], + compression: bool, + max_decompressed: u64, +} + +impl Column { + fn open(root_dir: PathBuf, defaults: ColumnConfig) -> StoreResult { + fs::create_dir_all(&root_dir)?; + + // First-open: persist current-build defaults. Re-open: persisted + // settings win over `defaults`, which preserves on-disk readability + // even if the build's defaults change later. + let config_path = root_dir.join(CONFIG_FILE); + let tmp_path = root_dir.join(CONFIG_TMP_FILE); + if !config_path.exists() { + atomic_write_config(&config_path, &tmp_path, &root_dir, None, 0, &defaults)?; } - *store.lock_highest()? 
= config.highest_written_slot; - Ok(store) + let on_disk = read_config(&config_path)?; + let config = ColumnConfig { + subdir: defaults.subdir, + record_type: on_disk.record_type, + compression: on_disk.compression, + max_decompressed: on_disk.max_decompressed, + }; + + let handle = Self { + root_dir, + config, + highest_written_slot: Mutex::new(None), + }; + + if let Some(slot) = on_disk.highest_written_slot { + handle.heal_current_file(slot, on_disk.current_data_len)?; + } + *handle.highest_written_slot.lock() = on_disk.highest_written_slot; + + Ok(handle) } - /// Read the block at `slot`, if present. - pub fn get(&self, slot: Slot) -> StoreResult>> { - let Some(highest_written_slot) = *self.lock_highest()? else { + fn get(&self, slot: Slot) -> StoreResult>> { + let Some(highest_written_slot) = *self.highest_written_slot.lock() else { return Ok(None); }; if slot > highest_written_slot { @@ -114,33 +244,43 @@ impl StaticBlockStore { let mut header = [0; 8]; data_file.read_exact(&mut header)?; - if header[0..2] != BLOCK_RECORD_TYPE || header[6..8] != [0, 0] { - return Err(StaticBlockStoreError::Invalid( - "invalid static block record header".into(), + if header[0..2] != self.config.record_type || header[6..8] != [0, 0] { + return Err(StaticColdStoreError::Invalid( + "invalid static cold record header".into(), )); } let len = u32::from_le_bytes([header[2], header[3], header[4], header[5]]) as usize; - let mut compressed = vec![0; len]; - data_file.read_exact(&mut compressed)?; - - decompress_block(&compressed) + let mut payload = vec![0; len]; + data_file.read_exact(&mut payload)?; + + if self.config.compression { + decompress_record(&payload, self.config.max_decompressed) + } else { + if (payload.len() as u64) > self.config.max_decompressed { + return Err(StaticColdStoreError::Invalid( + "static cold record exceeds size limit".into(), + )); + } + Ok(Some(payload)) + } } - /// Durably store `bytes` at `slot`. 
Must not return `Ok` until the bytes are recoverable - /// after a crash. - pub fn put(&self, slot: Slot, bytes: &[u8]) -> StoreResult<()> { - let mut highest_written_slot = self.lock_highest()?; + fn put(&self, slot: Slot, bytes: &[u8]) -> StoreResult<()> { + let mut highest_written_slot = self.highest_written_slot.lock(); if highest_written_slot.is_some_and(|highest| slot <= highest) { - return Err(StaticBlockStoreError::Invalid( - "static block put out of order".into(), + return Err(StaticColdStoreError::Invalid( + "static cold put out of order".into(), )); } - let compressed = compress_block(bytes)?; - let compressed_len = u32::try_from(compressed.len()).map_err(|_| { - StaticBlockStoreError::Invalid("compressed static block too large".into()) - })?; + let payload = if self.config.compression { + compress_record(bytes)? + } else { + bytes.to_vec() + }; + let payload_len = u32::try_from(payload.len()) + .map_err(|_| StaticColdStoreError::Invalid("static cold record too large".into()))?; let target_file_id = file_id(slot); // Discard an uncommitted next-file tail after a crash. @@ -163,7 +303,12 @@ impl StaticBlockStore { } let offset = data_file.seek(SeekFrom::End(0))?; - write_block_record(&mut data_file, compressed_len, &compressed)?; + write_record( + &mut data_file, + self.config.record_type, + payload_len, + &payload, + )?; let data_len = data_file.seek(SeekFrom::End(0))?; // Data and offset files must hit disk before the config commit marker. data_file.sync_all()?; @@ -191,15 +336,14 @@ impl StaticBlockStore { Ok(()) } - /// Truncate uncommitted data and clear uncommitted offsets after restart. 
fn heal_current_file(&self, slot: Slot, current_data_len: u64) -> StoreResult<()> { let file_id = file_id(slot); let data_path = self.data_path(file_id); let data_file = OpenOptions::new().read(true).write(true).open(&data_path)?; let data_len = data_file.metadata()?.len(); if data_len < current_data_len { - return Err(StaticBlockStoreError::Invalid( - "static block data file shorter than committed length".into(), + return Err(StaticColdStoreError::Invalid( + "static cold data file shorter than committed length".into(), )); } if data_len != current_data_len { @@ -212,8 +356,8 @@ impl StaticBlockStore { let required_len = offset_position(slot) + OFFSET_SIZE; let off_len = off_file.metadata()?.len(); if off_len < required_len { - return Err(StaticBlockStoreError::Invalid( - "static block offset file shorter than committed slot".into(), + return Err(StaticColdStoreError::Invalid( + "static cold offset file shorter than committed slot".into(), )); } if off_len < OFFSET_FILE_LEN { @@ -232,54 +376,21 @@ impl StaticBlockStore { Ok(()) } - /// Read the global commit marker. - fn read_config(&self) -> StoreResult { - let path = self.config_path(); - let bytes = fs::read(&path)?; - if bytes.len() != CONFIG_LEN || &bytes[0..8] != CONFIG_MAGIC { - return Err(StaticBlockStoreError::Invalid( - "invalid static block config".into(), - )); - } - - let highest = u64::from_le_bytes(bytes[8..16].try_into().expect("slice length checked")); - let current_data_len = - u64::from_le_bytes(bytes[16..24].try_into().expect("slice length checked")); - - Ok(Config { - highest_written_slot: (highest != EMPTY_SLOT).then(|| Slot::new(highest)), - current_data_len, - }) - } - - /// Atomically write the global commit marker. 
fn write_config( &self, highest_written_slot: Option, current_data_len: u64, ) -> StoreResult<()> { - let path = self.config_path(); - let tmp_path = self.root_dir.join(CONFIG_TMP_FILE); - let mut bytes = [0; CONFIG_LEN]; - bytes[0..8].copy_from_slice(CONFIG_MAGIC); - bytes[8..16].copy_from_slice( - &highest_written_slot - .map_or(EMPTY_SLOT, |slot| slot.as_u64()) - .to_le_bytes(), - ); - bytes[16..24].copy_from_slice(¤t_data_len.to_le_bytes()); - - { - let mut tmp = File::create(&tmp_path)?; - tmp.write_all(&bytes)?; - tmp.sync_all()?; - } - - fs::rename(&tmp_path, &path)?; - sync_dir(&self.root_dir) + atomic_write_config( + &self.config_path(), + &self.root_dir.join(CONFIG_TMP_FILE), + &self.root_dir, + highest_written_slot, + current_data_len, + &self.config, + ) } - /// Read the slot's absolute data-file offset. fn read_offset(&self, file_id: u64, slot: Slot) -> StoreResult { let off_path = self.offset_path(file_id); let mut off_file = File::open(&off_path)?; @@ -289,78 +400,131 @@ impl StaticBlockStore { Ok(u64::from_le_bytes(bytes)) } - /// Lock writer state. - fn lock_highest(&self) -> StoreResult>> { - self.highest_written_slot - .lock() - .map_err(|_| StaticBlockStoreError::Invalid("static block mutex poisoned".into())) - } - - /// Path to the global config file. fn config_path(&self) -> PathBuf { self.root_dir.join(CONFIG_FILE) } - /// Path to a data file. fn data_path(&self, file_id: u64) -> PathBuf { self.root_dir.join(format!("static_blocks_{file_id:05}")) } - /// Path to a sidecar offset file. fn offset_path(&self, file_id: u64) -> PathBuf { self.root_dir .join(format!("static_blocks_{file_id:05}.off")) } } -/// File id containing `slot`. 
+fn read_config(path: &Path) -> StoreResult<ColumnConfigOnDisk> { + let bytes = fs::read(path)?; + if bytes.len() != CONFIG_LEN || &bytes[0..8] != CONFIG_MAGIC { + return Err(StaticColdStoreError::Invalid( + "invalid static cold config".into(), + )); + } + let highest = u64::from_le_bytes(bytes[8..16].try_into().expect("slice length checked")); + let current_data_len = + u64::from_le_bytes(bytes[16..24].try_into().expect("slice length checked")); + let record_type = [bytes[24], bytes[25]]; + let compression = match bytes[26] { + COMPRESSION_NONE => false, + COMPRESSION_SNAPPY => true, + other => { + return Err(StaticColdStoreError::Invalid(format!( + "unknown compression flag {other}" + ))); + } + }; + let max_decompressed = + u64::from_le_bytes(bytes[28..36].try_into().expect("slice length checked")); + Ok(ColumnConfigOnDisk { + highest_written_slot: (highest != EMPTY_SLOT).then(|| Slot::new(highest)), + current_data_len, + record_type, + compression, + max_decompressed, + }) +} + +fn atomic_write_config( + config_path: &Path, + tmp_path: &Path, + root_dir: &Path, + highest_written_slot: Option<Slot>, + current_data_len: u64, + config: &ColumnConfig, +) -> StoreResult<()> { + let mut bytes = [0u8; CONFIG_LEN]; + bytes[0..8].copy_from_slice(CONFIG_MAGIC); + bytes[8..16].copy_from_slice( + &highest_written_slot + .map_or(EMPTY_SLOT, |slot| slot.as_u64()) + .to_le_bytes(), + ); + bytes[16..24].copy_from_slice(&current_data_len.to_le_bytes()); + bytes[24..26].copy_from_slice(&config.record_type); + bytes[26] = if config.compression { + COMPRESSION_SNAPPY + } else { + COMPRESSION_NONE + }; + bytes[27] = 0; + bytes[28..36].copy_from_slice(&config.max_decompressed.to_le_bytes()); + + { + let mut tmp = File::create(tmp_path)?; + tmp.write_all(&bytes)?; + tmp.sync_all()?; + } + + fs::rename(tmp_path, config_path)?; + sync_dir(root_dir) +} + fn file_id(slot: Slot) -> u64 { slot.as_u64() / SLOTS_PER_FILE } -/// Byte position of `slot` in its `.off` file.
fn offset_position(slot: Slot) -> u64 { (slot.as_u64() % SLOTS_PER_FILE) * OFFSET_SIZE } -/// Snappy-frame SSZ block bytes. -fn compress_block(bytes: &[u8]) -> StoreResult> { +fn compress_record(bytes: &[u8]) -> StoreResult> { let mut encoder = FrameEncoder::new(Vec::new()); encoder .write_all(bytes) - .map_err(StaticBlockStoreError::Compression)?; - encoder - .flush() - .map_err(StaticBlockStoreError::Compression)?; + .map_err(StaticColdStoreError::Compression)?; + encoder.flush().map_err(StaticColdStoreError::Compression)?; Ok(encoder.get_ref().clone()) } -/// Append one compressed block record. -fn write_block_record(file: &mut File, compressed_len: u32, compressed: &[u8]) -> StoreResult<()> { - file.write_all(&BLOCK_RECORD_TYPE)?; - file.write_all(&compressed_len.to_le_bytes())?; +fn write_record( + file: &mut File, + record_type: [u8; 2], + payload_len: u32, + payload: &[u8], +) -> StoreResult<()> { + file.write_all(&record_type)?; + file.write_all(&payload_len.to_le_bytes())?; file.write_all(&0u16.to_le_bytes())?; - file.write_all(compressed)?; + file.write_all(payload)?; Ok(()) } -/// Decode one compressed block record payload. 
-fn decompress_block(bytes: &[u8]) -> StoreResult>> { +fn decompress_record(bytes: &[u8], max_decompressed: u64) -> StoreResult>> { let decoder = FrameDecoder::new(bytes); - let mut limited = decoder.take(MAX_DECOMPRESSED_BLOCK_BYTES + 1); + let mut limited = decoder.take(max_decompressed + 1); let mut decompressed = Vec::new(); limited .read_to_end(&mut decompressed) - .map_err(StaticBlockStoreError::Compression)?; - if decompressed.len() as u64 > MAX_DECOMPRESSED_BLOCK_BYTES { - return Err(StaticBlockStoreError::Invalid( - "static block exceeds decompressed size limit".into(), + .map_err(StaticColdStoreError::Compression)?; + if decompressed.len() as u64 > max_decompressed { + return Err(StaticColdStoreError::Invalid( + "static cold record exceeds decompressed size limit".into(), )); } Ok(Some(decompressed)) } -/// Fsync directory entries after rename/create. fn sync_dir(path: &Path) -> StoreResult<()> { let dir = File::open(path)?; dir.sync_all()?; diff --git a/database_manager/src/cli.rs b/database_manager/src/cli.rs index cb332546f94..b8d1ea2ea84 100644 --- a/database_manager/src/cli.rs +++ b/database_manager/src/cli.rs @@ -78,7 +78,6 @@ pub enum DatabaseManagerSubcommand { Version(Version), PrunePayloads(PrunePayloads), PruneBlobs(PruneBlobs), - PruneStates(PruneStates), Compact(Compact), } @@ -176,21 +175,6 @@ pub struct PrunePayloads {} )] pub struct PruneBlobs {} -#[derive(Parser, Clone, Deserialize, Serialize, Debug)] -#[clap( - about = "Prune all beacon states from the freezer database.", - alias = "prune_states" -)] -pub struct PruneStates { - #[clap( - long, - help = "Commit to pruning states irreversably. 
Without this flag the command will \ - just check that the database is capable of being pruned.", - help_heading = FLAG_HEADER, - )] - pub confirm: bool, -} - #[derive(Parser, Clone, Deserialize, Serialize, Debug)] #[clap(about = "Compact database manually.")] pub struct Compact { diff --git a/database_manager/src/lib.rs b/database_manager/src/lib.rs index 608400fa7ed..2e082bc383d 100644 --- a/database_manager/src/lib.rs +++ b/database_manager/src/lib.rs @@ -1,7 +1,6 @@ pub mod cli; use crate::cli::DatabaseManager; use crate::cli::Migrate; -use crate::cli::PruneStates; use beacon_chain::{ builder::Witness, schema_change::migrate_schema, slot_clock::SystemTimeSlotClock, }; @@ -22,8 +21,8 @@ use store::{ metadata::{CURRENT_SCHEMA_VERSION, SchemaVersion}, }; use strum::{EnumString, VariantNames}; -use tracing::{info, warn}; -use types::{BeaconState, EthSpec, Slot}; +use tracing::info; +use types::EthSpec; fn parse_client_config( cli_args: &ArgMatches, @@ -377,75 +376,6 @@ pub fn prune_blobs( db.try_prune_most_blobs(true) } -pub struct PruneStatesConfig { - confirm: bool, -} -fn parse_prune_states_config( - prune_states_config: &PruneStates, -) -> Result { - let confirm = prune_states_config.confirm; - Ok(PruneStatesConfig { confirm }) -} - -pub fn prune_states( - client_config: ClientConfig, - prune_config: PruneStatesConfig, - mut genesis_state: BeaconState, - runtime_context: &RuntimeContext, -) -> Result<(), String> { - let spec = &runtime_context.eth2_config.spec; - let hot_path = client_config.get_db_path(); - let cold_path = client_config.get_freezer_db_path(); - let blobs_path = client_config.get_blobs_db_path(); - - let db = HotColdDB::, BeaconNodeBackend>::open( - &hot_path, - &cold_path, - &blobs_path, - |_, _, _| Ok(()), - client_config.store, - spec.clone(), - ) - .map_err(|e| format!("Unable to open database: {e:?}"))?; - - // Load the genesis state from the database to ensure we're deleting states for the - // correct network, and that we don't end up 
storing the wrong genesis state. - let genesis_from_db = db - .load_cold_state_by_slot(Slot::new(0)) - .map_err(|e| format!("Error reading genesis state: {e:?}"))?; - - if genesis_from_db.genesis_validators_root() != genesis_state.genesis_validators_root() { - return Err(format!( - "Error: Wrong network. Genesis state in DB does not match {} genesis.", - spec.config_name.as_deref().unwrap_or("") - )); - } - - // Check that the user has confirmed they want to proceed. - if !prune_config.confirm { - if db.get_anchor_info().full_state_pruning_enabled() { - info!("States have already been pruned"); - return Ok(()); - } - - info!("Ready to prune states"); - warn!("Pruning states is irreversible"); - warn!("Re-run this command with --confirm to commit to state deletion"); - info!("Nothing has been pruned on this run"); - return Err("Error: confirmation flag required".into()); - } - - // Delete all historic state data and *re-store* the genesis state. - let genesis_state_root = genesis_state - .update_tree_hash_cache() - .map_err(|e| format!("Error computing genesis state root: {e:?}"))?; - db.prune_historic_states(genesis_state_root, &genesis_state) - .map_err(|e| format!("Failed to prune due to error: {e:?}"))?; - - info!("Historic states pruned successfully"); - Ok(()) -} - /// Run the database manager, returning an error string if the operation did not succeed. pub fn run( cli_args: &ArgMatches, @@ -456,26 +386,6 @@ pub fn run( let context = env.core_context(); let format_err = |e| format!("Fatal error: {:?}", e); - let get_genesis_state = || { - let executor = env.core_context().executor; - let network_config = context - .eth2_network_config - .clone() - .ok_or("Missing network config")?; - - executor - .block_on_dangerous( - network_config.genesis_state::( - client_config.genesis_state_url.as_deref(), - client_config.genesis_state_url_timeout, - ), - "get_genesis_state", - ) - .ok_or("Shutting down")? - .map_err(|e| format!("Error getting genesis state: {e}"))? 
- .ok_or("Genesis state missing".to_string()) - }; - match &db_manager_config.subcommand { cli::DatabaseManagerSubcommand::Migrate(migrate_config) => { let migrate_config = parse_migrate_config(migrate_config)?; @@ -494,11 +404,6 @@ pub fn run( cli::DatabaseManagerSubcommand::PruneBlobs(_) => { prune_blobs(client_config, &context).map_err(format_err) } - cli::DatabaseManagerSubcommand::PruneStates(prune_states_config) => { - let prune_config = parse_prune_states_config(prune_states_config)?; - let genesis_state = get_genesis_state()?; - prune_states(client_config, prune_config, genesis_state, &context) - } cli::DatabaseManagerSubcommand::Compact(compact_config) => { let compact_config = parse_compact_config(compact_config)?; compact_db::(compact_config, client_config).map_err(format_err) diff --git a/specs/static-cold-backend.md b/specs/static-cold-backend.md new file mode 100644 index 00000000000..84add548ff3 --- /dev/null +++ b/specs/static-cold-backend.md @@ -0,0 +1,61 @@ +# Static Cold Backend + +Goal: make the cold archive backend pluggable. + +Supported cold backends: + +- current KV cold DB +- static range files + +## Node modes + +| Startup path | Mode | +| - | - | +| Genesis sync with static archive enabled | archive | +| Checkpoint sync with complete static history imported | archive | +| Checkpoint sync without complete static history | full node | + +A full node does not become archive by P2P backfill or online reconstruction. + +## Ownership + +| Store | Owns | +| - | - | +| Hot DB | head data, fork-choice data, unfinalized data, P2P-required recent block window, metadata | +| Cold backend | finalized archive ranges, root-to-slot indices for finalized data (block_root → slot, state_root → slot) | + +## Writers + +Static cold files are written only by: + +- genesis sync, in finalized slot order +- verified complete range import + +Network backfill may write recent blocks to Hot DB, but never to static cold. 
+Online reconstruction never writes static cold. + +## Availability + +A static range is either complete or absent. Reads below the hot/recent window +require the matching static range. If it is absent, the node is not archive for +that range. + +The current KV cold DB remains a valid cold backend. + +## Backend API + +Slot-keyed bulk: `get`, `put_batch`, `exists`, `iter_from`, `sync`. No deletes. +Batched puts are best-effort, not atomic. + +Root-keyed indices: `get_index(col, root)`, `put_index_batch(col, items)`, where +`col` is one of `BlockSlot` or `ColdStateSummary`. The static-file backend embeds +the same KV implementation Lighthouse uses for the main DB at `/index/` to +serve these. Crash-safety rule: slot-keyed bulk data is committed before the +matching root index entry, so a crash leaves cold data without a dangling index. + +## Removed + +- `lighthouse db prune-states` and `HotColdDB::prune_historic_states`. They + produce a "cold blocks present, cold states absent" mode that is not in the + startup-path table above, and the spec does not support runtime mode + transitions in either direction. From cbb4824c55475d918c1c84f6aa91daeddcfb438b Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 19:35:06 +0200 Subject: [PATCH 08/24] Rename static_blocks -> static_cold, tighten column types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move beacon_node/store/src/static_blocks.rs to static_cold.rs (the type is no longer block-specific). - Add DBColumnCold (slot-keyed cold columns) alongside DBColumnColdIndex. StaticColdStore is keyed by DBColumnCold all the way through; no DBColumn conversion happens inside static_cold.rs. column_config returns a plain ColumnConfig (was Option) and UnsupportedColumn errors go away — the tighter enum makes them unrepresentable. - Eager-open every cold column at boot, freeze the columns map. 
No outer Mutex/RwLock; the per-column writer state mutex is the only sync point. - Rename ColumnConfig::max_decompressed -> max_value_bytes (it bounds the raw payload size on uncompressed reads too, defending against corrupt headers). - BeaconStateDiff: compression: false. HDiff is already compressed internally (zstd'd validator/balance chunks) so snappy on top is wasteful. --- .../overflow_lru_cache.rs | 4 +- beacon_node/store/src/database/interface.rs | 2 +- beacon_node/store/src/errors.rs | 2 +- beacon_node/store/src/invariants.rs | 5 +- beacon_node/store/src/lib.rs | 39 +++++- beacon_node/store/src/memory_store.rs | 2 +- .../src/{static_blocks.rs => static_cold.rs} | 130 +++++++++--------- 7 files changed, 112 insertions(+), 72 deletions(-) rename beacon_node/store/src/{static_blocks.rs => static_cold.rs} (82%) diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index e5158bcc8cb..4c43d5bd0a3 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -791,7 +791,9 @@ mod test { use fork_choice::PayloadVerificationStatus; use logging::create_test_tracing_subscriber; use state_processing::ConsensusContext; - use store::{HotColdDB, ItemStore, StoreConfig, database::interface::BeaconNodeBackend}; + use store::{ + ColdStore, HotColdDB, ItemStore, StoreConfig, database::interface::BeaconNodeBackend, + }; use tempfile::{TempDir, tempdir}; use tracing::info; use types::MinimalEthSpec; diff --git a/beacon_node/store/src/database/interface.rs b/beacon_node/store/src/database/interface.rs index 88bf2758172..18a49178f92 100644 --- a/beacon_node/store/src/database/interface.rs +++ b/beacon_node/store/src/database/interface.rs @@ -41,7 +41,7 @@ impl ColdStore for BeaconNodeBackend { KeyValueStore::do_atomically(self, ops) } - fn exists(&self, 
column: DBColumn, slot: Slot) -> Result { + fn contains(&self, column: DBColumn, slot: Slot) -> Result { KeyValueStore::key_exists(self, column, &slot.as_u64().to_be_bytes()) } diff --git a/beacon_node/store/src/errors.rs b/beacon_node/store/src/errors.rs index e479e6cf791..be2766765ea 100644 --- a/beacon_node/store/src/errors.rs +++ b/beacon_node/store/src/errors.rs @@ -1,7 +1,7 @@ use crate::config::StoreConfigError; use crate::hot_cold_store::{HotColdDBError, StateSummaryIteratorError}; use crate::static_blobs::StaticBlobStoreError; -use crate::static_blocks::StaticColdStoreError; +use crate::static_cold::StaticColdStoreError; use crate::{DBColumn, hdiff}; #[cfg(feature = "leveldb")] use leveldb::error::Error as LevelDBError; diff --git a/beacon_node/store/src/invariants.rs b/beacon_node/store/src/invariants.rs index d82be578655..f8ed0ab66b3 100644 --- a/beacon_node/store/src/invariants.rs +++ b/beacon_node/store/src/invariants.rs @@ -720,7 +720,8 @@ impl, Cold: ColdStore> HotColdDB match self.hierarchy.storage_strategy(slot, Slot::new(0))? 
{ StorageStrategy::Snapshot => { - let has_snapshot = self.cold_db.exists(DBColumn::BeaconStateSnapshot, slot)?; + let has_snapshot = + self.cold_db.contains(DBColumn::BeaconStateSnapshot, slot)?; if !has_snapshot { result.add_violation(InvariantViolation::ColdStateMissingSnapshot { state_root, @@ -729,7 +730,7 @@ impl, Cold: ColdStore> HotColdDB } } StorageStrategy::DiffFrom(base_slot) => { - let has_diff = self.cold_db.exists(DBColumn::BeaconStateDiff, slot)?; + let has_diff = self.cold_db.contains(DBColumn::BeaconStateDiff, slot)?; if !has_diff { result.add_violation(InvariantViolation::ColdStateMissingDiff { state_root, diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 88f5a77a785..5d3371cf570 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -22,7 +22,7 @@ pub mod metrics; pub mod reconstruct; pub mod state_cache; pub mod static_blobs; -pub mod static_blocks; +pub mod static_cold; pub mod database; pub mod iter; @@ -32,7 +32,7 @@ pub use self::config::StoreConfig; pub use self::hot_cold_store::{HotColdDB, HotStateSummary, Split}; pub use self::memory_store::MemoryStore; pub use self::static_blobs::StaticBlobStore; -pub use self::static_blocks::StaticColdStore; +pub use self::static_cold::StaticColdStore; pub use crate::metadata::BlobInfo; pub use errors::Error; pub use metadata::AnchorInfo; @@ -111,6 +111,39 @@ pub trait KeyValueStore: Sync + Send + Sized + 'static { pub type SlotIter<'a> = Box), Error>> + 'a>; +/// Slot-keyed cold columns served by the static archive. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] +pub enum DBColumnCold { + Block, + BlockRoots, + StateRoots, + StateSnapshot, + StateDiff, +} + +impl DBColumnCold { + pub fn db_column(self) -> DBColumn { + match self { + Self::Block => DBColumn::BeaconBlock, + Self::BlockRoots => DBColumn::BeaconBlockRoots, + Self::StateRoots => DBColumn::BeaconStateRoots, + Self::StateSnapshot => DBColumn::BeaconStateSnapshot, + Self::StateDiff => DBColumn::BeaconStateDiff, + } + } + + pub fn try_from_db_column(column: DBColumn) -> Option { + match column { + DBColumn::BeaconBlock => Some(Self::Block), + DBColumn::BeaconBlockRoots => Some(Self::BlockRoots), + DBColumn::BeaconStateRoots => Some(Self::StateRoots), + DBColumn::BeaconStateSnapshot => Some(Self::StateSnapshot), + DBColumn::BeaconStateDiff => Some(Self::StateDiff), + _ => None, + } + } +} + /// Root-keyed indices owned by the cold backend. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum DBColumnColdIndex { @@ -135,7 +168,7 @@ pub trait ColdStore: Sync + Send + Sized + 'static { fn put_batch(&self, column: DBColumn, items: Vec<(Slot, Vec)>) -> Result<(), Error>; - fn exists(&self, column: DBColumn, slot: Slot) -> Result; + fn contains(&self, column: DBColumn, slot: Slot) -> Result; fn iter_from(&self, column: DBColumn, from: Slot) -> SlotIter<'_>; diff --git a/beacon_node/store/src/memory_store.rs b/beacon_node/store/src/memory_store.rs index d7e9eafbaec..ea769d11f73 100644 --- a/beacon_node/store/src/memory_store.rs +++ b/beacon_node/store/src/memory_store.rs @@ -169,7 +169,7 @@ impl ColdStore for MemoryStore { KeyValueStore::do_atomically(self, ops) } - fn exists(&self, column: DBColumn, slot: Slot) -> Result { + fn contains(&self, column: DBColumn, slot: Slot) -> Result { KeyValueStore::key_exists(self, column, &slot.as_u64().to_be_bytes()) } diff --git a/beacon_node/store/src/static_blocks.rs b/beacon_node/store/src/static_cold.rs similarity index 82% rename from 
beacon_node/store/src/static_blocks.rs rename to beacon_node/store/src/static_cold.rs index 06f7a9e1039..cbe8e47c662 100644 --- a/beacon_node/store/src/static_blocks.rs +++ b/beacon_node/store/src/static_cold.rs @@ -9,8 +9,8 @@ //! `static_blocks_{file_id:05}` data files, matching `.off` sidecars, and a //! `static_blocks.conf` commit marker — just rooted at `//`. //! -//! Per-column behaviour (compression, record-type tag, max decompressed size) -//! lives in `column_config`. Columns absent from that table are rejected. +//! Per-column behaviour (compression, record-type tag, max value bytes) lives +//! in `column_config`, keyed by the tight `DBColumnCold` enum. //! //! Contract: //! - `put(column, slot, bytes)` is durable on return. @@ -19,7 +19,7 @@ //! //! See `specs/static-blocks.md` for the on-disk format. -use crate::DBColumn; +use crate::DBColumnCold; use parking_lot::Mutex; use snap::{read::FrameDecoder, write::FrameEncoder}; use std::{ @@ -28,8 +28,8 @@ use std::{ fs::{self, File, OpenOptions}, io::{self, Read, Seek, SeekFrom, Write}, path::{Path, PathBuf}, - sync::Arc, }; +use strum::IntoEnumIterator; use types::Slot; const SLOTS_PER_FILE: u64 = 8192; @@ -59,53 +59,54 @@ struct ColumnConfig { /// Whether values are snappy-framed before write. compression: bool, /// Upper bound on a single decoded record's size in bytes. - max_decompressed: u64, + max_value_bytes: u64, } -/// Static-cold backing for a single cold column. Returns `None` for columns -/// that don't live in the static archive. -fn column_config(column: DBColumn) -> Option { +/// Per-column file format defaults. 
+fn column_config(column: DBColumnCold) -> ColumnConfig { match column { - DBColumn::BeaconBlock => Some(ColumnConfig { + DBColumnCold::Block => ColumnConfig { subdir: "blk", record_type: [0x01, 0x00], compression: true, - max_decompressed: 10 * 1024 * 1024, - }), - DBColumn::BeaconBlockRoots => Some(ColumnConfig { + max_value_bytes: 10 * 1024 * 1024, + }, + DBColumnCold::BlockRoots => ColumnConfig { subdir: "bbr", record_type: [0x02, 0x00], compression: false, - max_decompressed: 64, - }), - DBColumn::BeaconStateRoots => Some(ColumnConfig { + max_value_bytes: 64, + }, + DBColumnCold::StateRoots => ColumnConfig { subdir: "bsr", record_type: [0x03, 0x00], compression: false, - max_decompressed: 64, - }), - DBColumn::BeaconStateSnapshot => Some(ColumnConfig { + max_value_bytes: 64, + }, + DBColumnCold::StateSnapshot => ColumnConfig { subdir: "bss", record_type: [0x04, 0x00], compression: false, - max_decompressed: 1024 * 1024 * 1024, - }), - DBColumn::BeaconStateDiff => Some(ColumnConfig { + max_value_bytes: 1024 * 1024 * 1024, + }, + DBColumnCold::StateDiff => ColumnConfig { + // HDiff is already compressed internally (zstd'd validator and + // balance chunks; xdelta3 state diff). No benefit to wrapping it + // in snappy here. subdir: "bsd", record_type: [0x05, 0x00], - compression: true, - max_decompressed: 1024 * 1024 * 1024, - }), - _ => None, + compression: false, + max_value_bytes: 1024 * 1024 * 1024, + }, } } #[derive(Debug)] pub struct StaticColdStore { - root_dir: PathBuf, - /// Lazily-opened per-column file sets. The outer mutex guards creation; - /// reads/writes within a column take that column's own lock. - columns: Mutex>>, + /// All cold columns the static archive backs, opened eagerly at boot. + /// Frozen after construction; per-column writer state is locked inside + /// each `Column`. 
+ columns: HashMap, } type StoreResult = std::result::Result; @@ -115,7 +116,6 @@ pub enum StaticColdStoreError { Io(io::Error), Compression(io::Error), Invalid(String), - UnsupportedColumn(DBColumn), } impl fmt::Display for StaticColdStoreError { @@ -124,9 +124,6 @@ impl fmt::Display for StaticColdStoreError { Self::Io(e) => write!(f, "static cold store io error: {e}"), Self::Compression(e) => write!(f, "static cold store compression error: {e}"), Self::Invalid(message) => write!(f, "static cold store invalid data: {message}"), - Self::UnsupportedColumn(c) => { - write!(f, "static cold store does not back column {c:?}") - } } } } @@ -138,37 +135,34 @@ impl From for StaticColdStoreError { } impl StaticColdStore { - /// Open the archive rooted at `path`. Per-column subdirectories are - /// created lazily on first access. + /// Open the archive rooted at `path`. Every cold column is opened eagerly + /// so subsequent reads/writes are pure hashmap lookups with no I/O on the + /// hot path. pub fn open(path: &Path) -> StoreResult { fs::create_dir_all(path)?; - Ok(Self { - root_dir: path.to_path_buf(), - columns: Mutex::new(HashMap::new()), - }) + let mut columns = HashMap::new(); + for column in DBColumnCold::iter() { + let cfg = column_config(column); + columns.insert(column, Column::open(path.join(cfg.subdir), cfg)?); + } + Ok(Self { columns }) } /// Read the value at `(column, slot)`, if present. - pub fn get(&self, column: DBColumn, slot: Slot) -> StoreResult>> { - self.open_column(column)?.get(slot) + pub fn get(&self, column: DBColumnCold, slot: Slot) -> StoreResult>> { + self.columns[&column].get(slot) } /// Durably store `bytes` at `(column, slot)`. Slots within a column must /// arrive strictly ascending. 
- pub fn put(&self, column: DBColumn, slot: Slot, bytes: &[u8]) -> StoreResult<()> { - self.open_column(column)?.put(slot, bytes) + pub fn put(&self, column: DBColumnCold, slot: Slot, bytes: &[u8]) -> StoreResult<()> { + self.columns[&column].put(slot, bytes) } - /// Resolve (and lazily create) the file set for `column`. - fn open_column(&self, column: DBColumn) -> StoreResult> { - let mut columns = self.columns.lock(); - if let Some(handle) = columns.get(&column) { - return Ok(handle.clone()); - } - let cfg = column_config(column).ok_or(StaticColdStoreError::UnsupportedColumn(column))?; - let handle = Arc::new(Column::open(self.root_dir.join(cfg.subdir), cfg)?); - columns.insert(column, handle.clone()); - Ok(handle) + /// Return `true` if a value exists at `(column, slot)`. Cheaper than `get` + /// because only the `.off` sidecar is consulted; the data file is not read. + pub fn contains(&self, column: DBColumnCold, slot: Slot) -> StoreResult { + self.columns[&column].contains(slot) } } @@ -186,7 +180,7 @@ struct ColumnConfigOnDisk { current_data_len: u64, record_type: [u8; 2], compression: bool, - max_decompressed: u64, + max_value_bytes: u64, } impl Column { @@ -207,7 +201,7 @@ impl Column { subdir: defaults.subdir, record_type: on_disk.record_type, compression: on_disk.compression, - max_decompressed: on_disk.max_decompressed, + max_value_bytes: on_disk.max_value_bytes, }; let handle = Self { @@ -255,9 +249,9 @@ impl Column { data_file.read_exact(&mut payload)?; if self.config.compression { - decompress_record(&payload, self.config.max_decompressed) + decompress_record(&payload, self.config.max_value_bytes) } else { - if (payload.len() as u64) > self.config.max_decompressed { + if (payload.len() as u64) > self.config.max_value_bytes { return Err(StaticColdStoreError::Invalid( "static cold record exceeds size limit".into(), )); @@ -266,6 +260,16 @@ impl Column { } } + fn contains(&self, slot: Slot) -> StoreResult { + let Some(highest_written_slot) = 
*self.highest_written_slot.lock() else { + return Ok(false); + }; + if slot > highest_written_slot { + return Ok(false); + } + Ok(self.read_offset(file_id(slot), slot)? != 0) + } + fn put(&self, slot: Slot, bytes: &[u8]) -> StoreResult<()> { let mut highest_written_slot = self.highest_written_slot.lock(); if highest_written_slot.is_some_and(|highest| slot <= highest) { @@ -434,14 +438,14 @@ fn read_config(path: &Path) -> StoreResult { ))); } }; - let max_decompressed = + let max_value_bytes = u64::from_le_bytes(bytes[28..36].try_into().expect("slice length checked")); Ok(ColumnConfigOnDisk { highest_written_slot: (highest != EMPTY_SLOT).then(|| Slot::new(highest)), current_data_len, record_type, compression, - max_decompressed, + max_value_bytes, }) } @@ -468,7 +472,7 @@ fn atomic_write_config( COMPRESSION_NONE }; bytes[27] = 0; - bytes[28..36].copy_from_slice(&config.max_decompressed.to_le_bytes()); + bytes[28..36].copy_from_slice(&config.max_value_bytes.to_le_bytes()); { let mut tmp = File::create(tmp_path)?; @@ -510,14 +514,14 @@ fn write_record( Ok(()) } -fn decompress_record(bytes: &[u8], max_decompressed: u64) -> StoreResult>> { +fn decompress_record(bytes: &[u8], max_value_bytes: u64) -> StoreResult>> { let decoder = FrameDecoder::new(bytes); - let mut limited = decoder.take(max_decompressed + 1); + let mut limited = decoder.take(max_value_bytes + 1); let mut decompressed = Vec::new(); limited .read_to_end(&mut decompressed) .map_err(StaticColdStoreError::Compression)?; - if decompressed.len() as u64 > max_decompressed { + if decompressed.len() as u64 > max_value_bytes { return Err(StaticColdStoreError::Invalid( "static cold record exceeds decompressed size limit".into(), )); From c770749a349009a15ebe78d1be710b45554bd96f Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 20:02:06 +0200 Subject: [PATCH 09/24] Tighten ColdStore slot methods to take DBColumnCold The slot-keyed methods on ColdStore 
(get/put_batch/contains/iter_from) now take the tight DBColumnCold enum instead of DBColumn, mirroring the existing DBColumnColdIndex shape on the index methods. This drops DBColumn from static_cold.rs entirely. KV backend impls (BeaconNodeBackend, MemoryStore) translate via column.db_column(). FrozenForwardsIterator::new still accepts DBColumn at the public boundary and converts at the call to cold_db.iter_from. Also: delete static_blobs.rs (was a stub returning Unsupported on every call, with no callers). Revert noise renames (io_batch, cold_db_block_ops, cold_db_state_ops, ops, .map_err(|e| e.into())) to keep the diff against unstable focused on real semantic changes. --- beacon_node/beacon_chain/src/builder.rs | 6 +- .../beacon_chain/src/historical_blocks.rs | 5 +- beacon_node/store/src/database/interface.rs | 38 ++++++------ beacon_node/store/src/errors.rs | 8 --- beacon_node/store/src/forwards_iter.rs | 12 ++-- beacon_node/store/src/hot_cold_store.rs | 57 ++++++++---------- beacon_node/store/src/invariants.rs | 13 ++-- beacon_node/store/src/lib.rs | 10 ++-- beacon_node/store/src/memory_store.rs | 39 ++++++------ beacon_node/store/src/reconstruct.rs | 8 +-- beacon_node/store/src/static_blobs.rs | 59 ------------------- 11 files changed, 96 insertions(+), 159 deletions(-) delete mode 100644 beacon_node/store/src/static_blobs.rs diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 5de28f43f20..2e9231a689d 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -40,7 +40,7 @@ use state_processing::per_slot_processing; use std::marker::PhantomData; use std::sync::Arc; use std::time::Duration; -use store::{ColdStore, DBColumn, Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp}; +use store::{ColdStore, DBColumnCold, Error as StoreError, HotColdDB, ItemStore, KeyValueStoreOp}; use task_executor::{ShutdownReason, TaskExecutor}; use tracing::{debug, error, info, 
warn}; use tree_hash::TreeHash; @@ -340,7 +340,7 @@ where .map_err(|e| format!("Failed to store genesis block: {:?}", e))?; store .store_frozen_block_root_at_skip_slots(Slot::new(0), Slot::new(1), beacon_block_root) - .and_then(|items| store.cold_db.put_batch(DBColumn::BeaconBlockRoots, items)) + .and_then(|ops| store.cold_db.put_batch(DBColumnCold::BlockRoots, ops)) .map_err(|e| format!("Failed to store genesis block root: {e:?}"))?; // Store the genesis block under the `ZERO_HASH` key. @@ -558,7 +558,7 @@ where .map_err(|e| format!("Error writing frozen block roots: {e:?}"))?; store .cold_db - .put_batch(DBColumn::BeaconBlockRoots, block_root_batch) + .put_batch(DBColumnCold::BlockRoots, block_root_batch) .map_err(|e| format!("Error writing frozen block roots: {e:?}"))?; debug!( from = %weak_subj_block.slot(), diff --git a/beacon_node/beacon_chain/src/historical_blocks.rs b/beacon_node/beacon_chain/src/historical_blocks.rs index 9c156394bd0..bdad76abbb1 100644 --- a/beacon_node/beacon_chain/src/historical_blocks.rs +++ b/beacon_node/beacon_chain/src/historical_blocks.rs @@ -11,7 +11,8 @@ use std::iter; use std::time::Duration; use store::metadata::DataColumnInfo; use store::{ - AnchorInfo, BlobInfo, ColdStore, DBColumn, Error as StoreError, KeyValueStore, KeyValueStoreOp, + AnchorInfo, BlobInfo, ColdStore, DBColumnCold, Error as StoreError, KeyValueStore, + KeyValueStoreOp, }; use strum::IntoStaticStr; use tracing::{debug, debug_span, instrument}; @@ -257,7 +258,7 @@ impl BeaconChain { let _span = debug_span!("backfill_write_cold_db").entered(); self.store .cold_db - .put_batch(DBColumn::BeaconBlockRoots, cold_batch)?; + .put_batch(DBColumnCold::BlockRoots, cold_batch)?; } let mut anchor_and_blob_batch = Vec::with_capacity(3); diff --git a/beacon_node/store/src/database/interface.rs b/beacon_node/store/src/database/interface.rs index 18a49178f92..12fdb2a079a 100644 --- a/beacon_node/store/src/database/interface.rs +++ b/beacon_node/store/src/database/interface.rs 
@@ -3,8 +3,8 @@ use crate::database::leveldb_impl; #[cfg(feature = "redb")] use crate::database::redb_impl; use crate::{ - ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnColdIndex, Error, ItemStore, Key, - KeyValueStore, SlotIter, metrics, + ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnCold, DBColumnColdIndex, Error, + ItemStore, Key, KeyValueStore, SlotIter, metrics, }; use crate::{KeyValueStoreOp, StoreConfig, config::DatabaseBackend}; use ssz::{Decode, Encode}; @@ -23,16 +23,17 @@ pub enum BeaconNodeBackend { impl ItemStore for BeaconNodeBackend {} impl ColdStore for BeaconNodeBackend { - fn get(&self, column: DBColumn, slot: Slot) -> Result>, Error> { - KeyValueStore::get_bytes(self, column, &slot.as_u64().to_be_bytes()) + fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error> { + KeyValueStore::get_bytes(self, column.db_column(), &slot.as_u64().to_be_bytes()) } - fn put_batch(&self, column: DBColumn, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + let col = column.db_column(); let ops = items .into_iter() .map(|(slot, value)| { crate::KeyValueStoreOp::PutKeyValue( - column, + col, slot.as_u64().to_be_bytes().to_vec(), value, ) @@ -41,20 +42,23 @@ impl ColdStore for BeaconNodeBackend { KeyValueStore::do_atomically(self, ops) } - fn contains(&self, column: DBColumn, slot: Slot) -> Result { - KeyValueStore::key_exists(self, column, &slot.as_u64().to_be_bytes()) + fn contains(&self, column: DBColumnCold, slot: Slot) -> Result { + KeyValueStore::key_exists(self, column.db_column(), &slot.as_u64().to_be_bytes()) } - fn iter_from(&self, column: DBColumn, from: Slot) -> SlotIter<'_> { + fn iter_from(&self, column: DBColumnCold, from: Slot) -> SlotIter<'_> { Box::new( - KeyValueStore::iter_column_from::>(self, column, &from.as_u64().to_be_bytes()) - .map(|res| { - res.and_then(|(key_bytes, value)| { - let bytes: [u8; 8] = - 
key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; - Ok((Slot::new(u64::from_be_bytes(bytes)), value)) - }) - }), + KeyValueStore::iter_column_from::>( + self, + column.db_column(), + &from.as_u64().to_be_bytes(), + ) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), ) } diff --git a/beacon_node/store/src/errors.rs b/beacon_node/store/src/errors.rs index be2766765ea..1e974cf4b36 100644 --- a/beacon_node/store/src/errors.rs +++ b/beacon_node/store/src/errors.rs @@ -1,6 +1,5 @@ use crate::config::StoreConfigError; use crate::hot_cold_store::{HotColdDBError, StateSummaryIteratorError}; -use crate::static_blobs::StaticBlobStoreError; use crate::static_cold::StaticColdStoreError; use crate::{DBColumn, hdiff}; #[cfg(feature = "leveldb")] @@ -17,7 +16,6 @@ pub enum Error { BeaconStateError(BeaconStateError), HotColdDBError(HotColdDBError), StaticColdStoreError(StaticColdStoreError), - StaticBlobStoreError(StaticBlobStoreError), DBError { message: String, }, @@ -139,12 +137,6 @@ impl From for Error { } } -impl From for Error { - fn from(e: StaticBlobStoreError) -> Error { - Error::StaticBlobStoreError(e) - } -} - impl From for Error { fn from(e: BeaconStateError) -> Error { Error::BeaconStateError(e) diff --git a/beacon_node/store/src/forwards_iter.rs b/beacon_node/store/src/forwards_iter.rs index 4c994192b0a..6478e6695e0 100644 --- a/beacon_node/store/src/forwards_iter.rs +++ b/beacon_node/store/src/forwards_iter.rs @@ -1,6 +1,6 @@ use crate::errors::{Error, Result}; use crate::iter::{BlockRootsIterator, StateRootsIterator}; -use crate::{ColdStore, DBColumn, HotColdDB, ItemStore, SlotIter}; +use crate::{ColdStore, DBColumn, DBColumnCold, HotColdDB, ItemStore, SlotIter}; use itertools::process_results; use std::marker::PhantomData; use types::{BeaconState, EthSpec, Hash256, Slot}; @@ -134,11 +134,13 @@ impl<'a, E: EthSpec, 
Hot: ItemStore, Cold: ColdStore> start_slot: Slot, end_slot: Slot, ) -> Result { - if column != DBColumn::BeaconBlockRoots && column != DBColumn::BeaconStateRoots { - return Err(Error::ForwardsIterInvalidColumn(column)); - } + let cold_column = match column { + DBColumn::BeaconBlockRoots => DBColumnCold::BlockRoots, + DBColumn::BeaconStateRoots => DBColumnCold::StateRoots, + _ => return Err(Error::ForwardsIterInvalidColumn(column)), + }; Ok(Self { - inner: store.cold_db.iter_from(column, start_slot), + inner: store.cold_db.iter_from(cold_column, start_slot), column, next_slot: start_slot, end_slot, diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index e117709527b..5f9c00d46ba 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -13,8 +13,8 @@ use crate::metadata::{ }; use crate::state_cache::{PutStateOutcome, StateCache}; use crate::{ - BlobSidecarListFromRoot, ColdStore, DBColumn, DBColumnColdIndex, DatabaseBlock, Error, - ItemStore, KeyValueStoreOp, StoreItem, StoreOp, get_data_column_key, + BlobSidecarListFromRoot, ColdStore, DBColumn, DBColumnCold, DBColumnColdIndex, DatabaseBlock, + Error, ItemStore, KeyValueStoreOp, StoreItem, StoreOp, get_data_column_key, metrics::{self, COLD_METRIC, HOT_METRIC}, parse_data_column_key, }; @@ -735,7 +735,7 @@ impl, Cold: ColdStore> HotColdDB .get_bytes(DBColumn::BeaconBlock, block_root.as_slice())? .map(|block_bytes| decoder(&block_bytes)) .transpose() - .map_err(Into::into) + .map_err(|e| e.into()) } pub fn get_payload_envelope( @@ -2087,9 +2087,9 @@ impl, Cold: ColdStore> HotColdDB /// must be durable before any hot index entry that references it. 
pub fn commit_cold_items( &self, - cold_items: Vec<(DBColumn, Slot, Vec)>, + cold_items: Vec<(DBColumnCold, Slot, Vec)>, ) -> Result<(), Error> { - let mut groups: HashMap)>> = HashMap::new(); + let mut groups: HashMap)>> = HashMap::new(); for (col, slot, value) in cold_items { groups.entry(col).or_default().push((slot, value)); } @@ -2103,7 +2103,7 @@ impl, Cold: ColdStore> HotColdDB &self, state_root: &Hash256, slot: Slot, - cold_items: &mut Vec<(DBColumn, Slot, Vec)>, + cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, summary_index: &mut Vec<(Hash256, Slot)>, ) -> Result<(), Error> { // BeaconColdStateSummary is a state_root → slot index owned by the cold backend. @@ -2111,7 +2111,7 @@ impl, Cold: ColdStore> HotColdDB // caller is responsible for the ordering. summary_index.push((*state_root, slot)); cold_items.push(( - DBColumn::BeaconStateRoots, + DBColumnCold::StateRoots, slot, state_root.as_slice().to_vec(), )); @@ -2123,7 +2123,7 @@ impl, Cold: ColdStore> HotColdDB &self, state_root: &Hash256, state: &BeaconState, - cold_items: &mut Vec<(DBColumn, Slot, Vec)>, + cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, summary_index: &mut Vec<(Hash256, Slot)>, ) -> Result<(), Error> { self.store_cold_state_summary(state_root, state.slot(), cold_items, summary_index)?; @@ -2163,7 +2163,7 @@ impl, Cold: ColdStore> HotColdDB pub fn store_cold_state_as_snapshot( &self, state: &BeaconState, - cold_items: &mut Vec<(DBColumn, Slot, Vec)>, + cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, ) -> Result<(), Error> { let bytes = state.as_ssz_bytes(); let compressed_value = { @@ -2176,16 +2176,12 @@ impl, Cold: ColdStore> HotColdDB out }; - cold_items.push(( - DBColumn::BeaconStateSnapshot, - state.slot(), - compressed_value, - )); + cold_items.push((DBColumnCold::StateSnapshot, state.slot(), compressed_value)); Ok(()) } fn load_cold_state_bytes_as_snapshot(&self, slot: Slot) -> Result>, Error> { - match self.cold_db.get(DBColumn::BeaconStateSnapshot, slot)? 
{ + match self.cold_db.get(DBColumnCold::StateSnapshot, slot)? { Some(bytes) => { let _timer = metrics::start_timer(&metrics::STORE_BEACON_STATE_FREEZER_DECOMPRESS_TIME); @@ -2276,7 +2272,7 @@ impl, Cold: ColdStore> HotColdDB &self, state: &BeaconState, from_slot: Slot, - cold_items: &mut Vec<(DBColumn, Slot, Vec)>, + cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, ) -> Result<(), Error> { // Load diff base state bytes. let (_, base_buffer) = { @@ -2299,7 +2295,7 @@ impl, Cold: ColdStore> HotColdDB diff_bytes.len() as f64, ); - cold_items.push((DBColumn::BeaconStateDiff, state.slot(), diff_bytes)); + cold_items.push((DBColumnCold::StateDiff, state.slot(), diff_bytes)); Ok(()) } @@ -2421,7 +2417,7 @@ impl, Cold: ColdStore> HotColdDB let bytes = { let _t = metrics::start_timer_vec(&metrics::BEACON_HDIFF_READ_TIME, COLD_METRIC); self.cold_db - .get(DBColumn::BeaconStateDiff, slot)? + .get(DBColumnCold::StateDiff, slot)? .ok_or(HotColdDBError::MissingHDiff(slot))? }; let hdiff = { @@ -3163,11 +3159,11 @@ impl, Cold: ColdStore> HotColdDB end_slot: Slot, block_root: Hash256, ) -> Result)>, Error> { - let mut items = vec![]; + let mut ops = vec![]; for slot in start_slot.as_u64()..end_slot.as_u64() { - items.push((Slot::new(slot), block_root.as_slice().to_vec())); + ops.push((Slot::new(slot), block_root.as_slice().to_vec())); } - Ok(items) + Ok(ops) } /// Return a single block root from the cold DB. @@ -3176,7 +3172,7 @@ impl, Cold: ColdStore> HotColdDB pub fn get_cold_block_root(&self, slot: Slot) -> Result, Error> { Ok(self .cold_db - .get(DBColumn::BeaconBlockRoots, slot)? + .get(DBColumnCold::BlockRoots, slot)? .map(|bytes| Hash256::from_ssz_bytes(&bytes)) .transpose()?) } @@ -3189,7 +3185,7 @@ impl, Cold: ColdStore> HotColdDB pub fn get_cold_state_root(&self, slot: Slot) -> Result, Error> { Ok(self .cold_db - .get(DBColumn::BeaconStateRoots, slot)? + .get(DBColumnCold::StateRoots, slot)? .map(|bytes| Hash256::from_ssz_bytes(&bytes)) .transpose()?) 
} @@ -3545,8 +3541,7 @@ pub fn migrate_database, Cold: ColdStore>( return Err(HotColdDBError::FreezeSlotUnaligned(finalized_state.slot()).into()); } - // Block-side cold puts (BeaconBlockRoots), accumulated across all states in this batch. - let mut cold_block_root_items: Vec<(Slot, Vec)> = vec![]; + let mut cold_db_block_ops: Vec<(Slot, Vec)> = vec![]; // Cold-DB root index for state summaries (state_root -> slot). // Committed after the slot-keyed cold data so a crash leaves no dangling indices. let mut cold_state_summary_index: Vec<(Hash256, Slot)> = vec![]; @@ -3561,7 +3556,7 @@ pub fn migrate_database, Cold: ColdStore>( // Then, iterate states in slot ascending order, as they are stored wrt previous states. for (block_root, state_root, slot) in state_roots.iter().rev() { // Store the slot to block root mapping. - cold_block_root_items.push((*slot, block_root.as_slice().to_vec())); + cold_db_block_ops.push((*slot, block_root.as_slice().to_vec())); // Do not try to store states if a restore point is yet to be stored, or will never be // stored (see `STATE_UPPER_LIMIT_NO_RETAIN`). Make an exception for the genesis state @@ -3570,7 +3565,7 @@ pub fn migrate_database, Cold: ColdStore>( continue; } - let mut cold_state_items: Vec<(DBColumn, Slot, Vec)> = vec![]; + let mut cold_db_state_ops: Vec<(DBColumnCold, Slot, Vec)> = vec![]; // Only store the cold state if it's on a diff boundary. 
// Calling `store_cold_state_summary` instead of `store_cold_state` for those allows us @@ -3586,7 +3581,7 @@ pub fn migrate_database, Cold: ColdStore>( store.store_cold_state_summary( state_root, *slot, - &mut cold_state_items, + &mut cold_db_state_ops, &mut cold_state_summary_index, )?; } else { @@ -3599,7 +3594,7 @@ pub fn migrate_database, Cold: ColdStore>( store.store_cold_state( state_root, &state, - &mut cold_state_items, + &mut cold_db_state_ops, &mut cold_state_summary_index, )?; } @@ -3607,7 +3602,7 @@ pub fn migrate_database, Cold: ColdStore>( // Cold states are diffed with respect to each other, so we need to finish writing previous // slot-keyed cold data before staging new entries. Index commits ride along to the end of // the migration so all root indices land after every cold-bulk write is durable. - store.commit_cold_items(cold_state_items)?; + store.commit_cold_items(cold_db_state_ops)?; } // Warning: Critical section. We have to take care not to put any of the two databases in an @@ -3623,7 +3618,7 @@ pub fn migrate_database, Cold: ColdStore>( // so a mid-migration crash leaves cold data without dangling indices. 
store .cold_db - .put_batch(DBColumn::BeaconBlockRoots, cold_block_root_items)?; + .put_batch(DBColumnCold::BlockRoots, cold_db_block_ops)?; store.cold_db.sync()?; store.cold_db.put_index_batch( DBColumnColdIndex::ColdStateSummary, diff --git a/beacon_node/store/src/invariants.rs b/beacon_node/store/src/invariants.rs index f8ed0ab66b3..46f54be565d 100644 --- a/beacon_node/store/src/invariants.rs +++ b/beacon_node/store/src/invariants.rs @@ -7,7 +7,7 @@ use crate::hdiff::StorageStrategy; use crate::hot_cold_store::HotStateSummary; -use crate::{ColdStore, DBColumn, Error, ItemStore}; +use crate::{ColdStore, DBColumn, DBColumnCold, Error, ItemStore}; use crate::{HotColdDB, Split}; use serde::Serialize; use ssz::Decode; @@ -581,7 +581,7 @@ impl, Cold: ColdStore> HotColdDB for slot_val in anchor_info.oldest_block_slot.as_u64()..split.slot.as_u64() { let slot = Slot::new(slot_val); - let block_root_bytes = self.cold_db.get(DBColumn::BeaconBlockRoots, slot)?; + let block_root_bytes = self.cold_db.get(DBColumnCold::BlockRoots, slot)?; let Some(root_bytes) = block_root_bytes else { result.add_violation(InvariantViolation::ColdBlockRootMissing { @@ -632,7 +632,7 @@ impl, Cold: ColdStore> HotColdDB if slot <= anchor_info.state_lower_limit || slot >= cmp::min(split.slot, anchor_info.state_upper_limit) { - let Some(root_bytes) = self.cold_db.get(DBColumn::BeaconStateRoots, slot)? else { + let Some(root_bytes) = self.cold_db.get(DBColumnCold::StateRoots, slot)? else { result.add_violation(InvariantViolation::ColdStateRootMissing { slot, state_lower_limit: anchor_info.state_lower_limit, @@ -706,7 +706,7 @@ impl, Cold: ColdStore> HotColdDB continue; } - let Some(root_bytes) = self.cold_db.get(DBColumn::BeaconStateRoots, slot)? else { + let Some(root_bytes) = self.cold_db.get(DBColumnCold::StateRoots, slot)? else { continue; }; if root_bytes.len() != 32 { @@ -720,8 +720,7 @@ impl, Cold: ColdStore> HotColdDB match self.hierarchy.storage_strategy(slot, Slot::new(0))? 
{ StorageStrategy::Snapshot => { - let has_snapshot = - self.cold_db.contains(DBColumn::BeaconStateSnapshot, slot)?; + let has_snapshot = self.cold_db.contains(DBColumnCold::StateSnapshot, slot)?; if !has_snapshot { result.add_violation(InvariantViolation::ColdStateMissingSnapshot { state_root, @@ -730,7 +729,7 @@ impl, Cold: ColdStore> HotColdDB } } StorageStrategy::DiffFrom(base_slot) => { - let has_diff = self.cold_db.contains(DBColumn::BeaconStateDiff, slot)?; + let has_diff = self.cold_db.contains(DBColumnCold::StateDiff, slot)?; if !has_diff { result.add_violation(InvariantViolation::ColdStateMissingDiff { state_root, diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 5d3371cf570..26a4705b08d 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -21,7 +21,6 @@ pub mod metadata; pub mod metrics; pub mod reconstruct; pub mod state_cache; -pub mod static_blobs; pub mod static_cold; pub mod database; @@ -31,7 +30,6 @@ pub use self::blob_sidecar_list_from_root::BlobSidecarListFromRoot; pub use self::config::StoreConfig; pub use self::hot_cold_store::{HotColdDB, HotStateSummary, Split}; pub use self::memory_store::MemoryStore; -pub use self::static_blobs::StaticBlobStore; pub use self::static_cold::StaticColdStore; pub use crate::metadata::BlobInfo; pub use errors::Error; @@ -164,13 +162,13 @@ impl DBColumnColdIndex { pub trait ColdStore: Sync + Send + Sized + 'static { // Slot-keyed bulk data. 
- fn get(&self, column: DBColumn, slot: Slot) -> Result>, Error>; + fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error>; - fn put_batch(&self, column: DBColumn, items: Vec<(Slot, Vec)>) -> Result<(), Error>; + fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error>; - fn contains(&self, column: DBColumn, slot: Slot) -> Result; + fn contains(&self, column: DBColumnCold, slot: Slot) -> Result; - fn iter_from(&self, column: DBColumn, from: Slot) -> SlotIter<'_>; + fn iter_from(&self, column: DBColumnCold, from: Slot) -> SlotIter<'_>; // Root-keyed indices owned by the cold backend. fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error>; diff --git a/beacon_node/store/src/memory_store.rs b/beacon_node/store/src/memory_store.rs index ea769d11f73..337c11149f5 100644 --- a/beacon_node/store/src/memory_store.rs +++ b/beacon_node/store/src/memory_store.rs @@ -1,6 +1,7 @@ use crate::{ - ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnColdIndex, Error, ItemStore, Key, - KeyValueStore, KeyValueStoreOp, SlotIter, get_key_for_col, hot_cold_store::BytesKey, + ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnCold, DBColumnColdIndex, Error, + ItemStore, Key, KeyValueStore, KeyValueStoreOp, SlotIter, get_key_for_col, + hot_cold_store::BytesKey, }; use parking_lot::RwLock; use ssz::{Decode, Encode}; @@ -155,34 +156,38 @@ impl KeyValueStore for MemoryStore { impl ItemStore for MemoryStore {} impl ColdStore for MemoryStore { - fn get(&self, column: DBColumn, slot: Slot) -> Result>, Error> { - KeyValueStore::get_bytes(self, column, &slot.as_u64().to_be_bytes()) + fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error> { + KeyValueStore::get_bytes(self, column.db_column(), &slot.as_u64().to_be_bytes()) } - fn put_batch(&self, column: DBColumn, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + let col 
= column.db_column(); let ops = items .into_iter() .map(|(slot, value)| { - KeyValueStoreOp::PutKeyValue(column, slot.as_u64().to_be_bytes().to_vec(), value) + KeyValueStoreOp::PutKeyValue(col, slot.as_u64().to_be_bytes().to_vec(), value) }) .collect(); KeyValueStore::do_atomically(self, ops) } - fn contains(&self, column: DBColumn, slot: Slot) -> Result { - KeyValueStore::key_exists(self, column, &slot.as_u64().to_be_bytes()) + fn contains(&self, column: DBColumnCold, slot: Slot) -> Result { + KeyValueStore::key_exists(self, column.db_column(), &slot.as_u64().to_be_bytes()) } - fn iter_from(&self, column: DBColumn, from: Slot) -> SlotIter<'_> { + fn iter_from(&self, column: DBColumnCold, from: Slot) -> SlotIter<'_> { Box::new( - KeyValueStore::iter_column_from::>(self, column, &from.as_u64().to_be_bytes()) - .map(|res| { - res.and_then(|(key_bytes, value)| { - let bytes: [u8; 8] = - key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; - Ok((Slot::new(u64::from_be_bytes(bytes)), value)) - }) - }), + KeyValueStore::iter_column_from::>( + self, + column.db_column(), + &from.as_u64().to_be_bytes(), + ) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), ) } diff --git a/beacon_node/store/src/reconstruct.rs b/beacon_node/store/src/reconstruct.rs index 6415c275374..2792cfb7aa4 100644 --- a/beacon_node/store/src/reconstruct.rs +++ b/beacon_node/store/src/reconstruct.rs @@ -2,7 +2,7 @@ use crate::forwards_iter::FrozenForwardsIterator; use crate::hot_cold_store::{HotColdDB, HotColdDBError}; use crate::metrics; -use crate::{ColdStore, DBColumn, DBColumnColdIndex, Error, ItemStore}; +use crate::{ColdStore, DBColumn, DBColumnCold, DBColumnColdIndex, Error, ItemStore}; use itertools::{Itertools, process_results}; use state_processing::{ BlockSignatureStrategy, ConsensusContext, VerifyBlockRoot, per_block_processing, @@ -129,7 +129,7 
@@ where state.build_caches(&self.spec)?; process_results(block_root_iter, |iter| -> Result<(), Error> { - let mut cold_items: Vec<(DBColumn, Slot, Vec)> = vec![]; + let mut io_batch: Vec<(DBColumnCold, Slot, Vec)> = vec![]; let mut summary_index: Vec<(Hash256, Slot)> = vec![]; let mut prev_state_root = None; @@ -173,7 +173,7 @@ where .or_else(|_| state.update_tree_hash_cache())?; // Stage state for storage in freezer DB. - self.store_cold_state(&state_root, &state, &mut cold_items, &mut summary_index)?; + self.store_cold_state(&state_root, &state, &mut io_batch, &mut summary_index)?; let batch_complete = slot + 1 == to_slot; @@ -184,7 +184,7 @@ where if self.hierarchy.should_commit_immediately(slot)? || batch_complete { // Slot-keyed cold bulk first, root index after — a mid-flush crash leaves // cold data with no dangling index entry. - self.commit_cold_items(std::mem::take(&mut cold_items))?; + self.commit_cold_items(std::mem::take(&mut io_batch))?; self.cold_db.put_index_batch( DBColumnColdIndex::ColdStateSummary, std::mem::take(&mut summary_index), diff --git a/beacon_node/store/src/static_blobs.rs b/beacon_node/store/src/static_blobs.rs deleted file mode 100644 index c221a9c299e..00000000000 --- a/beacon_node/store/src/static_blobs.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Slot-keyed archive API for finalized blob sidecars. -//! -//! This is the minimal surface needed to test HotColdDB integration. The file -//! backend is intentionally not implemented yet. 
- -use std::{ - fmt, io, - path::{Path, PathBuf}, -}; -use types::Slot; - -#[derive(Debug)] -pub struct StaticBlobStore { - root_dir: PathBuf, -} - -#[derive(Debug)] -pub enum StaticBlobStoreError { - Io(io::Error), - Unsupported(&'static str), -} - -impl fmt::Display for StaticBlobStoreError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Io(e) => write!(f, "static blob store io error: {e}"), - Self::Unsupported(message) => { - write!(f, "static blob store unsupported operation: {message}") - } - } - } -} - -impl From for StaticBlobStoreError { - fn from(e: io::Error) -> Self { - Self::Io(e) - } -} - -impl StaticBlobStore { - /// Open the archive rooted at `path`. - pub fn open(path: &Path) -> Result { - Ok(Self { - root_dir: path.to_path_buf(), - }) - } - - /// Read SSZ-encoded blob sidecars for `slot`, if present. - pub fn get(&self, _slot: Slot) -> Result>, StaticBlobStoreError> { - let _ = &self.root_dir; - Err(StaticBlobStoreError::Unsupported("get")) - } - - /// Store SSZ-encoded blob sidecars at `slot`. - pub fn put(&self, _slot: Slot, _bytes: &[u8]) -> Result<(), StaticBlobStoreError> { - let _ = &self.root_dir; - Err(StaticBlobStoreError::Unsupported("put")) - } -} From f671da1fd5f1515bde37eb87114d91096f1c3498 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 20:12:17 +0200 Subject: [PATCH 10/24] Drop dead BeaconBlockSlot column and refresh TODO `BeaconBlockSlot` (and the `DBColumnColdIndex::BlockSlot` variant that wrapped it) was added for a static-archive read-fallback path that was removed earlier in this branch. Nothing writes or reads it now, so drop the variant from the DBColumn enum, the matching DBColumnColdIndex variant, the `MissingFrozenBlockSlot` error, and the corresponding key_size match arm. 
Rewrite TODO-static-block-storage.md to reflect the current branch state: the static-cold generalization is in, the prune-states removal is in, and the remaining work is cold-backend selection (flag), review of block read/write paths now that BeaconBlockSlot is gone, an invariants review, and tests. --- TODO-static-block-storage.md | 81 +++++++++++++------------ beacon_node/store/src/hot_cold_store.rs | 7 +-- beacon_node/store/src/lib.rs | 10 --- 3 files changed, 46 insertions(+), 52 deletions(-) diff --git a/TODO-static-block-storage.md b/TODO-static-block-storage.md index 385cb13b59c..763a39d6d8c 100644 --- a/TODO-static-block-storage.md +++ b/TODO-static-block-storage.md @@ -1,42 +1,47 @@ -# Static Block Storage TODO - -Current spec: [`specs/static-blocks.md`](./specs/static-blocks.md) - -Implemented: -- static block file format spec -- `StaticBlockStore::open/get/put` -- snappy-framed block records -- fixed-size `.off` sidecar files -- global `static_blocks.conf` commit marker -- startup healing for interrupted writes +# Static Cold Storage TODO + +Current spec: [`specs/static-cold-backend.md`](./specs/static-cold-backend.md) +(file format inherited from [`specs/static-blocks.md`](./specs/static-blocks.md) +and generalised per-column). + +Implemented in this branch: +- multi-column slot-keyed store: `StaticColdStore` (one type, dispatched on + `DBColumnCold`) +- per-column subdirectory + per-column conf with persisted `record_type`, + `compression`, `max_value_bytes` (conf magic `LHSTBLK2`) +- `ColdStore` trait covering both slot-keyed bulk and root-keyed indices + (`DBColumnColdIndex`); KV backends impl by translating slot/root keys into + the underlying `KeyValueStore` +- startup healing for interrupted writes (per-column) +- `prune_historic_states` removed (mode it produced is not in the spec's + startup-path table) Remaining: -1. Wire startup/config. 
- - add CLI/config path for enabling static block storage - - initialize `HotColdDB::static_blocks` - - reject checkpoint sync, late activation, and historical backfill init modes - -2. Bump schema. - - `DBColumn::BeaconBlockSlot` was added - - update schema version in `beacon_node/store/src/metadata.rs` - -3. Verify static fallback reads. - - after `static_blocks.get(slot)`, decode and verify the block root matches the requested root - - treat mismatches as corruption - -4. Update invariants. - - archived finalized blocks no longer require hot-db block bodies - - root/slot indices must remain consistent with static storage - -5. Add tests. - - archive/read happy path - - skip-slot dedup +1. Cold backend selection. + - add a CLI/config flag to switch the cold backend between the existing + KV implementation and the static-file implementation + - reject startup combinations the spec doesn't allow (e.g. checkpoint sync + without complete static history into static-archive mode) + +2. Review block read/write paths. + - decide where finalized blocks live in the static-cold mode + (`DBColumn::BeaconBlock`? a new slot-keyed `DBColumnCold::Block`?) + - root → slot resolution: with `BeaconBlockSlot` removed, no on-disk index + maps a block_root to its slot. Choose a path: bring the index back + (whether in hot or in the cold backend), perform a slot-walk, or reject + root-keyed reads in static-cold mode + - update `HotColdDB::get_block_with` and `block_exists` accordingly + +3. Review invariants. + - it is unclear whether invariants 10/11/12 still hold under static-cold + mode. Walk through each and confirm or update — in particular, archived + blocks no longer needing hot-DB block bodies, and the consistency of + root-to-slot indices once their location is decided in (2) + +4. Tests. 
+ - happy path for `StaticColdStore::open/get/put` per cold column - out-of-order put rejection - - crash windows around data, `.off`, and `.conf` - - wrong `BeaconBlockSlot` - - unsupported startup modes - -6. Decide decompression bound wiring. - - current implementation uses a local 10 MiB bound - - consider passing consensus `max_payload_size` or another store config value + - crash windows around data, `.off`, and per-column `.conf` + - cold backend selection via CLI flag + - rejected startup-mode combinations diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 5f9c00d46ba..a98d32193e8 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -189,7 +189,6 @@ pub enum HotColdDBError { MissingExecutionPayloadEnvelope(Hash256), MissingFullBlockExecutionPayloadPruned(Hash256, Slot), MissingAnchorInfo, - MissingFrozenBlockSlot(Hash256), MissingFrozenBlock(Slot), MissingPathToBlobsDatabase, BlobsPreviouslyInDefaultStore, @@ -2082,9 +2081,9 @@ impl, Cold: ColdStore> HotColdDB /// Group `cold_items` by column and write each column to the cold backend. /// - /// Used to commit pre-finalization cold writes ahead of the matching hot-DB index puts - /// (BeaconColdStateSummary, BeaconBlockSlot). Order matters for crash safety: cold data - /// must be durable before any hot index entry that references it. + /// Used to commit pre-finalization slot-keyed cold writes ahead of the matching + /// `BeaconColdStateSummary` root-index put. Order matters for crash safety: slot-keyed + /// cold data must be durable before the index entry that references it. 
pub fn commit_cold_items( &self, cold_items: Vec<(DBColumnCold, Slot, Vec)>, diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 26a4705b08d..d36327ebbff 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -145,8 +145,6 @@ impl DBColumnCold { /// Root-keyed indices owned by the cold backend. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum DBColumnColdIndex { - /// `block_root -> slot` for finalized blocks. - BlockSlot, /// `state_root -> slot` for cold state summaries. ColdStateSummary, } @@ -154,7 +152,6 @@ pub enum DBColumnColdIndex { impl DBColumnColdIndex { pub fn db_column(self) -> DBColumn { match self { - Self::BlockSlot => DBColumn::BeaconBlockSlot, Self::ColdStateSummary => DBColumn::BeaconColdStateSummary, } } @@ -421,12 +418,6 @@ pub enum DBColumn { /// necessary to guarantee atomicity of the upgrade migration. #[strum(serialize = "bbx")] BeaconBlockRoots, - /// Mapping from block root to slot for blocks whose blinded bodies have been sealed - /// into static block files. Populated by the era-sealer; consulted by - /// `HotColdDB::get_finalized_blinded_block_slot` to resolve root-keyed reads against - /// the (slot-keyed) `StaticBlockStore`. - #[strum(serialize = "bbs")] - BeaconBlockSlot, /// DEPRECATED. This is the previous column for beacon block roots stored by "chunk index". /// /// Can be removed once schema v22 is buried by a hard fork. 
@@ -487,7 +478,6 @@ impl DBColumn { Self::OverflowLRUCache => 33, // DEPRECATED Self::BeaconMeta | Self::BeaconBlock - | Self::BeaconBlockSlot | Self::BeaconState | Self::BeaconBlob | Self::BeaconStateSummary From 054d81c692c87b8b0a504f138236e35e90569257 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 20:17:46 +0200 Subject: [PATCH 11/24] Blanket impl ColdStore for any KeyValueStore The two explicit impls (BeaconNodeBackend, MemoryStore) were identical boilerplate translating slot/root keys into the underlying byte-keyed KeyValueStore. Replace with a single blanket impl in lib.rs. Forecloses a future ColdStore impl that isn't a KeyValueStore (e.g. wiring StaticColdStore directly as the Cold parameter); reversible if/when that becomes wanted. --- beacon_node/store/src/database/interface.rs | 79 +-------------------- beacon_node/store/src/lib.rs | 67 +++++++++++++++++ beacon_node/store/src/memory_store.rs | 71 +----------------- 3 files changed, 70 insertions(+), 147 deletions(-) diff --git a/beacon_node/store/src/database/interface.rs b/beacon_node/store/src/database/interface.rs index 12fdb2a079a..5646f1179c8 100644 --- a/beacon_node/store/src/database/interface.rs +++ b/beacon_node/store/src/database/interface.rs @@ -2,16 +2,11 @@ use crate::database::leveldb_impl; #[cfg(feature = "redb")] use crate::database::redb_impl; -use crate::{ - ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnCold, DBColumnColdIndex, Error, - ItemStore, Key, KeyValueStore, SlotIter, metrics, -}; +use crate::{ColumnIter, ColumnKeyIter, DBColumn, Error, ItemStore, Key, KeyValueStore, metrics}; use crate::{KeyValueStoreOp, StoreConfig, config::DatabaseBackend}; -use ssz::{Decode, Encode}; use std::collections::HashSet; use std::path::Path; use types::EthSpec; -use types::{Hash256, Slot}; pub enum BeaconNodeBackend { #[cfg(feature = "leveldb")] @@ -22,78 +17,6 @@ pub enum BeaconNodeBackend { impl ItemStore for BeaconNodeBackend 
{} -impl ColdStore for BeaconNodeBackend { - fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error> { - KeyValueStore::get_bytes(self, column.db_column(), &slot.as_u64().to_be_bytes()) - } - - fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { - let col = column.db_column(); - let ops = items - .into_iter() - .map(|(slot, value)| { - crate::KeyValueStoreOp::PutKeyValue( - col, - slot.as_u64().to_be_bytes().to_vec(), - value, - ) - }) - .collect(); - KeyValueStore::do_atomically(self, ops) - } - - fn contains(&self, column: DBColumnCold, slot: Slot) -> Result { - KeyValueStore::key_exists(self, column.db_column(), &slot.as_u64().to_be_bytes()) - } - - fn iter_from(&self, column: DBColumnCold, from: Slot) -> SlotIter<'_> { - Box::new( - KeyValueStore::iter_column_from::>( - self, - column.db_column(), - &from.as_u64().to_be_bytes(), - ) - .map(|res| { - res.and_then(|(key_bytes, value)| { - let bytes: [u8; 8] = key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; - Ok((Slot::new(u64::from_be_bytes(bytes)), value)) - }) - }), - ) - } - - fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error> { - Ok( - KeyValueStore::get_bytes(self, column.db_column(), root.as_slice())? 
- .map(|bytes| Slot::from_ssz_bytes(&bytes)) - .transpose()?, - ) - } - - fn put_index_batch( - &self, - column: DBColumnColdIndex, - items: Vec<(Hash256, Slot)>, - ) -> Result<(), Error> { - let col = column.db_column(); - let ops = items - .into_iter() - .map(|(root, slot)| { - crate::KeyValueStoreOp::PutKeyValue( - col, - root.as_slice().to_vec(), - slot.as_ssz_bytes(), - ) - }) - .collect(); - KeyValueStore::do_atomically(self, ops) - } - - fn sync(&self) -> Result<(), Error> { - KeyValueStore::sync(self) - } -} - impl KeyValueStore for BeaconNodeBackend { fn get_bytes(&self, column: DBColumn, key: &[u8]) -> Result>, Error> { match self { diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index d36327ebbff..f17b88a8887 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -35,6 +35,7 @@ pub use crate::metadata::BlobInfo; pub use errors::Error; pub use metadata::AnchorInfo; pub use metrics::scrape_for_metrics; +use ssz::{Decode, Encode}; use std::collections::HashSet; use std::sync::Arc; use strum::{EnumIter, EnumString, IntoStaticStr}; @@ -179,6 +180,72 @@ pub trait ColdStore: Sync + Send + Sized + 'static { fn sync(&self) -> Result<(), Error>; } +/// Every `KeyValueStore` is a `ColdStore`: the slot/root keys round-trip through +/// the underlying byte-keyed columns. 
+impl> ColdStore for T { + fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error> { + KeyValueStore::get_bytes(self, column.db_column(), &slot.as_u64().to_be_bytes()) + } + + fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + let col = column.db_column(); + let ops = items + .into_iter() + .map(|(slot, value)| { + KeyValueStoreOp::PutKeyValue(col, slot.as_u64().to_be_bytes().to_vec(), value) + }) + .collect(); + KeyValueStore::do_atomically(self, ops) + } + + fn contains(&self, column: DBColumnCold, slot: Slot) -> Result { + KeyValueStore::key_exists(self, column.db_column(), &slot.as_u64().to_be_bytes()) + } + + fn iter_from(&self, column: DBColumnCold, from: Slot) -> SlotIter<'_> { + Box::new( + KeyValueStore::iter_column_from::>( + self, + column.db_column(), + &from.as_u64().to_be_bytes(), + ) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), + ) + } + + fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error> { + Ok( + KeyValueStore::get_bytes(self, column.db_column(), root.as_slice())? 
+ .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?, + ) + } + + fn put_index_batch( + &self, + column: DBColumnColdIndex, + items: Vec<(Hash256, Slot)>, + ) -> Result<(), Error> { + let col = column.db_column(); + let ops = items + .into_iter() + .map(|(root, slot)| { + KeyValueStoreOp::PutKeyValue(col, root.as_slice().to_vec(), slot.as_ssz_bytes()) + }) + .collect(); + KeyValueStore::do_atomically(self, ops) + } + + fn sync(&self) -> Result<(), Error> { + KeyValueStore::sync(self) + } +} + pub trait Key: Sized + 'static { fn from_bytes(key: &[u8]) -> Result; } diff --git a/beacon_node/store/src/memory_store.rs b/beacon_node/store/src/memory_store.rs index 337c11149f5..3f127aabfb1 100644 --- a/beacon_node/store/src/memory_store.rs +++ b/beacon_node/store/src/memory_store.rs @@ -1,13 +1,10 @@ use crate::{ - ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnCold, DBColumnColdIndex, Error, - ItemStore, Key, KeyValueStore, KeyValueStoreOp, SlotIter, get_key_for_col, - hot_cold_store::BytesKey, + ColumnIter, ColumnKeyIter, DBColumn, Error, ItemStore, Key, KeyValueStore, KeyValueStoreOp, + get_key_for_col, hot_cold_store::BytesKey, }; use parking_lot::RwLock; -use ssz::{Decode, Encode}; use std::collections::{BTreeMap, HashSet}; use std::marker::PhantomData; -use types::Hash256; use types::*; type DBMap = BTreeMap>; @@ -154,67 +151,3 @@ impl KeyValueStore for MemoryStore { } impl ItemStore for MemoryStore {} - -impl ColdStore for MemoryStore { - fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error> { - KeyValueStore::get_bytes(self, column.db_column(), &slot.as_u64().to_be_bytes()) - } - - fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { - let col = column.db_column(); - let ops = items - .into_iter() - .map(|(slot, value)| { - KeyValueStoreOp::PutKeyValue(col, slot.as_u64().to_be_bytes().to_vec(), value) - }) - .collect(); - KeyValueStore::do_atomically(self, ops) - } - - fn contains(&self, column: 
DBColumnCold, slot: Slot) -> Result { - KeyValueStore::key_exists(self, column.db_column(), &slot.as_u64().to_be_bytes()) - } - - fn iter_from(&self, column: DBColumnCold, from: Slot) -> SlotIter<'_> { - Box::new( - KeyValueStore::iter_column_from::>( - self, - column.db_column(), - &from.as_u64().to_be_bytes(), - ) - .map(|res| { - res.and_then(|(key_bytes, value)| { - let bytes: [u8; 8] = key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; - Ok((Slot::new(u64::from_be_bytes(bytes)), value)) - }) - }), - ) - } - - fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error> { - Ok( - KeyValueStore::get_bytes(self, column.db_column(), root.as_slice())? - .map(|bytes| Slot::from_ssz_bytes(&bytes)) - .transpose()?, - ) - } - - fn put_index_batch( - &self, - column: DBColumnColdIndex, - items: Vec<(Hash256, Slot)>, - ) -> Result<(), Error> { - let col = column.db_column(); - let ops = items - .into_iter() - .map(|(root, slot)| { - KeyValueStoreOp::PutKeyValue(col, root.as_slice().to_vec(), slot.as_ssz_bytes()) - }) - .collect(); - KeyValueStore::do_atomically(self, ops) - } - - fn sync(&self) -> Result<(), Error> { - KeyValueStore::sync(self) - } -} From e93faae418a32a2f20784bf696bdc88681619bc3 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 20:24:35 +0200 Subject: [PATCH 12/24] Pin SSZ compatibility of slot index against legacy summary wrapper The blanket `ColdStore` impl writes `slot.as_ssz_bytes()` for `BeaconColdStateSummary`, where older releases wrote SSZ-encoded `ColdStateSummary { slot }`. The two encodings are byte-identical (an SSZ container of one fixed-size field equals the field), but the equality is load-bearing for read compatibility with existing databases. Add a regression test that pins it. 
--- beacon_node/store/src/lib.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index f17b88a8887..fc0dc6cb625 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -218,6 +218,10 @@ impl> ColdStore for T { ) } + // `Slot::as_ssz_bytes()` is byte-identical to the legacy + // `ColdStateSummary { slot }` wrapper, so existing dbs round-trip without + // migration. Pinned by `ssz_compat_with_legacy_summary`. + fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error> { Ok( KeyValueStore::get_bytes(self, column.db_column(), root.as_slice())? @@ -634,6 +638,21 @@ mod tests { } } + /// Mirrors the wrapper that older releases stored in `BeaconColdStateSummary`. + #[derive(Encode, Decode)] + struct LegacyColdStateSummary { + slot: Slot, + } + + #[test] + fn ssz_compat_with_legacy_summary() { + let slot = Slot::new(42); + assert_eq!( + slot.as_ssz_bytes(), + LegacyColdStateSummary { slot }.as_ssz_bytes(), + ); + } + fn test_impl(store: impl ItemStore) { let key = Hash256::random(); let item = StorableThing { a: 1, b: 42 }; From 52f9632d2e7ce8d4f8d73d809ee591a36676a20d Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 20:42:29 +0200 Subject: [PATCH 13/24] Add iter_index to ColdStore; revert invariant 12 to original shape The slot-walk rewrite of `check_cold_state_diff_consistency` was forced by not having an index iterator on the trait. Add `iter_index(col)` (yields `(Hash256, Slot)`) and restore the invariant to iterating `BeaconColdStateSummary` directly, matching unstable's structure modulo the slot-typed API. 
--- beacon_node/store/src/invariants.rs | 51 +++++++++++------------------ beacon_node/store/src/lib.rs | 10 ++++++ 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/beacon_node/store/src/invariants.rs b/beacon_node/store/src/invariants.rs index 46f54be565d..a47b1ded41c 100644 --- a/beacon_node/store/src/invariants.rs +++ b/beacon_node/store/src/invariants.rs @@ -7,7 +7,7 @@ use crate::hdiff::StorageStrategy; use crate::hot_cold_store::HotStateSummary; -use crate::{ColdStore, DBColumn, DBColumnCold, Error, ItemStore}; +use crate::{ColdStore, DBColumn, DBColumnCold, DBColumnColdIndex, Error, ItemStore}; use crate::{HotColdDB, Split}; use serde::Serialize; use ssz::Decode; @@ -653,7 +653,7 @@ impl, Cold: ColdStore> HotColdDB match self .cold_db - .get_index(crate::DBColumnColdIndex::ColdStateSummary, state_root)? + .get_index(DBColumnColdIndex::ColdStateSummary, state_root)? { None => { result.add_violation(InvariantViolation::ColdStateRootMissingSummary { @@ -690,56 +690,43 @@ impl, Cold: ColdStore> HotColdDB fn check_cold_state_diff_consistency(&self) -> Result { let mut result = InvariantCheckResult::new(); - // Iterate cold states by slot via the slot-keyed BeaconStateRoots index. The - // root-keyed `BeaconColdStateSummary` index is owned by the cold backend and is - // not directly iterable through `ColdStore`, so the pivot point is the slot. - let split = self.get_split_info(); - let anchor_info = self.get_anchor_info(); let mut summary_slots = HashSet::new(); let mut base_slot_refs = Vec::new(); - for slot_val in 0..split.slot.as_u64() { - let slot = Slot::new(slot_val); - if !(slot <= anchor_info.state_lower_limit - || slot >= cmp::min(split.slot, anchor_info.state_upper_limit)) - { - continue; - } - - let Some(root_bytes) = self.cold_db.get(DBColumnCold::StateRoots, slot)? 
else { - continue; - }; - if root_bytes.len() != 32 { - continue; - } - let state_root = Hash256::from_slice(&root_bytes); + for res in self.cold_db.iter_index(DBColumnColdIndex::ColdStateSummary) { + let (state_root, summary_slot) = res?; - // Summary presence is already checked by invariant 11; here we just need the - // hierarchy classification, which is a pure function of the slot. - summary_slots.insert(slot); + summary_slots.insert(summary_slot); - match self.hierarchy.storage_strategy(slot, Slot::new(0))? { + match self + .hierarchy + .storage_strategy(summary_slot, Slot::new(0))? + { StorageStrategy::Snapshot => { - let has_snapshot = self.cold_db.contains(DBColumnCold::StateSnapshot, slot)?; + let has_snapshot = self + .cold_db + .contains(DBColumnCold::StateSnapshot, summary_slot)?; if !has_snapshot { result.add_violation(InvariantViolation::ColdStateMissingSnapshot { state_root, - slot, + slot: summary_slot, }); } } StorageStrategy::DiffFrom(base_slot) => { - let has_diff = self.cold_db.contains(DBColumnCold::StateDiff, slot)?; + let has_diff = self + .cold_db + .contains(DBColumnCold::StateDiff, summary_slot)?; if !has_diff { result.add_violation(InvariantViolation::ColdStateMissingDiff { state_root, - slot, + slot: summary_slot, }); } - base_slot_refs.push((slot, base_slot)); + base_slot_refs.push((summary_slot, base_slot)); } StorageStrategy::ReplayFrom(base_slot) => { - base_slot_refs.push((slot, base_slot)); + base_slot_refs.push((summary_slot, base_slot)); } } } diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index fc0dc6cb625..ae6a0393ba9 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -109,6 +109,7 @@ pub trait KeyValueStore: Sync + Send + Sized + 'static { } pub type SlotIter<'a> = Box), Error>> + 'a>; +pub type IndexIter<'a> = Box> + 'a>; /// Slot-keyed cold columns served by the static archive. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] @@ -177,6 +178,8 @@ pub trait ColdStore: Sync + Send + Sized + 'static { items: Vec<(Hash256, Slot)>, ) -> Result<(), Error>; + fn iter_index(&self, column: DBColumnColdIndex) -> IndexIter<'_>; + fn sync(&self) -> Result<(), Error>; } @@ -245,6 +248,13 @@ impl> ColdStore for T { KeyValueStore::do_atomically(self, ops) } + fn iter_index(&self, column: DBColumnColdIndex) -> IndexIter<'_> { + Box::new( + KeyValueStore::iter_column::(self, column.db_column()) + .map(|res| res.and_then(|(root, value)| Ok((root, Slot::from_ssz_bytes(&value)?)))), + ) + } + fn sync(&self) -> Result<(), Error> { KeyValueStore::sync(self) } From a1ec72651aade50c9ea642da8d45efd0cf49c3ba Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 20:49:46 +0200 Subject: [PATCH 14/24] Bundle cold writes into ColdBatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the two-buffer (slot-keyed data + state-root index) helper signatures with a single `&mut ColdBatch` and add `commit_cold_batch` that flushes data, syncs, then commits the index — encoding the data-before-index ordering at the API. `put_state` and `reconstruct.rs` collapse to "build batch, commit batch." The migration loop keeps a top-level summary index that accumulates across states and is flushed at end-of-migration; per-iteration data still goes through `commit_cold_data` (renamed from `commit_cold_items`). 
--- beacon_node/store/src/hot_cold_store.rs | 108 +++++++++++++----------- beacon_node/store/src/lib.rs | 2 +- beacon_node/store/src/reconstruct.rs | 19 ++--- 3 files changed, 65 insertions(+), 64 deletions(-) diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index a98d32193e8..af0b4773357 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -88,6 +88,21 @@ pub struct HotColdDB, Cold: ColdStore> { _phantom: PhantomData, } +/// Pending cold-DB writes for a single commit unit (one state, one reconstruct +/// batch, etc.). +/// +/// Slot-keyed bulk data goes in `data`; the matching `BeaconColdStateSummary` +/// root index goes in `state_summary_index`. `commit_cold_batch` flushes them +/// in the correct order (data, sync, then index). +/// +/// Slots within `data` for any single column must arrive strictly ascending — +/// the static cold backend rejects out-of-order puts. +#[derive(Default)] +pub struct ColdBatch { + pub data: Vec<(DBColumnCold, Slot, Vec)>, + pub state_summary_index: Vec<(Hash256, Slot)>, +} + #[derive(Debug)] struct BlockCache { block_cache: LruCache>, @@ -1075,13 +1090,9 @@ impl, Cold: ColdStore> HotColdDB /// Store a state in the store. pub fn put_state(&self, state_root: &Hash256, state: &BeaconState) -> Result<(), Error> { if state.slot() < self.get_split_slot() { - let mut cold_items = Vec::new(); - let mut summary_index: Vec<(Hash256, Slot)> = Vec::new(); - self.store_cold_state(state_root, state, &mut cold_items, &mut summary_index)?; - // Cold bulk first; the index entry trails so a crash leaves no dangling pointer. 
- self.commit_cold_items(cold_items)?; - self.cold_db - .put_index_batch(DBColumnColdIndex::ColdStateSummary, summary_index) + let mut batch = ColdBatch::default(); + self.store_cold_state(state_root, state, &mut batch)?; + self.commit_cold_batch(batch) } else { let mut ops: Vec = Vec::new(); self.store_hot_state(state_root, state, &mut ops)?; @@ -2079,21 +2090,28 @@ impl, Cold: ColdStore> HotColdDB ) } - /// Group `cold_items` by column and write each column to the cold backend. - /// - /// Used to commit pre-finalization slot-keyed cold writes ahead of the matching - /// `BeaconColdStateSummary` root-index put. Order matters for crash safety: slot-keyed - /// cold data must be durable before the index entry that references it. - pub fn commit_cold_items( - &self, - cold_items: Vec<(DBColumnCold, Slot, Vec)>, - ) -> Result<(), Error> { + /// Commit `batch` to the cold backend. Slot-keyed bulk data goes first, the + /// `BeaconColdStateSummary` root index lands after a sync, so a crash leaves + /// no dangling index entry. + pub fn commit_cold_batch(&self, batch: ColdBatch) -> Result<(), Error> { + self.commit_cold_data(batch.data)?; + self.cold_db.sync()?; + self.cold_db.put_index_batch( + DBColumnColdIndex::ColdStateSummary, + batch.state_summary_index, + ) + } + + /// Group slot-keyed `data` by column and write each column to the cold backend. + /// Used by callers (e.g. migration) that want to commit data per-iteration but + /// defer the matching index entries to end-of-batch. 
+ pub fn commit_cold_data(&self, data: Vec<(DBColumnCold, Slot, Vec)>) -> Result<(), Error> { let mut groups: HashMap)>> = HashMap::new(); - for (col, slot, value) in cold_items { + for (col, slot, value) in data { groups.entry(col).or_default().push((slot, value)); } - for (col, batch) in groups { - self.cold_db.put_batch(col, batch)?; + for (col, items) in groups { + self.cold_db.put_batch(col, items)?; } Ok(()) } @@ -2102,14 +2120,10 @@ impl, Cold: ColdStore> HotColdDB &self, state_root: &Hash256, slot: Slot, - cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, - summary_index: &mut Vec<(Hash256, Slot)>, + batch: &mut ColdBatch, ) -> Result<(), Error> { - // BeaconColdStateSummary is a state_root → slot index owned by the cold backend. - // Slot-keyed bulk data must be durable before we commit the index entry; the - // caller is responsible for the ordering. - summary_index.push((*state_root, slot)); - cold_items.push(( + batch.state_summary_index.push((*state_root, slot)); + batch.data.push(( DBColumnCold::StateRoots, slot, state_root.as_slice().to_vec(), @@ -2122,10 +2136,9 @@ impl, Cold: ColdStore> HotColdDB &self, state_root: &Hash256, state: &BeaconState, - cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, - summary_index: &mut Vec<(Hash256, Slot)>, + batch: &mut ColdBatch, ) -> Result<(), Error> { - self.store_cold_state_summary(state_root, state.slot(), cold_items, summary_index)?; + self.store_cold_state_summary(state_root, state.slot(), batch)?; let slot = state.slot(); match self.cold_storage_strategy(slot)? 
{ @@ -2144,7 +2157,7 @@ impl, Cold: ColdStore> HotColdDB %slot, "Storing cold state" ); - self.store_cold_state_as_snapshot(state, cold_items)?; + self.store_cold_state_as_snapshot(state, batch)?; } StorageStrategy::DiffFrom(from) => { debug!( @@ -2153,7 +2166,7 @@ impl, Cold: ColdStore> HotColdDB %slot, "Storing cold state" ); - self.store_cold_state_as_diff(state, from, cold_items)?; + self.store_cold_state_as_diff(state, from, batch)?; } } Ok(()) @@ -2162,7 +2175,7 @@ impl, Cold: ColdStore> HotColdDB pub fn store_cold_state_as_snapshot( &self, state: &BeaconState, - cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, + batch: &mut ColdBatch, ) -> Result<(), Error> { let bytes = state.as_ssz_bytes(); let compressed_value = { @@ -2175,7 +2188,9 @@ impl, Cold: ColdStore> HotColdDB out }; - cold_items.push((DBColumnCold::StateSnapshot, state.slot(), compressed_value)); + batch + .data + .push((DBColumnCold::StateSnapshot, state.slot(), compressed_value)); Ok(()) } @@ -2271,7 +2286,7 @@ impl, Cold: ColdStore> HotColdDB &self, state: &BeaconState, from_slot: Slot, - cold_items: &mut Vec<(DBColumnCold, Slot, Vec)>, + batch: &mut ColdBatch, ) -> Result<(), Error> { // Load diff base state bytes. let (_, base_buffer) = { @@ -2294,7 +2309,9 @@ impl, Cold: ColdStore> HotColdDB diff_bytes.len() as f64, ); - cold_items.push((DBColumnCold::StateDiff, state.slot(), diff_bytes)); + batch + .data + .push((DBColumnCold::StateDiff, state.slot(), diff_bytes)); Ok(()) } @@ -3564,7 +3581,7 @@ pub fn migrate_database, Cold: ColdStore>( continue; } - let mut cold_db_state_ops: Vec<(DBColumnCold, Slot, Vec)> = vec![]; + let mut batch = ColdBatch::default(); // Only store the cold state if it's on a diff boundary. 
// Calling `store_cold_state_summary` instead of `store_cold_state` for those allows us @@ -3577,12 +3594,7 @@ pub fn migrate_database, Cold: ColdStore>( %slot, "Storing cold state" ); - store.store_cold_state_summary( - state_root, - *slot, - &mut cold_db_state_ops, - &mut cold_state_summary_index, - )?; + store.store_cold_state_summary(state_root, *slot, &mut batch)?; } else { // This is some state that we want to migrate to the freezer db. // There is no reason to cache this state. @@ -3590,18 +3602,14 @@ pub fn migrate_database, Cold: ColdStore>( .get_hot_state(state_root, false)? .ok_or(HotColdDBError::MissingStateToFreeze(*state_root))?; - store.store_cold_state( - state_root, - &state, - &mut cold_db_state_ops, - &mut cold_state_summary_index, - )?; + store.store_cold_state(state_root, &state, &mut batch)?; } // Cold states are diffed with respect to each other, so we need to finish writing previous - // slot-keyed cold data before staging new entries. Index commits ride along to the end of + // slot-keyed cold data before staging new entries. Index entries accumulate to the end of // the migration so all root indices land after every cold-bulk write is durable. - store.commit_cold_items(cold_db_state_ops)?; + store.commit_cold_data(batch.data)?; + cold_state_summary_index.append(&mut batch.state_summary_index); } // Warning: Critical section. We have to take care not to put any of the two databases in an diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index ae6a0393ba9..856189ce479 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -397,7 +397,7 @@ pub enum StoreOp<'a, E: EthSpec> { } /// A unique column identifier. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, IntoStaticStr, EnumString, EnumIter)] +#[derive(Debug, Clone, Copy, PartialEq, IntoStaticStr, EnumString, EnumIter)] pub enum DBColumn { /// For data related to the database itself. 
#[strum(serialize = "bma")] diff --git a/beacon_node/store/src/reconstruct.rs b/beacon_node/store/src/reconstruct.rs index 2792cfb7aa4..128be42dde3 100644 --- a/beacon_node/store/src/reconstruct.rs +++ b/beacon_node/store/src/reconstruct.rs @@ -1,8 +1,8 @@ //! Implementation of historic state reconstruction (given complete block history). use crate::forwards_iter::FrozenForwardsIterator; -use crate::hot_cold_store::{HotColdDB, HotColdDBError}; +use crate::hot_cold_store::{ColdBatch, HotColdDB, HotColdDBError}; use crate::metrics; -use crate::{ColdStore, DBColumn, DBColumnCold, DBColumnColdIndex, Error, ItemStore}; +use crate::{ColdStore, DBColumn, Error, ItemStore}; use itertools::{Itertools, process_results}; use state_processing::{ BlockSignatureStrategy, ConsensusContext, VerifyBlockRoot, per_block_processing, @@ -10,7 +10,7 @@ use state_processing::{ }; use std::sync::Arc; use tracing::{debug, info}; -use types::{EthSpec, Hash256, Slot}; +use types::{EthSpec, Slot}; impl HotColdDB where @@ -129,8 +129,7 @@ where state.build_caches(&self.spec)?; process_results(block_root_iter, |iter| -> Result<(), Error> { - let mut io_batch: Vec<(DBColumnCold, Slot, Vec)> = vec![]; - let mut summary_index: Vec<(Hash256, Slot)> = vec![]; + let mut batch = ColdBatch::default(); let mut prev_state_root = None; for ((prev_block_root, _), (block_root, slot)) in iter.tuple_windows() { @@ -173,7 +172,7 @@ where .or_else(|_| state.update_tree_hash_cache())?; // Stage state for storage in freezer DB. - self.store_cold_state(&state_root, &state, &mut io_batch, &mut summary_index)?; + self.store_cold_state(&state_root, &state, &mut batch)?; let batch_complete = slot + 1 == to_slot; @@ -182,13 +181,7 @@ where // - The diff/snapshot for this slot is required for future slots, or // - The reconstruction batch is complete (we are about to return). if self.hierarchy.should_commit_immediately(slot)? 
|| batch_complete { - // Slot-keyed cold bulk first, root index after — a mid-flush crash leaves - // cold data with no dangling index entry. - self.commit_cold_items(std::mem::take(&mut io_batch))?; - self.cold_db.put_index_batch( - DBColumnColdIndex::ColdStateSummary, - std::mem::take(&mut summary_index), - )?; + self.commit_cold_batch(std::mem::take(&mut batch))?; if batch_complete { // Perform one last integrity check on the state reached. From 84853ab4205e678fd79a09eadc82005255f8f671 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 21:27:01 +0200 Subject: [PATCH 15/24] Add ColdBackend enum + cold-backend flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the `KeyValueStore -> ColdStore` blanket and replaces it with an explicit per-backend impl. `BeaconNodeBackend` no longer impls `ColdStore` directly — its byte-translation is inlined inside the `ColdBackend::Kv` arm where it's actually used. `MemoryStore` keeps an explicit impl (still used as the Cold parameter in tests via `EphemeralHarnessType`). `ColdBackend` is a new enum with `Kv(BeaconNodeBackend)` / `Static(StaticColdStore)` variants, picked at startup from `StoreConfig::cold_backend` (default `Kv`). Production type signatures swap the second `BeaconNodeBackend` slot to `ColdBackend` (3 production sites, 6 test sites, 3 database_manager sites). `StaticColdBackend` wrapper from the previous commit collapsed into a direct `impl ColdStore for StaticColdStore`. Index methods stub `Unsupported` for now — wiring the embedded KV is the next piece. 
--- .../overflow_lru_cache.rs | 7 +- beacon_node/beacon_chain/src/test_utils.rs | 8 +- .../beacon_chain/tests/op_verification.rs | 4 +- .../beacon_chain/tests/prepare_payload.rs | 10 +- .../beacon_chain/tests/schema_stability.rs | 4 +- beacon_node/beacon_chain/tests/store_tests.rs | 16 +-- beacon_node/client/src/builder.rs | 5 +- beacon_node/src/lib.rs | 4 +- beacon_node/store/src/config.rs | 26 ++++ beacon_node/store/src/database/interface.rs | 117 +++++++++++++++++- beacon_node/store/src/hot_cold_store.rs | 14 ++- beacon_node/store/src/lib.rs | 97 +++++++++------ beacon_node/store/src/memory_store.rs | 36 +++++- beacon_node/store/src/static_cold.rs | 56 +++++++++ database_manager/src/lib.rs | 10 +- 15 files changed, 337 insertions(+), 77 deletions(-) diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 4c43d5bd0a3..ab9dd1af1f0 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -792,7 +792,8 @@ mod test { use logging::create_test_tracing_subscriber; use state_processing::ConsensusContext; use store::{ - ColdStore, HotColdDB, ItemStore, StoreConfig, database::interface::BeaconNodeBackend, + ColdStore, HotColdDB, ItemStore, StoreConfig, + database::interface::{BeaconNodeBackend, ColdBackend}, }; use tempfile::{TempDir, tempdir}; use tracing::info; @@ -804,7 +805,7 @@ mod test { fn get_store_with_spec( db_path: &TempDir, spec: Arc, - ) -> Arc, BeaconNodeBackend>> { + ) -> Arc, ColdBackend>> { let hot_path = db_path.path().join("hot_db"); let cold_path = db_path.path().join("cold_db"); let blobs_path = db_path.path().join("blobs_db"); @@ -949,7 +950,7 @@ mod test { E: EthSpec, T: BeaconChainTypes< HotStore = BeaconNodeBackend, - ColdStore = BeaconNodeBackend, + ColdStore = ColdBackend, EthSpec = E, >, { diff --git 
a/beacon_node/beacon_chain/src/test_utils.rs b/beacon_node/beacon_chain/src/test_utils.rs index 287e51436fb..5cf36812d51 100644 --- a/beacon_node/beacon_chain/src/test_utils.rs +++ b/beacon_node/beacon_chain/src/test_utils.rs @@ -65,7 +65,7 @@ use std::str::FromStr; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, LazyLock}; use std::time::Duration; -use store::database::interface::BeaconNodeBackend; +use store::database::interface::{BeaconNodeBackend, ColdBackend}; use store::{ColdStore, HotColdDB, ItemStore, MemoryStore, config::StoreConfig}; use task_executor::TaskExecutor; use task_executor::{ShutdownReason, test_utils::TestRuntime}; @@ -124,7 +124,7 @@ pub fn get_kzg(spec: &ChainSpec) -> Arc { pub type BaseHarnessType = Witness; -pub type DiskHarnessType = BaseHarnessType, BeaconNodeBackend>; +pub type DiskHarnessType = BaseHarnessType, ColdBackend>; pub type EphemeralHarnessType = BaseHarnessType, MemoryStore>; pub type BoxedMutator = Box< @@ -350,7 +350,7 @@ impl Builder> { /// Disk store, start from genesis. pub fn fresh_disk_store( mut self, - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, ) -> Self { let validator_keypairs = self .validator_keypairs @@ -384,7 +384,7 @@ impl Builder> { /// Disk store, resume. 
pub fn resumed_disk_store( mut self, - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, ) -> Self { let mutator = move |builder: BeaconChainBuilder<_>| { builder diff --git a/beacon_node/beacon_chain/tests/op_verification.rs b/beacon_node/beacon_chain/tests/op_verification.rs index 2f97f10745e..df35b729883 100644 --- a/beacon_node/beacon_chain/tests/op_verification.rs +++ b/beacon_node/beacon_chain/tests/op_verification.rs @@ -15,7 +15,7 @@ use state_processing::per_block_processing::errors::{ }; use std::sync::{Arc, LazyLock}; use store::StoreConfig; -use store::database::interface::BeaconNodeBackend; +use store::database::interface::{BeaconNodeBackend, ColdBackend}; use tempfile::{TempDir, tempdir}; use types::*; @@ -27,7 +27,7 @@ static KEYPAIRS: LazyLock> = type E = MinimalEthSpec; type TestHarness = BeaconChainHarness>; -type HotColdDB = store::HotColdDB, BeaconNodeBackend>; +type HotColdDB = store::HotColdDB, ColdBackend>; fn get_store(db_path: &TempDir) -> Arc { let spec = Arc::new(test_spec::()); diff --git a/beacon_node/beacon_chain/tests/prepare_payload.rs b/beacon_node/beacon_chain/tests/prepare_payload.rs index 47dd1ef517e..66c9e4ba734 100644 --- a/beacon_node/beacon_chain/tests/prepare_payload.rs +++ b/beacon_node/beacon_chain/tests/prepare_payload.rs @@ -15,7 +15,7 @@ use state_processing::{ state_advance::complete_state_advance, }; use std::sync::{Arc, LazyLock}; -use store::database::interface::BeaconNodeBackend; +use store::database::interface::{BeaconNodeBackend, ColdBackend}; use store::{HotColdDB, StoreConfig}; use tempfile::{TempDir, tempdir}; use types::*; @@ -34,7 +34,7 @@ type TestHarness = BeaconChainHarness>; fn get_store( db_path: &TempDir, spec: Arc, -) -> Arc, BeaconNodeBackend>> { +) -> Arc, ColdBackend>> { let store_config = StoreConfig { prune_payloads: false, ..StoreConfig::default() @@ -46,7 +46,7 @@ fn get_store_generic( db_path: &TempDir, config: StoreConfig, spec: Arc, -) -> Arc, BeaconNodeBackend>> { +) -> Arc, 
ColdBackend>> { create_test_tracing_subscriber(); let hot_path = db_path.path().join("chain_db"); let cold_path = db_path.path().join("freezer_db"); @@ -64,7 +64,7 @@ fn get_store_generic( } fn get_harness( - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, validator_count: usize, ) -> TestHarness { // Most tests expect to retain historic states, so we use this as the default. @@ -81,7 +81,7 @@ fn get_harness( } fn get_harness_generic( - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, validator_count: usize, chain_config: ChainConfig, node_custody_type: NodeCustodyType, diff --git a/beacon_node/beacon_chain/tests/schema_stability.rs b/beacon_node/beacon_chain/tests/schema_stability.rs index 8200748ae6c..76e1a0804f6 100644 --- a/beacon_node/beacon_chain/tests/schema_stability.rs +++ b/beacon_node/beacon_chain/tests/schema_stability.rs @@ -11,7 +11,7 @@ use ssz::Encode; use std::sync::{Arc, LazyLock}; use store::{ DBColumn, HotColdDB, StoreConfig, StoreItem, - database::interface::BeaconNodeBackend, + database::interface::{BeaconNodeBackend, ColdBackend}, hot_cold_store::Split, metadata::{DataColumnCustodyInfo, DataColumnInfo}, }; @@ -20,7 +20,7 @@ use tempfile::{TempDir, tempdir}; use types::{ChainSpec, Hash256, MainnetEthSpec, Slot}; type E = MainnetEthSpec; -type Store = Arc, BeaconNodeBackend>>; +type Store = Arc, ColdBackend>>; type TestHarness = BeaconChainHarness>; const VALIDATOR_COUNT: usize = 32; diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 06a77e9047f..25aa465b8ef 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -43,7 +43,7 @@ use std::convert::TryInto; use std::str::FromStr; use std::sync::{Arc, LazyLock}; use std::time::Duration; -use store::database::interface::BeaconNodeBackend; +use store::database::interface::{BeaconNodeBackend, ColdBackend}; use store::metadata::{CURRENT_SCHEMA_VERSION, 
SchemaVersion}; use store::{ BlobInfo, DBColumn, HotColdDB, StoreConfig, @@ -68,7 +68,7 @@ static KEYPAIRS: LazyLock> = type E = MinimalEthSpec; type TestHarness = BeaconChainHarness>; -fn get_store(db_path: &TempDir) -> Arc, BeaconNodeBackend>> { +fn get_store(db_path: &TempDir) -> Arc, ColdBackend>> { let store_config = StoreConfig { prune_payloads: false, ..StoreConfig::default() @@ -80,7 +80,7 @@ fn get_store_generic( db_path: &TempDir, config: StoreConfig, spec: ChainSpec, -) -> Arc, BeaconNodeBackend>> { +) -> Arc, ColdBackend>> { create_test_tracing_subscriber(); let hot_path = db_path.path().join("chain_db"); let cold_path = db_path.path().join("freezer_db"); @@ -98,7 +98,7 @@ fn get_store_generic( } fn get_harness( - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, validator_count: usize, ) -> TestHarness { // Most tests expect to retain historic states, so we use this as the default. @@ -115,7 +115,7 @@ fn get_harness( } fn get_harness_import_all_data_columns( - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, validator_count: usize, ) -> TestHarness { // Most tests expect to retain historic states, so we use this as the default. @@ -133,7 +133,7 @@ fn get_harness_import_all_data_columns( } fn get_harness_generic( - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, validator_count: usize, chain_config: ChainConfig, node_custody_type: NodeCustodyType, @@ -167,7 +167,7 @@ fn check_db_invariants(harness: &TestHarness) { } fn get_states_descendant_of_block( - store: &HotColdDB, BeaconNodeBackend>, + store: &HotColdDB, ColdBackend>, block_root: Hash256, ) -> Vec<(Hash256, Slot)> { let summaries = store.load_hot_state_summaries().unwrap(); @@ -5782,7 +5782,7 @@ async fn test_gloas_hot_state_hierarchy() { /// Check that the HotColdDB's split_slot is equal to the start slot of the last finalized epoch. 
fn check_split_slot( harness: &TestHarness, - store: Arc, BeaconNodeBackend>>, + store: Arc, ColdBackend>>, ) { let split_slot = store.get_split_slot(); assert_eq!( diff --git a/beacon_node/client/src/builder.rs b/beacon_node/client/src/builder.rs index 6d5dacd6707..bf4467a2646 100644 --- a/beacon_node/client/src/builder.rs +++ b/beacon_node/client/src/builder.rs @@ -40,7 +40,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; use std::time::{SystemTime, UNIX_EPOCH}; -use store::database::interface::BeaconNodeBackend; +use store::database::interface::{BeaconNodeBackend, ColdBackend}; use timer::spawn_timer; use tracing::{debug, info, instrument, warn}; use types::data::compute_ordered_custody_column_indices; @@ -842,8 +842,7 @@ where } } -impl - ClientBuilder, BeaconNodeBackend>> +impl ClientBuilder, ColdBackend>> where TSlotClock: SlotClock + 'static, E: EthSpec + 'static, diff --git a/beacon_node/src/lib.rs b/beacon_node/src/lib.rs index e33da17e266..e1ab0c5a60e 100644 --- a/beacon_node/src/lib.rs +++ b/beacon_node/src/lib.rs @@ -14,13 +14,13 @@ use network_utils::enr_ext::peer_id_to_node_id; use slasher::{DatabaseBackendOverride, Slasher}; use std::ops::{Deref, DerefMut}; use std::sync::Arc; -use store::database::interface::BeaconNodeBackend; +use store::database::interface::{BeaconNodeBackend, ColdBackend}; use tracing::{info, warn}; use types::{ChainSpec, Epoch, EthSpec, ForkName}; /// A type-alias to the tighten the definition of a production-intended `Client`. pub type ProductionClient = - Client, BeaconNodeBackend>>; + Client, ColdBackend>>; /// The beacon node `Client` that is used in production. /// diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index 29705283fa9..721451bf559 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -55,6 +55,8 @@ pub struct StoreConfig { pub prune_payloads: bool, /// Database backend to use. 
pub backend: DatabaseBackend, + /// Which cold backend to use for the freezer DB. + pub cold_backend: ColdBackendKind, /// State diff hierarchy. pub hierarchy_config: HierarchyConfig, /// Whether to prune blobs older than the blob data availability boundary. @@ -116,6 +118,7 @@ impl Default for StoreConfig { compact_on_prune: true, prune_payloads: true, backend: DEFAULT_BACKEND, + cold_backend: ColdBackendKind::default(), hierarchy_config: HierarchyConfig::default(), prune_blobs: true, epochs_per_blob_prune: DEFAULT_EPOCHS_PER_BLOB_PRUNE, @@ -276,3 +279,26 @@ pub enum DatabaseBackend { #[cfg(feature = "redb")] Redb, } + +/// Cold backend selector. +#[derive( + Debug, + Default, + Clone, + Copy, + Eq, + PartialEq, + Serialize, + Deserialize, + Display, + EnumString, + VariantNames, +)] +#[strum(serialize_all = "lowercase")] +pub enum ColdBackendKind { + /// Cold data lives in the same KV backend as the hot DB. Default. + #[default] + Kv, + /// Cold data lives in slot-keyed static files. + Static, +} diff --git a/beacon_node/store/src/database/interface.rs b/beacon_node/store/src/database/interface.rs index 5646f1179c8..65a3ee4581f 100644 --- a/beacon_node/store/src/database/interface.rs +++ b/beacon_node/store/src/database/interface.rs @@ -2,11 +2,16 @@ use crate::database::leveldb_impl; #[cfg(feature = "redb")] use crate::database::redb_impl; -use crate::{ColumnIter, ColumnKeyIter, DBColumn, Error, ItemStore, Key, KeyValueStore, metrics}; +use crate::{ + ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnCold, DBColumnColdIndex, Error, + IndexIter, ItemStore, Key, KeyValueStore, SlotIter, StaticColdStore, metrics, +}; use crate::{KeyValueStoreOp, StoreConfig, config::DatabaseBackend}; +use ssz::{Decode, Encode}; use std::collections::HashSet; use std::path::Path; use types::EthSpec; +use types::{Hash256, Slot}; pub enum BeaconNodeBackend { #[cfg(feature = "leveldb")] @@ -17,6 +22,116 @@ pub enum BeaconNodeBackend { impl ItemStore for BeaconNodeBackend {} +/// 
Runtime selector for the cold backend. +/// +/// Held by the production `HotColdDB` so the cold strategy can be picked at +/// startup based on `StoreConfig`. `Kv` keeps the existing behaviour +/// (everything in the KV store); `Static` uses the slot-keyed file backend. +/// +/// The `Kv` arm inlines the byte-translation (slot/root → bytes) directly here +/// rather than going through an intermediate `impl ColdStore for BeaconNodeBackend` +/// — `BeaconNodeBackend` is only ever a `ColdStore` via this enum, so the +/// indirection isn't earning anything. +pub enum ColdBackend { + Kv(BeaconNodeBackend), + Static(StaticColdStore), +} + +impl ColdStore for ColdBackend { + fn get(&self, c: DBColumnCold, slot: Slot) -> Result>, Error> { + match self { + Self::Kv(db) => db.get_bytes(c.db_column(), &slot.as_u64().to_be_bytes()), + Self::Static(db) => ColdStore::::get(db, c, slot), + } + } + fn put_batch(&self, c: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + match self { + Self::Kv(db) => { + let col = c.db_column(); + let ops = items + .into_iter() + .map(|(slot, value)| { + crate::KeyValueStoreOp::PutKeyValue( + col, + slot.as_u64().to_be_bytes().to_vec(), + value, + ) + }) + .collect(); + db.do_atomically(ops) + } + Self::Static(db) => ColdStore::::put_batch(db, c, items), + } + } + fn contains(&self, c: DBColumnCold, slot: Slot) -> Result { + match self { + Self::Kv(db) => db.key_exists(c.db_column(), &slot.as_u64().to_be_bytes()), + Self::Static(db) => ColdStore::::contains(db, c, slot), + } + } + fn iter_from(&self, c: DBColumnCold, from: Slot) -> SlotIter<'_> { + match self { + Self::Kv(db) => Box::new( + db.iter_column_from::>(c.db_column(), &from.as_u64().to_be_bytes()) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = + key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), + ), + Self::Static(db) => ColdStore::::iter_from(db, c, from), + } + } + fn 
get_index(&self, c: DBColumnColdIndex, root: Hash256) -> Result, Error> { + match self { + Self::Kv(db) => Ok(db + .get_bytes(c.db_column(), root.as_slice())? + .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?), + Self::Static(db) => ColdStore::::get_index(db, c, root), + } + } + fn put_index_batch( + &self, + c: DBColumnColdIndex, + items: Vec<(Hash256, Slot)>, + ) -> Result<(), Error> { + match self { + Self::Kv(db) => { + let col = c.db_column(); + let ops = items + .into_iter() + .map(|(root, slot)| { + crate::KeyValueStoreOp::PutKeyValue( + col, + root.as_slice().to_vec(), + slot.as_ssz_bytes(), + ) + }) + .collect(); + db.do_atomically(ops) + } + Self::Static(db) => ColdStore::::put_index_batch(db, c, items), + } + } + fn iter_index(&self, c: DBColumnColdIndex) -> IndexIter<'_> { + match self { + Self::Kv(db) => Box::new(db.iter_column::(c.db_column()).map(|res| { + res.and_then(|(root, value)| Ok((root, Slot::from_ssz_bytes(&value)?))) + })), + Self::Static(db) => ColdStore::::iter_index(db, c), + } + } + fn sync(&self) -> Result<(), Error> { + match self { + Self::Kv(db) => KeyValueStore::sync(db), + Self::Static(db) => ColdStore::::sync(db), + } + } +} + impl KeyValueStore for BeaconNodeBackend { fn get_bytes(&self, column: DBColumn, key: &[u8]) -> Result>, Error> { match self { diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index af0b4773357..581c398bb4b 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -1,5 +1,5 @@ use crate::config::{OnDiskStoreConfig, StoreConfig}; -use crate::database::interface::BeaconNodeBackend; +use crate::database::interface::{BeaconNodeBackend, ColdBackend}; use crate::forwards_iter::{HybridForwardsBlockRootsIterator, HybridForwardsStateRootsIterator}; use crate::hdiff::{HDiff, HDiffBuffer, HierarchyConfig, HierarchyModuli, StorageStrategy}; use crate::historic_state_cache::HistoricStateCache; @@ -12,6 +12,7 @@ use 
crate::metadata::{ SCHEMA_VERSION_KEY, SPLIT_KEY, STATE_UPPER_LIMIT_NO_RETAIN, SchemaVersion, }; use crate::state_cache::{PutStateOutcome, StateCache}; +use crate::static_cold::StaticColdStore; use crate::{ BlobSidecarListFromRoot, ColdStore, DBColumn, DBColumnCold, DBColumnColdIndex, DatabaseBlock, Error, ItemStore, KeyValueStoreOp, StoreItem, StoreOp, get_data_column_key, @@ -272,7 +273,7 @@ impl HotColdDB, MemoryStore> { } } -impl HotColdDB, BeaconNodeBackend> { +impl HotColdDB, ColdBackend> { /// Open a new or existing database, with the given paths to the hot and cold DBs. /// /// The `migrate_schema` function is passed in so that the parent `BeaconChain` can provide @@ -302,7 +303,14 @@ impl HotColdDB, BeaconNodeBackend> { blob_info: RwLock::new(BlobInfo::default()), data_column_info: RwLock::new(DataColumnInfo::default()), blobs_db: BeaconNodeBackend::open(&config, blobs_db_path)?, - cold_db: BeaconNodeBackend::open(&config, cold_path)?, + cold_db: match config.cold_backend { + crate::config::ColdBackendKind::Kv => { + ColdBackend::Kv(BeaconNodeBackend::open(&config, cold_path)?) + } + crate::config::ColdBackendKind::Static => { + ColdBackend::Static(StaticColdStore::open(cold_path)?) + } + }, hot_db, block_cache: NonZeroUsize::new(config.block_cache_size) .map(BlockCache::new) diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 856189ce479..6f5fa4d84af 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -163,6 +163,11 @@ pub trait ColdStore: Sync + Send + Sized + 'static { // Slot-keyed bulk data. fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error>; + /// Append `items` to `column`. Slots within `items` must be strictly + /// ascending and strictly greater than every slot already written to the + /// column — the static-file backend rejects out-of-order puts. KV + /// backends accept any order, so this is enforced by the caller, not the + /// trait. 
fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error>; fn contains(&self, column: DBColumnCold, slot: Slot) -> Result; @@ -183,14 +188,26 @@ pub trait ColdStore: Sync + Send + Sized + 'static { fn sync(&self) -> Result<(), Error>; } -/// Every `KeyValueStore` is a `ColdStore`: the slot/root keys round-trip through -/// the underlying byte-keyed columns. -impl> ColdStore for T { - fn get(&self, column: DBColumnCold, slot: Slot) -> Result>, Error> { - KeyValueStore::get_bytes(self, column.db_column(), &slot.as_u64().to_be_bytes()) +/// Helpers used by both KV-backed `ColdStore` impls (`BeaconNodeBackend`, +/// `MemoryStore`). Translation between `Slot`/`Hash256` keys and the byte-keyed +/// `KeyValueStore` API is identical regardless of which KV is underneath, so we +/// extract it here and let each backend's `impl ColdStore` thunk through. +pub(crate) mod kv_cold_store { + use super::*; + + pub fn get>( + kv: &T, + column: DBColumnCold, + slot: Slot, + ) -> Result>, Error> { + kv.get_bytes(column.db_column(), &slot.as_u64().to_be_bytes()) } - fn put_batch(&self, column: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + pub fn put_batch>( + kv: &T, + column: DBColumnCold, + items: Vec<(Slot, Vec)>, + ) -> Result<(), Error> { let col = column.db_column(); let ops = items .into_iter() @@ -198,43 +215,50 @@ impl> ColdStore for T { KeyValueStoreOp::PutKeyValue(col, slot.as_u64().to_be_bytes().to_vec(), value) }) .collect(); - KeyValueStore::do_atomically(self, ops) + kv.do_atomically(ops) } - fn contains(&self, column: DBColumnCold, slot: Slot) -> Result { - KeyValueStore::key_exists(self, column.db_column(), &slot.as_u64().to_be_bytes()) + pub fn contains>( + kv: &T, + column: DBColumnCold, + slot: Slot, + ) -> Result { + kv.key_exists(column.db_column(), &slot.as_u64().to_be_bytes()) } - fn iter_from(&self, column: DBColumnCold, from: Slot) -> SlotIter<'_> { + pub fn iter_from>( + kv: &T, + column: DBColumnCold, + from: 
Slot, + ) -> SlotIter<'_> { Box::new( - KeyValueStore::iter_column_from::>( - self, - column.db_column(), - &from.as_u64().to_be_bytes(), - ) - .map(|res| { - res.and_then(|(key_bytes, value)| { - let bytes: [u8; 8] = key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; - Ok((Slot::new(u64::from_be_bytes(bytes)), value)) - }) - }), + kv.iter_column_from::>(column.db_column(), &from.as_u64().to_be_bytes()) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = + key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), ) } // `Slot::as_ssz_bytes()` is byte-identical to the legacy // `ColdStateSummary { slot }` wrapper, so existing dbs round-trip without // migration. Pinned by `ssz_compat_with_legacy_summary`. - - fn get_index(&self, column: DBColumnColdIndex, root: Hash256) -> Result, Error> { - Ok( - KeyValueStore::get_bytes(self, column.db_column(), root.as_slice())? - .map(|bytes| Slot::from_ssz_bytes(&bytes)) - .transpose()?, - ) + pub fn get_index>( + kv: &T, + column: DBColumnColdIndex, + root: Hash256, + ) -> Result, Error> { + Ok(kv + .get_bytes(column.db_column(), root.as_slice())? + .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?) 
} - fn put_index_batch( - &self, + pub fn put_index_batch>( + kv: &T, column: DBColumnColdIndex, items: Vec<(Hash256, Slot)>, ) -> Result<(), Error> { @@ -245,19 +269,18 @@ impl> ColdStore for T { KeyValueStoreOp::PutKeyValue(col, root.as_slice().to_vec(), slot.as_ssz_bytes()) }) .collect(); - KeyValueStore::do_atomically(self, ops) + kv.do_atomically(ops) } - fn iter_index(&self, column: DBColumnColdIndex) -> IndexIter<'_> { + pub fn iter_index>( + kv: &T, + column: DBColumnColdIndex, + ) -> IndexIter<'_> { Box::new( - KeyValueStore::iter_column::(self, column.db_column()) + kv.iter_column::(column.db_column()) .map(|res| res.and_then(|(root, value)| Ok((root, Slot::from_ssz_bytes(&value)?)))), ) } - - fn sync(&self) -> Result<(), Error> { - KeyValueStore::sync(self) - } } pub trait Key: Sized + 'static { diff --git a/beacon_node/store/src/memory_store.rs b/beacon_node/store/src/memory_store.rs index 3f127aabfb1..a60a7cc4553 100644 --- a/beacon_node/store/src/memory_store.rs +++ b/beacon_node/store/src/memory_store.rs @@ -1,6 +1,7 @@ use crate::{ - ColumnIter, ColumnKeyIter, DBColumn, Error, ItemStore, Key, KeyValueStore, KeyValueStoreOp, - get_key_for_col, hot_cold_store::BytesKey, + ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnCold, DBColumnColdIndex, Error, + IndexIter, ItemStore, Key, KeyValueStore, KeyValueStoreOp, SlotIter, get_key_for_col, + hot_cold_store::BytesKey, kv_cold_store, }; use parking_lot::RwLock; use std::collections::{BTreeMap, HashSet}; @@ -151,3 +152,34 @@ impl KeyValueStore for MemoryStore { } impl ItemStore for MemoryStore {} + +impl ColdStore for MemoryStore { + fn get(&self, c: DBColumnCold, slot: Slot) -> Result>, Error> { + kv_cold_store::get(self, c, slot) + } + fn put_batch(&self, c: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { + kv_cold_store::put_batch(self, c, items) + } + fn contains(&self, c: DBColumnCold, slot: Slot) -> Result { + kv_cold_store::contains(self, c, slot) + } + fn iter_from(&self, c: 
DBColumnCold, from: Slot) -> SlotIter<'_> { + kv_cold_store::iter_from(self, c, from) + } + fn get_index(&self, c: DBColumnColdIndex, root: Hash256) -> Result, Error> { + kv_cold_store::get_index(self, c, root) + } + fn put_index_batch( + &self, + c: DBColumnColdIndex, + items: Vec<(Hash256, Slot)>, + ) -> Result<(), Error> { + kv_cold_store::put_index_batch(self, c, items) + } + fn iter_index(&self, c: DBColumnColdIndex) -> IndexIter<'_> { + kv_cold_store::iter_index(self, c) + } + fn sync(&self) -> Result<(), Error> { + KeyValueStore::sync(self) + } +} diff --git a/beacon_node/store/src/static_cold.rs b/beacon_node/store/src/static_cold.rs index cbe8e47c662..866772ce02d 100644 --- a/beacon_node/store/src/static_cold.rs +++ b/beacon_node/store/src/static_cold.rs @@ -116,6 +116,7 @@ pub enum StaticColdStoreError { Io(io::Error), Compression(io::Error), Invalid(String), + Unsupported(&'static str), } impl fmt::Display for StaticColdStoreError { @@ -124,6 +125,7 @@ impl fmt::Display for StaticColdStoreError { Self::Io(e) => write!(f, "static cold store io error: {e}"), Self::Compression(e) => write!(f, "static cold store compression error: {e}"), Self::Invalid(message) => write!(f, "static cold store invalid data: {message}"), + Self::Unsupported(op) => write!(f, "static cold store does not support {op}"), } } } @@ -534,3 +536,57 @@ fn sync_dir(path: &Path) -> StoreResult<()> { dir.sync_all()?; Ok(()) } + +// `StaticColdStore` only handles slot-keyed bulk; index methods stub out +// with `Unsupported` for now. Wiring root indices is the follow-up tracked +// in `TODO-static-block-storage.md`. 
+impl crate::ColdStore for StaticColdStore { + fn get(&self, c: DBColumnCold, slot: Slot) -> Result>, crate::Error> { + StaticColdStore::get(self, c, slot).map_err(Into::into) + } + + fn put_batch(&self, c: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), crate::Error> { + for (slot, value) in items { + self.put(c, slot, &value)?; + } + Ok(()) + } + + fn contains(&self, c: DBColumnCold, slot: Slot) -> Result { + StaticColdStore::contains(self, c, slot).map_err(Into::into) + } + + fn iter_from(&self, _c: DBColumnCold, _from: Slot) -> crate::SlotIter<'_> { + Box::new(std::iter::once(Err(StaticColdStoreError::Unsupported( + "iter_from", + ) + .into()))) + } + + fn get_index( + &self, + _c: crate::DBColumnColdIndex, + _root: types::Hash256, + ) -> Result, crate::Error> { + Err(StaticColdStoreError::Unsupported("get_index").into()) + } + + fn put_index_batch( + &self, + _c: crate::DBColumnColdIndex, + _items: Vec<(types::Hash256, Slot)>, + ) -> Result<(), crate::Error> { + Err(StaticColdStoreError::Unsupported("put_index_batch").into()) + } + + fn iter_index(&self, _c: crate::DBColumnColdIndex) -> crate::IndexIter<'_> { + Box::new(std::iter::once(Err(StaticColdStoreError::Unsupported( + "iter_index", + ) + .into()))) + } + + fn sync(&self) -> Result<(), crate::Error> { + Ok(()) + } +} diff --git a/database_manager/src/lib.rs b/database_manager/src/lib.rs index 2e082bc383d..f6552c6b7b0 100644 --- a/database_manager/src/lib.rs +++ b/database_manager/src/lib.rs @@ -16,7 +16,7 @@ use std::path::PathBuf; use store::KeyValueStore; use store::{ DBColumn, HotColdDB, - database::interface::BeaconNodeBackend, + database::interface::{BeaconNodeBackend, ColdBackend}, errors::Error, metadata::{CURRENT_SCHEMA_VERSION, SchemaVersion}, }; @@ -54,7 +54,7 @@ pub fn display_db_version( let blobs_path = client_config.get_blobs_db_path(); let mut version = CURRENT_SCHEMA_VERSION; - HotColdDB::, BeaconNodeBackend>::open( + HotColdDB::, ColdBackend>::open( &hot_path, &cold_path, 
&blobs_path, @@ -308,7 +308,7 @@ pub fn migrate_db( let mut from = CURRENT_SCHEMA_VERSION; let to = migrate_config.to; - let db = HotColdDB::, BeaconNodeBackend>::open( + let db = HotColdDB::, ColdBackend>::open( &hot_path, &cold_path, &blobs_path, @@ -338,7 +338,7 @@ pub fn prune_payloads( let cold_path = client_config.get_freezer_db_path(); let blobs_path = client_config.get_blobs_db_path(); - let db = HotColdDB::, BeaconNodeBackend>::open( + let db = HotColdDB::, ColdBackend>::open( &hot_path, &cold_path, &blobs_path, @@ -362,7 +362,7 @@ pub fn prune_blobs( let cold_path = client_config.get_freezer_db_path(); let blobs_path = client_config.get_blobs_db_path(); - let db = HotColdDB::, BeaconNodeBackend>::open( + let db = HotColdDB::, ColdBackend>::open( &hot_path, &cold_path, &blobs_path, From 295f9ba23f7b04b34512dca4ec65e80a5c06c977 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 22:35:12 +0200 Subject: [PATCH 16/24] Wire embedded KV into static cold + idempotent re-put Genesis sync against the static cold backend was failing for two reasons: 1. `BeaconColdStateSummary` and friends are root-keyed indices; the static files are slot-keyed. The previous `Unsupported` stubs blocked the very first migration. Embed a `BeaconNodeBackend` at `/index/` and serve `get_index` / `put_index_batch` / `iter_index` from it. Forwards iteration over slot-keyed columns (`iter_from`) is now also implemented by walking the column's `.off` sidecar. 2. `BeaconChainBuilder::genesis` pre-writes the genesis block_root to cold `BlockRoots` at slot 0, then the first migration writes the same (slot, root) again. KV cold accepts the overwrite; the static backend's strict-ascending check rejected it. `Column::put` now treats a re-put of an identical value at the current highest slot as a no-op, and errors only on a value mismatch (a real bug). 
Threads `StoreConfig` into `StaticColdStore::open` so the embedded KV picks up the same backend (`leveldb` / `redb`) and tuning as the hot/blobs DBs. Adds `genesis_sync_static_cold` covering ~1000 finalized blocks with the static backend and a load of every cold state through the new index. --- beacon_node/beacon_chain/tests/store_tests.rs | 49 +++++++ beacon_node/store/src/database/interface.rs | 2 +- beacon_node/store/src/hot_cold_store.rs | 2 +- beacon_node/store/src/static_cold.rs | 138 +++++++++++++----- 4 files changed, 156 insertions(+), 35 deletions(-) diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 25aa465b8ef..f69e1dc43b4 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -47,6 +47,7 @@ use store::database::interface::{BeaconNodeBackend, ColdBackend}; use store::metadata::{CURRENT_SCHEMA_VERSION, SchemaVersion}; use store::{ BlobInfo, DBColumn, HotColdDB, StoreConfig, + config::ColdBackendKind, hdiff::HierarchyConfig, iter::{BlockRootsIterator, StateRootsIterator}, }; @@ -6152,3 +6153,51 @@ fn get_blocks( fn clone_block(block: &AvailableBlock) -> AvailableBlock { block.__clone_without_recv().unwrap() } + +/// Genesis-sync ~1000 blocks against a `HotColdDB` configured with the static +/// cold backend. Exercises every code path that writes/reads cold state during +/// migration over many finalizations. 
+#[tokio::test] +async fn genesis_sync_static_cold() { + let num_blocks_produced: u64 = 1000; + let db_path = tempdir().unwrap(); + let store_config = StoreConfig { + prune_payloads: false, + cold_backend: ColdBackendKind::Static, + ..StoreConfig::default() + }; + let store = get_store_generic(&db_path, store_config, test_spec::()); + let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); + + harness + .extend_chain( + num_blocks_produced as usize, + BlockStrategy::OnCanonicalHead, + AttestationStrategy::AllValidators, + ) + .await; + + check_finalization(&harness, num_blocks_produced); + check_split_slot(&harness, store.clone()); + + // Spot-check: load every state in the canonical chain prior to the split + // slot from the static cold backend (exercises the root-keyed index + + // slot-keyed bulk reads end-to-end). + let split_slot = store.get_split_slot(); + let head_state = harness.get_current_state(); + let cold_state_roots: Vec<_> = StateRootsIterator::new(&store, &head_state) + .filter_map(Result::ok) + .filter(|(_, slot)| *slot < split_slot) + .collect(); + assert!( + !cold_state_roots.is_empty(), + "expected at least one cold state to verify" + ); + for (state_root, slot) in cold_state_roots { + let state = store + .get_state(&state_root, Some(slot), false) + .expect("cold state load should not error") + .unwrap_or_else(|| panic!("missing cold state at slot {slot}")); + assert_eq!(state.slot(), slot); + } +} diff --git a/beacon_node/store/src/database/interface.rs b/beacon_node/store/src/database/interface.rs index 65a3ee4581f..11bd64f034b 100644 --- a/beacon_node/store/src/database/interface.rs +++ b/beacon_node/store/src/database/interface.rs @@ -34,7 +34,7 @@ impl ItemStore for BeaconNodeBackend {} /// indirection isn't earning anything. 
pub enum ColdBackend { Kv(BeaconNodeBackend), - Static(StaticColdStore), + Static(StaticColdStore), } impl ColdStore for ColdBackend { diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 581c398bb4b..3c511ec02f1 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -308,7 +308,7 @@ impl HotColdDB, ColdBackend> { ColdBackend::Kv(BeaconNodeBackend::open(&config, cold_path)?) } crate::config::ColdBackendKind::Static => { - ColdBackend::Static(StaticColdStore::open(cold_path)?) + ColdBackend::Static(StaticColdStore::open(cold_path, &config)?) } }, hot_db, diff --git a/beacon_node/store/src/static_cold.rs b/beacon_node/store/src/static_cold.rs index 866772ce02d..0f0115e7f1c 100644 --- a/beacon_node/store/src/static_cold.rs +++ b/beacon_node/store/src/static_cold.rs @@ -19,7 +19,9 @@ //! //! See `specs/static-blocks.md` for the on-disk format. -use crate::DBColumnCold; +use crate::config::StoreConfig; +use crate::database::interface::BeaconNodeBackend; +use crate::{DBColumnCold, KeyValueStore}; use parking_lot::Mutex; use snap::{read::FrameDecoder, write::FrameEncoder}; use std::{ @@ -27,10 +29,11 @@ use std::{ fmt, fs::{self, File, OpenOptions}, io::{self, Read, Seek, SeekFrom, Write}, + marker::PhantomData, path::{Path, PathBuf}, }; use strum::IntoEnumIterator; -use types::Slot; +use types::{EthSpec, Slot}; const SLOTS_PER_FILE: u64 = 8192; const OFFSET_SIZE: u64 = 8; @@ -101,12 +104,16 @@ fn column_config(column: DBColumnCold) -> ColumnConfig { } } -#[derive(Debug)] -pub struct StaticColdStore { +pub struct StaticColdStore { /// All cold columns the static archive backs, opened eagerly at boot. /// Frozen after construction; per-column writer state is locked inside /// each `Column`. columns: HashMap, + /// Embedded KV for root-keyed indices (e.g. `ColdStateSummary`). 
The + /// slot-keyed file backend is the bulk archive; this side-table lets us + /// look up `state_root → slot` without scanning the bulk files. + index_db: BeaconNodeBackend, + _phantom: PhantomData, } type StoreResult = std::result::Result; @@ -136,18 +143,24 @@ impl From for StaticColdStoreError { } } -impl StaticColdStore { +impl StaticColdStore { /// Open the archive rooted at `path`. Every cold column is opened eagerly /// so subsequent reads/writes are pure hashmap lookups with no I/O on the - /// hot path. - pub fn open(path: &Path) -> StoreResult { - fs::create_dir_all(path)?; + /// hot path. An embedded KV is opened at `/index/` for the + /// root-keyed indices. + pub fn open(path: &Path, config: &StoreConfig) -> Result { + fs::create_dir_all(path).map_err(StaticColdStoreError::Io)?; let mut columns = HashMap::new(); for column in DBColumnCold::iter() { let cfg = column_config(column); columns.insert(column, Column::open(path.join(cfg.subdir), cfg)?); } - Ok(Self { columns }) + let index_db = BeaconNodeBackend::open(config, &path.join("index"))?; + Ok(Self { + columns, + index_db, + _phantom: PhantomData, + }) } /// Read the value at `(column, slot)`, if present. @@ -156,7 +169,10 @@ impl StaticColdStore { } /// Durably store `bytes` at `(column, slot)`. Slots within a column must - /// arrive strictly ascending. + /// arrive strictly ascending. A re-put of an identical value at the + /// current highest slot is treated as a no-op so callers that pre-write + /// a slot at startup (e.g. genesis block_root) don't trip the + /// out-of-order check on the first migration. pub fn put(&self, column: DBColumnCold, slot: Slot, bytes: &[u8]) -> StoreResult<()> { self.columns[&column].put(slot, bytes) } @@ -227,7 +243,13 @@ impl Column { if slot > highest_written_slot { return Ok(None); } + self.read_record(slot) + } + /// Read a record at `slot` without consulting the writer mutex. 
Used by + /// callers that already hold the lock (`put` for the idempotency check) + /// or have another reason to know the slot is committed. + fn read_record(&self, slot: Slot) -> StoreResult>> { let file_id = file_id(slot); let offset = self.read_offset(file_id, slot)?; if offset == 0 { @@ -274,10 +296,29 @@ impl Column { fn put(&self, slot: Slot, bytes: &[u8]) -> StoreResult<()> { let mut highest_written_slot = self.highest_written_slot.lock(); - if highest_written_slot.is_some_and(|highest| slot <= highest) { - return Err(StaticColdStoreError::Invalid( - "static cold put out of order".into(), - )); + if let Some(highest) = *highest_written_slot { + if slot < highest { + return Err(StaticColdStoreError::Invalid( + "static cold put out of order".into(), + )); + } + if slot == highest { + // Idempotent re-put: tolerate a write of the identical value + // at the most recently committed slot. Errors on a value + // mismatch — that's a real bug, not a duplicate. Read the + // existing record without re-locking the writer mutex. + let existing = self.read_record(slot)?.ok_or_else(|| { + StaticColdStoreError::Invalid( + "static cold missing record at highest slot".into(), + ) + })?; + if existing == bytes { + return Ok(()); + } + return Err(StaticColdStoreError::Invalid( + "static cold re-put with mismatched value".into(), + )); + } } let payload = if self.config.compression { @@ -537,10 +578,9 @@ fn sync_dir(path: &Path) -> StoreResult<()> { Ok(()) } -// `StaticColdStore` only handles slot-keyed bulk; index methods stub out -// with `Unsupported` for now. Wiring root indices is the follow-up tracked -// in `TODO-static-block-storage.md`. -impl crate::ColdStore for StaticColdStore { +// Slot-keyed columns are served from the static files; root-keyed index +// columns are served from the embedded KV at `/index/`. 
+impl crate::ColdStore for StaticColdStore { fn get(&self, c: DBColumnCold, slot: Slot) -> Result>, crate::Error> { StaticColdStore::get(self, c, slot).map_err(Into::into) } @@ -556,37 +596,69 @@ impl crate::ColdStore for StaticColdStore { StaticColdStore::contains(self, c, slot).map_err(Into::into) } - fn iter_from(&self, _c: DBColumnCold, _from: Slot) -> crate::SlotIter<'_> { - Box::new(std::iter::once(Err(StaticColdStoreError::Unsupported( - "iter_from", + fn iter_from(&self, c: DBColumnCold, from: Slot) -> crate::SlotIter<'_> { + let column = &self.columns[&c]; + let Some(highest) = *column.highest_written_slot.lock() else { + return Box::new(std::iter::empty()); + }; + if from > highest { + return Box::new(std::iter::empty()); + } + let column_ref = column; + Box::new( + (from.as_u64()..=highest.as_u64()) + .map(Slot::new) + .filter_map(move |slot| match column_ref.read_record(slot) { + Ok(Some(value)) => Some(Ok((slot, value))), + Ok(None) => None, + Err(e) => Some(Err(e.into())), + }), ) - .into()))) } fn get_index( &self, - _c: crate::DBColumnColdIndex, - _root: types::Hash256, + c: crate::DBColumnColdIndex, + root: types::Hash256, ) -> Result, crate::Error> { - Err(StaticColdStoreError::Unsupported("get_index").into()) + use ssz::Decode; + Ok(self + .index_db + .get_bytes(c.db_column(), root.as_slice())? + .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?) 
} fn put_index_batch( &self, - _c: crate::DBColumnColdIndex, - _items: Vec<(types::Hash256, Slot)>, + c: crate::DBColumnColdIndex, + items: Vec<(types::Hash256, Slot)>, ) -> Result<(), crate::Error> { - Err(StaticColdStoreError::Unsupported("put_index_batch").into()) + use ssz::Encode; + let col = c.db_column(); + let ops = items + .into_iter() + .map(|(root, slot)| { + crate::KeyValueStoreOp::PutKeyValue( + col, + root.as_slice().to_vec(), + slot.as_ssz_bytes(), + ) + }) + .collect(); + self.index_db.do_atomically(ops) } - fn iter_index(&self, _c: crate::DBColumnColdIndex) -> crate::IndexIter<'_> { - Box::new(std::iter::once(Err(StaticColdStoreError::Unsupported( - "iter_index", + fn iter_index(&self, c: crate::DBColumnColdIndex) -> crate::IndexIter<'_> { + use ssz::Decode; + Box::new( + self.index_db + .iter_column::(c.db_column()) + .map(|res| res.and_then(|(root, value)| Ok((root, Slot::from_ssz_bytes(&value)?)))), ) - .into()))) } fn sync(&self) -> Result<(), crate::Error> { - Ok(()) + KeyValueStore::sync(&self.index_db) } } From df115fb3ea5422e4e644a0cb994170865e6345cf Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 22:48:18 +0200 Subject: [PATCH 17/24] Parameterize store_tests by cold backend via COLD_BACKEND env Drops the bespoke 1000-block static-cold test and instead has get_store read the cold backend from COLD_BACKEND=static|kv. CI / local can now run the existing store_tests suite against either backend without duplicating test bodies. Also trims ColdBackendKind to the derives actually exercised today. Display, EnumString, VariantNames, Copy were forward-looking for the not-yet-wired --cold-backend CLI flag - re-add when that lands. 
--- beacon_node/beacon_chain/tests/store_tests.rs | 59 ++++--------------- beacon_node/store/src/config.rs | 15 +---- 2 files changed, 12 insertions(+), 62 deletions(-) diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index f69e1dc43b4..9240950e644 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -72,11 +72,22 @@ type TestHarness = BeaconChainHarness>; fn get_store(db_path: &TempDir) -> Arc, ColdBackend>> { let store_config = StoreConfig { prune_payloads: false, + cold_backend: cold_backend_from_env(), ..StoreConfig::default() }; get_store_generic(db_path, store_config, test_spec::()) } +/// Pick the cold backend from `COLD_BACKEND=static|kv` so the same test suite +/// can be run against both backends without duplicating tests. Default is the +/// historical KV backend. +fn cold_backend_from_env() -> ColdBackendKind { + match std::env::var("COLD_BACKEND").as_deref() { + Ok("static") => ColdBackendKind::Static, + _ => ColdBackendKind::Kv, + } +} + fn get_store_generic( db_path: &TempDir, config: StoreConfig, @@ -6153,51 +6164,3 @@ fn get_blocks( fn clone_block(block: &AvailableBlock) -> AvailableBlock { block.__clone_without_recv().unwrap() } - -/// Genesis-sync ~1000 blocks against a `HotColdDB` configured with the static -/// cold backend. Exercises every code path that writes/reads cold state during -/// migration over many finalizations. 
-#[tokio::test] -async fn genesis_sync_static_cold() { - let num_blocks_produced: u64 = 1000; - let db_path = tempdir().unwrap(); - let store_config = StoreConfig { - prune_payloads: false, - cold_backend: ColdBackendKind::Static, - ..StoreConfig::default() - }; - let store = get_store_generic(&db_path, store_config, test_spec::()); - let harness = get_harness(store.clone(), LOW_VALIDATOR_COUNT); - - harness - .extend_chain( - num_blocks_produced as usize, - BlockStrategy::OnCanonicalHead, - AttestationStrategy::AllValidators, - ) - .await; - - check_finalization(&harness, num_blocks_produced); - check_split_slot(&harness, store.clone()); - - // Spot-check: load every state in the canonical chain prior to the split - // slot from the static cold backend (exercises the root-keyed index + - // slot-keyed bulk reads end-to-end). - let split_slot = store.get_split_slot(); - let head_state = harness.get_current_state(); - let cold_state_roots: Vec<_> = StateRootsIterator::new(&store, &head_state) - .filter_map(Result::ok) - .filter(|(_, slot)| *slot < split_slot) - .collect(); - assert!( - !cold_state_roots.is_empty(), - "expected at least one cold state to verify" - ); - for (state_root, slot) in cold_state_roots { - let state = store - .get_state(&state_root, Some(slot), false) - .expect("cold state load should not error") - .unwrap_or_else(|| panic!("missing cold state at slot {slot}")); - assert_eq!(state.slot(), slot); - } -} diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index 721451bf559..c3b09396940 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -281,20 +281,7 @@ pub enum DatabaseBackend { } /// Cold backend selector. 
-#[derive( - Debug, - Default, - Clone, - Copy, - Eq, - PartialEq, - Serialize, - Deserialize, - Display, - EnumString, - VariantNames, -)] -#[strum(serialize_all = "lowercase")] +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum ColdBackendKind { /// Cold data lives in the same KV backend as the hot DB. Default. #[default] From e259a5157b3be8789eee0ce5fe64338aba8b5bce Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 23:14:47 +0200 Subject: [PATCH 18/24] Reject WSS sync under static cold; wire --cold-backend CLI flag The static cold backend is append-only in ascending slot order, so checkpoint/weak-subjectivity sync (which backfills slots below the anchor) is fundamentally incompatible. Refuse the combination explicitly in BeaconChainBuilder::weak_subjectivity_state instead of failing later with an opaque 'static cold put out of order' error. The 6 weak_subjectivity_sync_* tests early-return under COLD_BACKEND=static so the test suite passes against either backend. Adds the --cold-backend CLI flag (kv|static, default kv) so operators can opt into the static backend at startup. Re-adds EnumString and VariantNames on ColdBackendKind for clap parsing. --- beacon_node/beacon_chain/src/builder.rs | 15 +++++++++++++++ beacon_node/beacon_chain/tests/store_tests.rs | 6 ++++++ beacon_node/src/cli.rs | 12 ++++++++++++ beacon_node/src/config.rs | 4 ++++ beacon_node/store/src/config.rs | 5 ++++- 5 files changed, 41 insertions(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 2e9231a689d..1086759b831 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -435,6 +435,21 @@ where .clone() .ok_or("weak_subjectivity_state requires a store")?; + // The static cold backend is append-only in ascending slot order. 
A + // checkpoint / weak-subjectivity start writes the anchor state in the + // middle of the chain and then backfills earlier slots, which the + // static format can't represent. Refuse the combination at startup + // rather than failing later with an out-of-order put. + if matches!( + store.get_config().cold_backend, + store::config::ColdBackendKind::Static + ) { + return Err("static cold backend only supports starting from genesis; \ + checkpoint sync and weak subjectivity sync require the kv \ + cold backend" + .to_string()); + } + // Ensure the state is advanced to an epoch boundary. let slots_per_epoch = E::slots_per_epoch(); if weak_subj_state.slot() % slots_per_epoch != 0 { diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index 9240950e644..4d0f7e8a6c7 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -3014,6 +3014,12 @@ async fn weak_subjectivity_sync_test( backfill_batch_size: Option, provide_blobs: bool, ) { + // Static cold backend rejects checkpoint+backfill at construction; nothing + // here would exercise it usefully under that mode. + if cold_backend_from_env() == ColdBackendKind::Static { + return; + } + // Build an initial chain on one harness, representing a synced node with full history. let num_final_blocks = E::slots_per_epoch() * 2; diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 51cda0fac3b..ad40c080f6e 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -1622,6 +1622,18 @@ pub fn cli_app() -> Command { .action(ArgAction::Set) .display_order(0) ) + .arg( + Arg::new("cold-backend") + .long("cold-backend") + .value_name("BACKEND") + .value_parser(store::config::ColdBackendKind::VARIANTS.to_vec()) + .help("Cold (freezer) DB backend. \"kv\" stores cold data in the \ + same KV as the hot DB. 
\"static\" stores cold data in \ + slot-keyed static files; only supported when starting \ + from genesis.") + .action(ArgAction::Set) + .display_order(0) + ) .arg( Arg::new("delay-block-publishing") .long("delay-block-publishing") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 8ba2c0f3214..31e0b59a6e2 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -418,6 +418,10 @@ pub fn get_config( client_config.store.backend = backend; } + if let Some(cold_backend) = clap_utils::parse_optional(cli_args, "cold-backend")? { + client_config.store.cold_backend = cold_backend; + } + if let Some(hierarchy_config) = clap_utils::parse_optional(cli_args, "hierarchy-exponents")? { client_config.store.hierarchy_config = hierarchy_config; } diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index c3b09396940..d741048f630 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -281,7 +281,10 @@ pub enum DatabaseBackend { } /// Cold backend selector. -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive( + Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, EnumString, VariantNames, +)] +#[strum(serialize_all = "lowercase")] pub enum ColdBackendKind { /// Cold data lives in the same KV backend as the hot DB. Default. #[default] From fc8cc497fa0a255bd33186a28e44519a784cf278 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Fri, 8 May 2026 23:45:59 +0200 Subject: [PATCH 19/24] Static cold review followups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Idempotent put at any committed slot makes `migrate_database` retries safe after a mid-loop crash. The previous put accepted re-puts only at exactly `highest_written_slot`; on retry, slot 0 < highest fired out-of-order. 
Now any committed slot accepts an identical-value re-put; mismatched values and skipped-slot fills still error. New `COLD_BACKEND_KEY` in `BeaconMeta` pins the backend kind on first open and refuses mismatched re-opens (Static and Kv on-disk layouts are incompatible). `reconstruct_historic_states` refuses to run under static cold — the slots it would write are below every column's high-water mark. `max_value_bytes` ratchets upward on open if the build default exceeds disk, so a newer build can write larger records than an older one persisted, and re-persists immediately for stable re-opens. Per-column files renamed `static_blocks_*` -> `data_*`, `static_blocks.conf` -> `column.conf` — the literal prefix was misleading after the per-column generalisation. `kv_cold_store` helper module dropped; `MemoryStore`'s `ColdStore` impl inlined to match `ColdBackend::Kv`. Two impls, no shared helper. `decompress_record` returns `Result>` (was `Result>>` with `Some` on every success path). `TODO(static)` markers added for `iter_from` perf, the migrate-vs-index transient invariant 11 window, invariants 10/11/12 re-review under static cold, and the missing test set. Spec cleanup: delete `specs/static-blocks.md` (stale, ~60% contradicted the code) and `TODO-static-block-storage.md`. Rewrite the `static_cold.rs` module header as the canonical byte-level format reference (layout, data file, `column.conf`, put contract, recovery). 
--- TODO-static-block-storage.md | 47 --- beacon_node/store/src/config.rs | 39 ++- beacon_node/store/src/database/interface.rs | 3 + beacon_node/store/src/errors.rs | 15 + beacon_node/store/src/hot_cold_store.rs | 76 ++++- beacon_node/store/src/invariants.rs | 7 + beacon_node/store/src/lib.rs | 96 ------ beacon_node/store/src/memory_store.rs | 55 +++- beacon_node/store/src/metadata.rs | 4 + beacon_node/store/src/reconstruct.rs | 10 + beacon_node/store/src/static_cold.rs | 136 ++++++--- specs/static-blocks.md | 314 -------------------- 12 files changed, 291 insertions(+), 511 deletions(-) delete mode 100644 TODO-static-block-storage.md delete mode 100644 specs/static-blocks.md diff --git a/TODO-static-block-storage.md b/TODO-static-block-storage.md deleted file mode 100644 index 763a39d6d8c..00000000000 --- a/TODO-static-block-storage.md +++ /dev/null @@ -1,47 +0,0 @@ -# Static Cold Storage TODO - -Current spec: [`specs/static-cold-backend.md`](./specs/static-cold-backend.md) -(file format inherited from [`specs/static-blocks.md`](./specs/static-blocks.md) -and generalised per-column). - -Implemented in this branch: -- multi-column slot-keyed store: `StaticColdStore` (one type, dispatched on - `DBColumnCold`) -- per-column subdirectory + per-column conf with persisted `record_type`, - `compression`, `max_value_bytes` (conf magic `LHSTBLK2`) -- `ColdStore` trait covering both slot-keyed bulk and root-keyed indices - (`DBColumnColdIndex`); KV backends impl by translating slot/root keys into - the underlying `KeyValueStore` -- startup healing for interrupted writes (per-column) -- `prune_historic_states` removed (mode it produced is not in the spec's - startup-path table) - -Remaining: - -1. Cold backend selection. - - add a CLI/config flag to switch the cold backend between the existing - KV implementation and the static-file implementation - - reject startup combinations the spec doesn't allow (e.g. 
checkpoint sync - without complete static history into static-archive mode) - -2. Review block read/write paths. - - decide where finalized blocks live in the static-cold mode - (`DBColumn::BeaconBlock`? a new slot-keyed `DBColumnCold::Block`?) - - root → slot resolution: with `BeaconBlockSlot` removed, no on-disk index - maps a block_root to its slot. Choose a path: bring the index back - (whether in hot or in the cold backend), perform a slot-walk, or reject - root-keyed reads in static-cold mode - - update `HotColdDB::get_block_with` and `block_exists` accordingly - -3. Review invariants. - - it is unclear whether invariants 10/11/12 still hold under static-cold - mode. Walk through each and confirm or update — in particular, archived - blocks no longer needing hot-DB block bodies, and the consistency of - root-to-slot indices once their location is decided in (2) - -4. Tests. - - happy path for `StaticColdStore::open/get/put` per cold column - - out-of-order put rejection - - crash windows around data, `.off`, and per-column `.conf` - - cold backend selection via CLI flag - - rejected startup-mode combinations diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index d741048f630..1488f11ce53 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -102,6 +102,7 @@ pub enum StoreConfigError { }, ZeroEpochsPerBlobPrune, InvalidVersionByte(Option), + InvalidColdBackendByte(u8), } impl Default for StoreConfig { @@ -282,7 +283,7 @@ pub enum DatabaseBackend { /// Cold backend selector. #[derive( - Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, EnumString, VariantNames, + Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, EnumString, VariantNames, )] #[strum(serialize_all = "lowercase")] pub enum ColdBackendKind { @@ -292,3 +293,39 @@ pub enum ColdBackendKind { /// Cold data lives in slot-keyed static files. 
Static, } + +impl ColdBackendKind { + /// One-byte tag persisted under `COLD_BACKEND_KEY` in `BeaconMeta`. + /// Stable across builds — never reorder or reuse a value. + pub fn as_byte(self) -> u8 { + match self { + Self::Kv => 0, + Self::Static => 1, + } + } + + pub fn from_byte(byte: u8) -> Result { + match byte { + 0 => Ok(Self::Kv), + 1 => Ok(Self::Static), + other => Err(StoreConfigError::InvalidColdBackendByte(other)), + } + } +} + +impl StoreItem for ColdBackendKind { + fn db_column() -> DBColumn { + DBColumn::BeaconMeta + } + + fn as_store_bytes(&self) -> Vec { + vec![self.as_byte()] + } + + fn from_store_bytes(bytes: &[u8]) -> Result { + let &[byte] = bytes else { + return Err(StoreConfigError::InvalidColdBackendByte(0).into()); + }; + Ok(Self::from_byte(byte)?) + } +} diff --git a/beacon_node/store/src/database/interface.rs b/beacon_node/store/src/database/interface.rs index 11bd64f034b..4ce22df9af4 100644 --- a/beacon_node/store/src/database/interface.rs +++ b/beacon_node/store/src/database/interface.rs @@ -84,6 +84,9 @@ impl ColdStore for ColdBackend { Self::Static(db) => ColdStore::::iter_from(db, c, from), } } + // `Slot::as_ssz_bytes()` is byte-identical to the legacy + // `ColdStateSummary { slot }` wrapper so existing dbs round-trip without + // migration. Pinned by `ssz_compat_with_legacy_summary` in `lib.rs`. fn get_index(&self, c: DBColumnColdIndex, root: Hash256) -> Result, Error> { match self { Self::Kv(db) => Ok(db diff --git a/beacon_node/store/src/errors.rs b/beacon_node/store/src/errors.rs index 1e974cf4b36..68dc28998e3 100644 --- a/beacon_node/store/src/errors.rs +++ b/beacon_node/store/src/errors.rs @@ -40,6 +40,21 @@ pub enum Error { MissingHistoricBlocks { oldest_block_slot: Slot, }, + /// State reconstruction is not supported with the static cold backend. + /// + /// The static-file backend is strict-ascending append-only, but online + /// reconstruction writes states at slots already below the high-water + /// mark. 
Per `specs/static-cold-backend.md`, a full node never becomes + /// archive by online reconstruction. + ReconstructionUnsupportedOnStaticCold, + /// The configured cold backend differs from the one persisted on disk. + /// + /// Switching cold backends in-place is unsupported because the on-disk + /// formats are incompatible. + ColdBackendMismatch { + on_disk: crate::config::ColdBackendKind, + configured: crate::config::ColdBackendKind, + }, /// State reconstruction failed because it didn't reach the upper limit slot. /// /// This should never happen (it's a logic error). diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 3c511ec02f1..d89dc5a0d26 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -6,7 +6,7 @@ use crate::historic_state_cache::HistoricStateCache; use crate::iter::{BlockRootsIterator, ParentRootBlockIterator, RootsIterator}; use crate::memory_store::MemoryStore; use crate::metadata::{ - ANCHOR_INFO_KEY, ANCHOR_UNINITIALIZED, AnchorInfo, BLOB_INFO_KEY, BlobInfo, + ANCHOR_INFO_KEY, ANCHOR_UNINITIALIZED, AnchorInfo, BLOB_INFO_KEY, BlobInfo, COLD_BACKEND_KEY, COMPACTION_TIMESTAMP_KEY, CONFIG_KEY, CURRENT_SCHEMA_VERSION, CompactionTimestamp, DATA_COLUMN_CUSTODY_INFO_KEY, DATA_COLUMN_INFO_KEY, DataColumnCustodyInfo, DataColumnInfo, SCHEMA_VERSION_KEY, SPLIT_KEY, STATE_UPPER_LIMIT_NO_RETAIN, SchemaVersion, @@ -297,6 +297,13 @@ impl HotColdDB, ColdBackend> { let anchor_info = RwLock::new(Self::load_anchor_info(&hot_db)?); debug!(?anchor_info, "Loaded anchor info"); + // Pin the cold backend kind to the directory before we touch it. + // Static and Kv layouts are incompatible on disk, so refuse to open + // a directory written by the other one. If no record exists yet + // (fresh DB), persist the configured kind so this check is + // load-bearing on every subsequent open. 
+ Self::check_or_init_cold_backend_kind(&hot_db, config.cold_backend)?; + let db = HotColdDB { split: RwLock::new(Split::default()), anchor_info, @@ -3082,6 +3089,25 @@ impl, Cold: ColdStore> HotColdDB self.hot_db.put(&CONFIG_KEY, &self.config.as_disk_config()) } + /// Pin the cold backend kind to the DB's hot metadata on first open and + /// refuse subsequent opens that disagree. Static and Kv use incompatible + /// on-disk layouts in the cold path, so silently switching would leave + /// orphaned data behind and quietly corrupt new writes. + fn check_or_init_cold_backend_kind( + hot_db: &BeaconNodeBackend, + configured: crate::config::ColdBackendKind, + ) -> Result<(), Error> { + use crate::config::ColdBackendKind; + match hot_db.get::(&COLD_BACKEND_KEY)? { + Some(on_disk) if on_disk != configured => Err(Error::ColdBackendMismatch { + on_disk, + configured, + }), + Some(_) => Ok(()), + None => hot_db.put(&COLD_BACKEND_KEY, &configured), + } + } + /// Load the split point from disk, sans block root. fn load_split_partial(&self) -> Result, Error> { self.hot_db @@ -3570,6 +3596,17 @@ pub fn migrate_database, Cold: ColdStore>( // Committed after the slot-keyed cold data so a crash leaves no dangling indices. let mut cold_state_summary_index: Vec<(Hash256, Slot)> = vec![]; + // The static cold backend is a self-sufficient archive — it owns the + // bulk historic-block bytes, not just the slot index. The KV cold + // backend keeps blocks in the hot DB indefinitely, so duplicating them + // would only waste space. 
+ let move_blocks_to_static_cold = matches!( + store.get_config().cold_backend, + crate::config::ColdBackendKind::Static + ); + let mut cold_db_block_data: Vec<(Slot, Vec)> = vec![]; + let mut last_seen_block_root: Option = None; + // Iterate in descending order until the current split slot let state_roots: Vec<_> = process_results(RootsIterator::new(&store, finalized_state), |iter| { @@ -3582,6 +3619,29 @@ pub fn migrate_database, Cold: ColdStore>( // Store the slot to block root mapping. cold_db_block_ops.push((*slot, block_root.as_slice().to_vec())); + // Move the block bytes into static cold at the slot they were + // proposed at. `RootsIterator` yields the same `block_root` for + // every skip slot until the next block — dedup by tracking the + // most recent root, then keep only the first occurrence whose + // iteration slot matches `block.slot()` (skip-slot continuations + // from the previous finalization window have `block.slot() < slot` + // and are already in cold from that earlier migration). + if move_blocks_to_static_cold && Some(*block_root) != last_seen_block_root { + last_seen_block_root = Some(*block_root); + if let Some(block_bytes) = store + .hot_db + .get_bytes(DBColumn::BeaconBlock, block_root.as_slice())? + { + let block = SignedBeaconBlock::>::from_ssz_bytes( + &block_bytes, + &store.spec, + )?; + if block.slot() == *slot { + cold_db_block_data.push((*slot, block_bytes)); + } + } + } + // Do not try to store states if a restore point is yet to be stored, or will never be // stored (see `STATE_UPPER_LIMIT_NO_RETAIN`). Make an exception for the genesis state // which always needs to be copied from the hot DB to the freezer and should not be deleted. @@ -3631,9 +3691,23 @@ pub fn migrate_database, Cold: ColdStore>( // // Slot-keyed cold data lands first; the BeaconColdStateSummary root index is committed after, // so a mid-migration crash leaves cold data without dangling indices. 
+ // + // TODO(static): a crash between `put_batch(BlockRoots, …)` and + // `put_index_batch(ColdStateSummary, …)` leaves BlockRoots committed for + // the new range but ColdStateSummary missing. The next restart re-runs the + // migration, which re-derives the summary, but until then invariant 11 + // (`check_cold_state_root_indices`) will fire transiently. KV mode has the + // same window. Worth reviewing whether the index could move to per-iter + // commit alongside the slot-keyed data, or whether the invariant should + // be relaxed during the `split.slot < latest finalized` window. store .cold_db .put_batch(DBColumnCold::BlockRoots, cold_db_block_ops)?; + if !cold_db_block_data.is_empty() { + store + .cold_db + .put_batch(DBColumnCold::Block, cold_db_block_data)?; + } store.cold_db.sync()?; store.cold_db.put_index_batch( DBColumnColdIndex::ColdStateSummary, diff --git a/beacon_node/store/src/invariants.rs b/beacon_node/store/src/invariants.rs index a47b1ded41c..49a7418c21c 100644 --- a/beacon_node/store/src/invariants.rs +++ b/beacon_node/store/src/invariants.rs @@ -557,6 +557,13 @@ impl, Cold: ColdStore> HotColdDB Ok(result) } + // TODO(static): re-walk invariants 10/11/12 under the static cold backend. + // The text-form preconditions and the "block in hot_db" check (#10) were + // written against the KV-cold world where finalized blocks live in hot DB + // forever. Under static cold, archived blocks may live elsewhere (`migrate_database` + // writes their bytes to `DBColumnCold::Block`), and `cold_db.iter_from` over a + // sparse static column is O(highest - from). Confirm or update each. + /// Invariant 10 (Cold DB): Block root indices. 
/// /// ```text diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index 6f5fa4d84af..b756a423113 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -35,7 +35,6 @@ pub use crate::metadata::BlobInfo; pub use errors::Error; pub use metadata::AnchorInfo; pub use metrics::scrape_for_metrics; -use ssz::{Decode, Encode}; use std::collections::HashSet; use std::sync::Arc; use strum::{EnumIter, EnumString, IntoStaticStr}; @@ -188,101 +187,6 @@ pub trait ColdStore: Sync + Send + Sized + 'static { fn sync(&self) -> Result<(), Error>; } -/// Helpers used by both KV-backed `ColdStore` impls (`BeaconNodeBackend`, -/// `MemoryStore`). Translation between `Slot`/`Hash256` keys and the byte-keyed -/// `KeyValueStore` API is identical regardless of which KV is underneath, so we -/// extract it here and let each backend's `impl ColdStore` thunk through. -pub(crate) mod kv_cold_store { - use super::*; - - pub fn get>( - kv: &T, - column: DBColumnCold, - slot: Slot, - ) -> Result>, Error> { - kv.get_bytes(column.db_column(), &slot.as_u64().to_be_bytes()) - } - - pub fn put_batch>( - kv: &T, - column: DBColumnCold, - items: Vec<(Slot, Vec)>, - ) -> Result<(), Error> { - let col = column.db_column(); - let ops = items - .into_iter() - .map(|(slot, value)| { - KeyValueStoreOp::PutKeyValue(col, slot.as_u64().to_be_bytes().to_vec(), value) - }) - .collect(); - kv.do_atomically(ops) - } - - pub fn contains>( - kv: &T, - column: DBColumnCold, - slot: Slot, - ) -> Result { - kv.key_exists(column.db_column(), &slot.as_u64().to_be_bytes()) - } - - pub fn iter_from>( - kv: &T, - column: DBColumnCold, - from: Slot, - ) -> SlotIter<'_> { - Box::new( - kv.iter_column_from::>(column.db_column(), &from.as_u64().to_be_bytes()) - .map(|res| { - res.and_then(|(key_bytes, value)| { - let bytes: [u8; 8] = - key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; - Ok((Slot::new(u64::from_be_bytes(bytes)), value)) - }) - }), - ) - } - - // 
`Slot::as_ssz_bytes()` is byte-identical to the legacy - // `ColdStateSummary { slot }` wrapper, so existing dbs round-trip without - // migration. Pinned by `ssz_compat_with_legacy_summary`. - pub fn get_index>( - kv: &T, - column: DBColumnColdIndex, - root: Hash256, - ) -> Result, Error> { - Ok(kv - .get_bytes(column.db_column(), root.as_slice())? - .map(|bytes| Slot::from_ssz_bytes(&bytes)) - .transpose()?) - } - - pub fn put_index_batch>( - kv: &T, - column: DBColumnColdIndex, - items: Vec<(Hash256, Slot)>, - ) -> Result<(), Error> { - let col = column.db_column(); - let ops = items - .into_iter() - .map(|(root, slot)| { - KeyValueStoreOp::PutKeyValue(col, root.as_slice().to_vec(), slot.as_ssz_bytes()) - }) - .collect(); - kv.do_atomically(ops) - } - - pub fn iter_index>( - kv: &T, - column: DBColumnColdIndex, - ) -> IndexIter<'_> { - Box::new( - kv.iter_column::(column.db_column()) - .map(|res| res.and_then(|(root, value)| Ok((root, Slot::from_ssz_bytes(&value)?)))), - ) - } -} - pub trait Key: Sized + 'static { fn from_bytes(key: &[u8]) -> Result; } diff --git a/beacon_node/store/src/memory_store.rs b/beacon_node/store/src/memory_store.rs index a60a7cc4553..2777518e423 100644 --- a/beacon_node/store/src/memory_store.rs +++ b/beacon_node/store/src/memory_store.rs @@ -1,9 +1,10 @@ use crate::{ ColdStore, ColumnIter, ColumnKeyIter, DBColumn, DBColumnCold, DBColumnColdIndex, Error, IndexIter, ItemStore, Key, KeyValueStore, KeyValueStoreOp, SlotIter, get_key_for_col, - hot_cold_store::BytesKey, kv_cold_store, + hot_cold_store::BytesKey, }; use parking_lot::RwLock; +use ssz::{Decode, Encode}; use std::collections::{BTreeMap, HashSet}; use std::marker::PhantomData; use types::*; @@ -153,31 +154,69 @@ impl KeyValueStore for MemoryStore { impl ItemStore for MemoryStore {} +// Mirrors `ColdBackend::Kv` in `database/interface.rs` — both translate the +// slot/root-typed `ColdStore` API into the byte-keyed `KeyValueStore` API in +// the same way. 
Kept inline here (and there) rather than behind a shared helper +// because there are only two impls and the indirection wasn't earning its +// keep. +// +// `Slot::as_ssz_bytes()` is byte-identical to the legacy +// `ColdStateSummary { slot }` wrapper, so existing dbs round-trip without +// migration. Pinned by the `ssz_compat_with_legacy_summary` test in `lib.rs`. impl ColdStore for MemoryStore { fn get(&self, c: DBColumnCold, slot: Slot) -> Result>, Error> { - kv_cold_store::get(self, c, slot) + self.get_bytes(c.db_column(), &slot.as_u64().to_be_bytes()) } fn put_batch(&self, c: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), Error> { - kv_cold_store::put_batch(self, c, items) + let col = c.db_column(); + let ops = items + .into_iter() + .map(|(slot, value)| { + KeyValueStoreOp::PutKeyValue(col, slot.as_u64().to_be_bytes().to_vec(), value) + }) + .collect(); + self.do_atomically(ops) } fn contains(&self, c: DBColumnCold, slot: Slot) -> Result { - kv_cold_store::contains(self, c, slot) + self.key_exists(c.db_column(), &slot.as_u64().to_be_bytes()) } fn iter_from(&self, c: DBColumnCold, from: Slot) -> SlotIter<'_> { - kv_cold_store::iter_from(self, c, from) + Box::new( + self.iter_column_from::>(c.db_column(), &from.as_u64().to_be_bytes()) + .map(|res| { + res.and_then(|(key_bytes, value)| { + let bytes: [u8; 8] = + key_bytes.try_into().map_err(|_| Error::InvalidBytes)?; + Ok((Slot::new(u64::from_be_bytes(bytes)), value)) + }) + }), + ) } fn get_index(&self, c: DBColumnColdIndex, root: Hash256) -> Result, Error> { - kv_cold_store::get_index(self, c, root) + Ok(self + .get_bytes(c.db_column(), root.as_slice())? + .map(|bytes| Slot::from_ssz_bytes(&bytes)) + .transpose()?) 
} fn put_index_batch( &self, c: DBColumnColdIndex, items: Vec<(Hash256, Slot)>, ) -> Result<(), Error> { - kv_cold_store::put_index_batch(self, c, items) + let col = c.db_column(); + let ops = items + .into_iter() + .map(|(root, slot)| { + KeyValueStoreOp::PutKeyValue(col, root.as_slice().to_vec(), slot.as_ssz_bytes()) + }) + .collect(); + self.do_atomically(ops) } fn iter_index(&self, c: DBColumnColdIndex) -> IndexIter<'_> { - kv_cold_store::iter_index(self, c) + Box::new( + self.iter_column::(c.db_column()) + .map(|res| res.and_then(|(root, value)| Ok((root, Slot::from_ssz_bytes(&value)?)))), + ) } fn sync(&self) -> Result<(), Error> { KeyValueStore::sync(self) diff --git a/beacon_node/store/src/metadata.rs b/beacon_node/store/src/metadata.rs index cf16c010817..2386291b799 100644 --- a/beacon_node/store/src/metadata.rs +++ b/beacon_node/store/src/metadata.rs @@ -19,6 +19,10 @@ pub const ANCHOR_INFO_KEY: Hash256 = Hash256::repeat_byte(5); pub const BLOB_INFO_KEY: Hash256 = Hash256::repeat_byte(6); pub const DATA_COLUMN_INFO_KEY: Hash256 = Hash256::repeat_byte(7); pub const DATA_COLUMN_CUSTODY_INFO_KEY: Hash256 = Hash256::repeat_byte(8); +/// Records which cold backend (`Kv` or `Static`) was used to write this DB. +/// Set on first open, checked on every subsequent open. Switching backends +/// in-place is unsupported. +pub const COLD_BACKEND_KEY: Hash256 = Hash256::repeat_byte(9); /// State upper limit value used to indicate that a node is not storing historic states. pub const STATE_UPPER_LIMIT_NO_RETAIN: Slot = Slot::new(u64::MAX); diff --git a/beacon_node/store/src/reconstruct.rs b/beacon_node/store/src/reconstruct.rs index 128be42dde3..8bace0ab2ce 100644 --- a/beacon_node/store/src/reconstruct.rs +++ b/beacon_node/store/src/reconstruct.rs @@ -1,4 +1,5 @@ //! Implementation of historic state reconstruction (given complete block history). 
+use crate::config::ColdBackendKind; use crate::forwards_iter::FrozenForwardsIterator; use crate::hot_cold_store::{ColdBatch, HotColdDB, HotColdDBError}; use crate::metrics; @@ -22,6 +23,15 @@ where self: &Arc, num_blocks: Option, ) -> Result<(), Error> { + // Online reconstruction writes historic states into the cold backend + // at slots that are already below the static-cold high-water mark for + // each column. The static backend is strict-ascending and would reject + // those puts as out-of-order. Per `specs/static-cold-backend.md`, a + // full node never becomes archive by online reconstruction — refuse. + if matches!(self.config.cold_backend, ColdBackendKind::Static) { + return Err(Error::ReconstructionUnsupportedOnStaticCold); + } + let mut anchor = self.get_anchor_info(); // Nothing to do, history is complete. diff --git a/beacon_node/store/src/static_cold.rs b/beacon_node/store/src/static_cold.rs index 0f0115e7f1c..82ba5b93e73 100644 --- a/beacon_node/store/src/static_cold.rs +++ b/beacon_node/store/src/static_cold.rs @@ -1,23 +1,50 @@ //! Slot-keyed durable archive for finalized cold-DB columns. //! -//! `StaticColdStore` is a black box from `HotColdDB`'s perspective: hand it -//! `(column, slot, bytes)`, ask it for them back by `(column, slot)`. File -//! mapping, recovery, and rename semantics are internal. +//! `StaticColdStore` is a black box: `(column, slot, bytes)` in, same back. +//! See `specs/static-cold-backend.md` for the abstraction-level contract. //! -//! Each column gets its own subdirectory under the store root, so the on-disk -//! format of a single column is the original single-column layout — -//! `static_blocks_{file_id:05}` data files, matching `.off` sidecars, and a -//! `static_blocks.conf` commit marker — just rooted at `//`. +//! # Layout //! -//! Per-column behaviour (compression, record-type tag, max value bytes) lives -//! in `column_config`, keyed by the tight `DBColumnCold` enum. +//! ```text +//! / +//! 
{blk,bbr,bsr,bss,bsd}/ # one subdir per DBColumnCold +//! data_{file_id:05} # file_id = slot / 8192 +//! data_{file_id:05}.off # 8192 × u64 LE offsets, 0 = no record +//! column.conf # 36-byte commit marker, atomic-renamed +//! index/ # embedded KV for DBColumnColdIndex +//! ``` //! -//! Contract: -//! - `put(column, slot, bytes)` is durable on return. -//! - Slots within a column must arrive strictly ascending; columns are -//! independent. +//! # File format //! -//! See `specs/static-blocks.md` for the on-disk format. +//! Data file: e2store version record (`65 32 00 00 00 00 00 00`), then records +//! appended as `type[2] | length[4 LE] | reserved[2]=0 | payload` (snappy- +//! framed if `column.compression`). Per-column tags in `column_config`. +//! +//! `column.conf`: `b"LHSTBLK2" | highest_slot u64 LE (u64::MAX = empty) | +//! current_data_len u64 LE | record_type[2] | compression u8 | reserved | max_value_bytes u64 LE`. +//! Atomic update: write `.tmp`, fsync, rename, fsync dir. +//! +//! # Put contract +//! +//! Durable on return. Slots arrive ascending **or** are identical-value +//! re-puts of an already-committed slot (so `migrate_database` retries after +//! a mid-loop crash are safe). Previously-skipped slots (offset 0) cannot +//! be filled — that would break the append-only data file. +//! +//! # Recovery on open +//! +//! Data file truncated to `current_data_len`; `.off` entries beyond +//! `highest_slot` cleared. The `column.conf` rename is the commit point. +//! +//! # TODO(static): tests +//! +//! - happy path `open` / `get` / `put` per `DBColumnCold` +//! - out-of-order put rejection +//! - identical-value re-put at any committed slot succeeds; mismatched +//! value or skipped-slot fill rejected +//! - crash windows around data, `.off`, and `column.conf` (heal on open) +//! - `max_value_bytes` ratchet-up persists on next open +//! 
- `COLD_BACKEND_KEY` mismatch refuses to start use crate::config::StoreConfig; use crate::database::interface::BeaconNodeBackend; @@ -38,8 +65,9 @@ use types::{EthSpec, Slot}; const SLOTS_PER_FILE: u64 = 8192; const OFFSET_SIZE: u64 = 8; const OFFSET_FILE_LEN: u64 = SLOTS_PER_FILE * OFFSET_SIZE; -const CONFIG_FILE: &str = "static_blocks.conf"; -const CONFIG_TMP_FILE: &str = "static_blocks.conf.tmp"; +const CONFIG_FILE: &str = "column.conf"; +const CONFIG_TMP_FILE: &str = "column.conf.tmp"; +const DATA_FILE_PREFIX: &str = "data_"; const CONFIG_MAGIC: &[u8; 8] = b"LHSTBLK2"; const CONFIG_LEN: usize = 36; /// Empty-store sentinel for `highest_written_slot` in the per-column config. @@ -215,12 +243,29 @@ impl Column { } let on_disk = read_config(&config_path)?; + // record_type and compression are sticky — they're load-bearing for + // reading old records, so on-disk wins over build-time defaults. + // max_value_bytes is a soft bound used to cap accepted record sizes; + // ratchet it up if the build's default is larger so a newer build + // can write bigger records than an older one persisted, then + // re-persist immediately so future opens see the new bound. 
+ let max_value_bytes = on_disk.max_value_bytes.max(defaults.max_value_bytes); let config = ColumnConfig { subdir: defaults.subdir, record_type: on_disk.record_type, compression: on_disk.compression, - max_value_bytes: on_disk.max_value_bytes, + max_value_bytes, }; + if max_value_bytes != on_disk.max_value_bytes { + atomic_write_config( + &config_path, + &tmp_path, + &root_dir, + on_disk.highest_written_slot, + on_disk.current_data_len, + &config, + )?; + } let handle = Self { root_dir, @@ -273,7 +318,7 @@ impl Column { data_file.read_exact(&mut payload)?; if self.config.compression { - decompress_record(&payload, self.config.max_value_bytes) + decompress_record(&payload, self.config.max_value_bytes).map(Some) } else { if (payload.len() as u64) > self.config.max_value_bytes { return Err(StaticColdStoreError::Invalid( @@ -296,29 +341,27 @@ impl Column { fn put(&self, slot: Slot, bytes: &[u8]) -> StoreResult<()> { let mut highest_written_slot = self.highest_written_slot.lock(); - if let Some(highest) = *highest_written_slot { - if slot < highest { - return Err(StaticColdStoreError::Invalid( - "static cold put out of order".into(), - )); - } - if slot == highest { - // Idempotent re-put: tolerate a write of the identical value - // at the most recently committed slot. Errors on a value - // mismatch — that's a real bug, not a duplicate. Read the - // existing record without re-locking the writer mutex. - let existing = self.read_record(slot)?.ok_or_else(|| { - StaticColdStoreError::Invalid( - "static cold missing record at highest slot".into(), - ) - })?; - if existing == bytes { - return Ok(()); - } - return Err(StaticColdStoreError::Invalid( - "static cold re-put with mismatched value".into(), - )); + if let Some(highest) = *highest_written_slot + && slot <= highest + { + // Idempotent re-put: any committed slot can be re-put with the + // identical value. 
Required so a `migrate_database` retry after a + // mid-loop crash can re-walk slots that were already committed in + // the previous attempt without tripping the strict-ascending + // invariant. A previously-skipped slot (offset zero) cannot be + // filled in — that would break the append-only data file. + let existing = self.read_record(slot)?.ok_or_else(|| { + StaticColdStoreError::Invalid(format!( + "static cold re-put at slot {slot} <= highest {highest} \ + but no record exists; cannot fill a previously-skipped slot" + )) + })?; + if existing == bytes { + return Ok(()); } + return Err(StaticColdStoreError::Invalid(format!( + "static cold re-put at slot {slot} with mismatched value" + ))); } let payload = if self.config.compression { @@ -452,12 +495,13 @@ impl Column { } fn data_path(&self, file_id: u64) -> PathBuf { - self.root_dir.join(format!("static_blocks_{file_id:05}")) + self.root_dir + .join(format!("{DATA_FILE_PREFIX}{file_id:05}")) } fn offset_path(&self, file_id: u64) -> PathBuf { self.root_dir - .join(format!("static_blocks_{file_id:05}.off")) + .join(format!("{DATA_FILE_PREFIX}{file_id:05}.off")) } } @@ -557,7 +601,7 @@ fn write_record( Ok(()) } -fn decompress_record(bytes: &[u8], max_value_bytes: u64) -> StoreResult>> { +fn decompress_record(bytes: &[u8], max_value_bytes: u64) -> StoreResult> { let decoder = FrameDecoder::new(bytes); let mut limited = decoder.take(max_value_bytes + 1); let mut decompressed = Vec::new(); @@ -569,7 +613,7 @@ fn decompress_record(bytes: &[u8], max_value_bytes: u64) -> StoreResult StoreResult<()> { @@ -597,6 +641,10 @@ impl crate::ColdStore for StaticColdStore { } fn iter_from(&self, c: DBColumnCold, from: Slot) -> crate::SlotIter<'_> { + // TODO(static): this is O(highest - from) reads, one File::open per slot, + // and most slots in sparse columns (StateSnapshot/StateDiff) yield None. + // Acceptable today because iter_from is only used by infrequent paths + // (forwards iter, invariants). 
Improve if it becomes a hotspot. let column = &self.columns[&c]; let Some(highest) = *column.highest_written_slot.lock() else { return Box::new(std::iter::empty()); diff --git a/specs/static-blocks.md b/specs/static-blocks.md deleted file mode 100644 index db57867e6bf..00000000000 --- a/specs/static-blocks.md +++ /dev/null @@ -1,314 +0,0 @@ -# Static Block Storage - -Static-file backend for finalized **blinded** `SignedBeaconBlock` archival. -Slot-indexed, append-only forward. Execution payloads, full blocks, and blobs -are out of scope. - -**Genesis sync only.** Checkpoint sync, historical block backfill, and -late activation on an existing populated node are incompatible and rejected -at startup. - -## API - -A field on `HotColdDB`. Not a `KeyValueStore`. No `Hash256` in the API; the -archive is purely slot-keyed. File rotation, fsync ordering, and crash recovery -are internal. - -```rust -fn open(path: &Path) -> Result; -fn get(slot: Slot) -> Result>>; // SSZ-encoded blinded block -fn put(slot: Slot, bytes: &[u8]) -> Result<()>; // durable on return -``` - -`put` durability on return is the only caller-visible contract; the source- -of-truth flip in `migrate_database` relies on it. - -## Static file format - -Files live together in one directory: - -``` -static_blocks_00000 -static_blocks_00000.off -static_blocks_00001 -static_blocks_00001.off -static_blocks.conf -``` - -Mapping: - -``` -SLOTS_PER_FILE = 8192 -file_id = slot / SLOTS_PER_FILE -index = slot % SLOTS_PER_FILE -off_pos = index * 8 -``` - -The data file name uses `file_id` as a zero-padded decimal number. The slot -range is derived from the id and is not encoded in the name. 
- -Each data file starts with the e2store version record: - -``` -65 32 00 00 00 00 00 00 -``` - -Block records are appended after it: - -``` -type: [0x01, 0x00] -length: compressed_data.len() as u32, little-endian -reserved: u16 = 0 -data: snappy-framed(SSZ-encoded blinded SignedBeaconBlock bytes) -``` - -The `.off` file is fixed-size: `8192 * 8` bytes. Each entry is a little-endian -`u64` absolute byte offset into the matching data file. Offset `0` means no -block is present for that slot. Real block offsets are nonzero because the data -file starts with the version record. - -`static_blocks.conf` is global to the static block store and is fixed-size: - -``` -magic: [u8; 8] = b"LHSTBLK1" -highest_written_slot: u64 little-endian, u64::MAX means empty -current_data_len: u64 little-endian -``` - -`current_data_len` applies to the current file, derived from -`highest_written_slot / SLOTS_PER_FILE`. - -Config updates are atomic: - -1. Write the full config to `static_blocks.conf.tmp`. -2. Fsync `static_blocks.conf.tmp`. -3. Rename it over `static_blocks.conf`. -4. Fsync the directory. - -## `put` contract - -`put(slot, bytes)` requires: - -``` -highest_written_slot == None || slot > highest_written_slot -snappy_framed(bytes).len() <= u32::MAX -``` - -Skipped slots are allowed. They leave zero offsets in `.off`. - -Write sequence: - -1. Lock the writer. -2. Reject `slot <= highest_written_slot`. -3. Compute `file_id`, `index`, and `off_pos`. -4. Create or open `static_blocks_{file_id:05}`. -5. If the data file is new, write the e2store version record. -6. Create or open `static_blocks_{file_id:05}.off`. -7. If the `.off` file is new, initialize it to `8192 * 8` zero bytes. -8. Compress `bytes` with snappy-framed compression. -9. Append the compressed block record to the data file, remembering the offset - of its 8-byte record header. -10. Fsync the data file. -11. Write the offset as `u64` little-endian at `off_pos` in the `.off` file. -12. Fsync the `.off` file. -13. 
Atomically update `static_blocks.conf` with: - ``` - highest_written_slot = slot - current_data_len = data_file_len - ``` -14. Fsync the directory after the rename. - -A write is committed only when `static_blocks.conf` reflects it. - -On open, the store reads `static_blocks.conf`, truncates the current data file -to `current_data_len`, and clears offsets after `highest_written_slot` in the -current `.off` file. - -Crash behavior: - -| Crash point | Restart behavior | -| - | - | -| Before `static_blocks.conf` update | Previous slot remains committed; appended data is truncated and offset tail is cleared. | -| During `static_blocks.conf.tmp` write | Previous `static_blocks.conf` remains the commit marker. | -| After `static_blocks.conf` rename | New slot is committed. | - -## `get` contract - -`get(slot)`: - -1. Compute `file_id`, `index`, and `off_pos`. -2. Open `static_blocks_{file_id:05}.off`. -3. Read the `u64` little-endian offset at `off_pos`. -4. If the offset is `0`, return `None`. -5. Open `static_blocks_{file_id:05}`. -6. Seek to the offset. -7. Read and validate the 8-byte block record header: - ``` - type == [0x01, 0x00] - reserved == 0 - ``` -8. Read `length` compressed bytes. -9. Snappy-decompress the bytes with the consensus maximum - `SignedBeaconBlock` SSZ size for the active fork as the output bound. -10. Return the decompressed SSZ bytes. - -If decompression exceeds the bound, return a corruption error. - -Missing files are treated as `None` only when the slot is beyond -`highest_written_slot`. Missing files for committed slots are corruption. - -## `open` contract - -In-memory state is minimal: - -``` -dir -highest_written_slot -mutex -``` - -Files are opened inside `put` and `get`; the store does not cache current file -handles in v1. - -`static_blocks.conf` uses `u64::MAX` as the empty-store sentinel for -`highest_written_slot`. - -`open(path)`: - -1. Create `path` if it does not exist. -2. 
If `static_blocks.conf` does not exist, create it with: - ``` - magic = b"LHSTBLK1" - highest_written_slot = u64::MAX - current_data_len = 0 - ``` -3. Read and validate `static_blocks.conf`. -4. If `highest_written_slot == u64::MAX`, initialize in-memory - `highest_written_slot = None` and return. -5. Derive the current file from `highest_written_slot / SLOTS_PER_FILE`. -6. Truncate the current data file to `current_data_len`. -7. Clear `.off` entries after `highest_written_slot` in the current `.off` - file by writing zeroes. -8. Initialize in-memory `highest_written_slot = Some(slot)`. - -## Interaction with existing DBs - -| Concern | Today | With static blocks | -| --------------------- | ---------------------------------------------------- | ------------------------------------------------------ | -| Blinded body by root | `hot_db[BeaconBlock][root]`, forever | `hot_db` until archived, then `static.get(slot)` | -| Slot → root | `cold_db[BeaconBlockRoots][slot]` | unchanged | -| Root → slot | not stored | **new**: `cold_db[BeaconBlockSlot][root]` (SSZ `Slot`) | -| Execution payload | `hot_db[ExecPayload][root]` / `[PayloadEnvelope]` | unchanged | -| Blobs / data columns | `blobs_db` | unchanged | -| Cold-DB block bodies | none (cold has only indices) | unchanged | -| Backfill | writes blinded bodies to `hot_db`, slot→root to cold | rejected at startup | - -## Read path - -`HotColdDB::get_block_with(root)`: -1. `hot_db[BeaconBlock][root]` — hits unfinalized blocks and blocks not yet - archived. -2. else `cold_db[BeaconBlockSlot][root] -> slot`, then `static.get(slot)`. -3. else `None`. - -`HotColdDB::block_exists` mirrors (1)+(2) without decoding. - -## Write path - -Block archival lives **inside `migrate_database`** as a second pass over the -already-collected `state_roots` vector. The migration's existing loop is -unchanged; a new loop after it walks the same range to drive archival. 
Both -loops contribute to the same `cold_db_block_ops` batch, so `BeaconBlockRoots` -and `BeaconBlockSlot` are committed atomically. - -``` -migrate_database(finalized_state): - state_roots = RootsIterator(finalized_state).take_while(slot >= current_split.slot) - - # Loop 1 (existing): BeaconBlockRoots puts + cold-state migration. - for (block_root, state_root, slot) in state_roots ascending: - cold_db_block_ops.push(BeaconBlockRoots[slot] = block_root) - ...cold state ops... - - # Loop 2 (new, gated on static_blocks): archival. - if static_blocks: - # Seed from the slot just below the iteration to catch the boundary case - # where current_split.slot is itself a skip-slot extension of a block - # archived in a previous migration. - prev_block_root = cold_db[BeaconBlockRoots][current_split.slot - 1] - or Hash256::ZERO # genesis seed; never collides - for (block_root, _, slot) in state_roots ascending: - if block_root == prev_block_root: continue # skip-slot extension - prev_block_root = block_root - if slot >= finalized_state.slot(): continue # new-split block stays in hot - bytes = hot_db[BeaconBlock][block_root] # must be present - static_blocks.put(slot, bytes) # durable - cold_db_block_ops.push(BeaconBlockSlot[block_root] = slot) - hot_db_block_delete_ops.push(delete BeaconBlock[block_root]) - - # Atomic commit of cold ops (BeaconBlockRoots + BeaconBlockSlot together). - cold_db.do_atomically(cold_db_block_ops) - cold_db.sync() - - # Split commit. - ...write SPLIT_KEY, update in-memory split... - - # Reclaim hot-KV space. - hot_db.do_atomically(hot_db_block_delete_ops) -``` - -### Why the seed catches the boundary - -`RootsIterator` yields the same `block_root` for every slot covered by that -block, including skip-slot extensions. In ascending iteration the **first** -slot of each run is the block's real slot — *except* when the migration -starts inside a run (i.e. 
`current_split.slot` is itself a skip-slot -extension of a block archived in a previous migration). Reading -`BeaconBlockRoots[current_split.slot - 1]` returns that previous block's root, -the dedup match fires on the first iteration, and we correctly skip. - -If the previous-slot lookup is missing, the cold DB is inconsistent and the -migration aborts with `Error::MigrationError`. - -### Crash semantics - -| Crash window | State after restart | Recovery | -| --------------------------------------------- | --------------------------------------------------------- | ---------------------------------------------- | -| During loops, before cold commit | Nothing committed. | Migration retried fresh. | -| Between cold commit and split commit | Reverse-index committed but split not advanced. | Migration retried; cold puts are idempotent, hot bodies still present. | -| Between split commit and hot delete | Split advanced, reverse-index committed, bodies linger in hot. | Reads still correct (hot returns the same bytes); leaked bodies stay in hot. | - -The last window is a bounded leak (~one migration's worth of bodies, ~32 -blocks) and a rare crash. No automatic recovery in v1; can be addressed later -by a startup scan if it matters in practice. - -## Modes of operation - -| Mode | Behavior | -| ------------------------------------- | ----------------------------------------------------------------------- | -| **Disabled** (default) | `static_blocks: None`. Byte-identical to current. | -| **Genesis sync + static enabled** | Archive grows from slot 0; bodies migrate out of `hot_db` per epoch. | -| **Checkpoint sync + static enabled** | Refused at startup. | -| **Late activation on existing node** | Refused at startup. | - -Late activation is unsupported because there is no persisted "lowest unarchived -slot" — the migration relies on `current_split.slot` as the watermark, so the -prefix below it would never be archived. Operator must reinitialize. 
- -## Schema - -- New `DBColumn::BeaconBlockSlot` (3-letter tag `bbs`). Key: 32-byte block - root. Value: SSZ-encoded `Slot` (8 bytes). Lives in `cold_db`. -- No changes to `AnchorInfo`, `BlobInfo`, `Split`, or any existing column. -- Schema version bump on the addition. - -## CLI - -- `--store-static-blocks` (default off). Mutually exclusive with - `--checkpoint-sync-url` and any block-backfill flag; node refuses to start - if both are set. Cannot be enabled on a node previously run without it. - -## Coexistence - -Additive. Default paths (no flag) are untouched. The `blobs_db` and the -era-blob backend (see `era-storage.md`) are independent of this. From 33b2b2a546ee7689e80db1ace10aaab062afca5f Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Sat, 9 May 2026 00:07:01 +0200 Subject: [PATCH 20/24] Run store_tests under static cold backend in CI Adds a sibling job to `beacon-chain-tests` that runs `beacon_chain::store_tests::*` with `COLD_BACKEND=static` (and `FORK_NAME=fulu`) to exercise the static slot-keyed cold-DB backend on every CI run. Mirrors the existing job's runner, toolchain, cache, and feature flags (`fork_from_env,slasher/lmdb,portable`). Added to `test-suite-success` so the merge queue blocks on it. 
--- .github/workflows/test-suite.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 1d66bd30e78..c3a56a86a70 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -148,6 +148,31 @@ jobs: cache-provider: warpbuild - name: Run beacon_chain tests for all known forks run: make test-beacon-chain + beacon-chain-store-tests-static-cold: + name: beacon-chain-store-tests-static-cold + needs: [check-labels] + if: needs.check-labels.outputs.skip_ci != 'true' + runs-on: ${{ github.repository == 'sigp/lighthouse' && 'warp-ubuntu-latest-x64-8x;snapshot.key=lighthouse-ubuntu-latest-v1' || 'ubuntu-latest' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v5 + - if: github.repository != 'sigp/lighthouse' + name: Get latest version of stable Rust + uses: moonrepo/setup-rust@v1 + with: + channel: stable + cache-target: release + bins: cargo-nextest + - if: github.repository == 'sigp/lighthouse' + uses: Swatinem/rust-cache@v2 + with: + cache-provider: warpbuild + - name: Run beacon_chain store_tests against the static cold backend + env: + COLD_BACKEND: static + FORK_NAME: fulu + run: cargo nextest run --release --features "fork_from_env,slasher/lmdb,$TEST_FEATURES" -p beacon_chain --test beacon_chain_tests -E 'test(/^store_tests::/)' --no-fail-fast http-api-tests: name: http-api-tests needs: [check-labels] @@ -493,6 +518,7 @@ jobs: 'forbidden-files-check', 'release-tests-ubuntu', 'beacon-chain-tests', + 'beacon-chain-store-tests-static-cold', 'op-pool-tests', 'network-tests', 'slasher-tests', From bbc3badfd2c80e49642087838e0079c3e495db3c Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Sat, 9 May 2026 00:10:32 +0200 Subject: [PATCH 21/24] Wire static cold block reads + hot-delete after migrate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Adds the missing pieces so the static cold archive can serve block-by-root reads without keeping a duplicate in hot indefinitely. Schema (re-adds what f671da1fd5 dropped): - `DBColumn::BeaconBlockSlot` (tag `bbs`, 32-byte key, 8-byte SSZ Slot) - `DBColumnColdIndex::BlockSlot` variant Migrate (`migrate_database`): - alongside the existing block-bulk push to `cold.Block`, push the matching `(block_root, slot)` to `cold_block_slot_index` and the `block_root` to `hot_block_delete_roots` - end-of-loop: `put_index_batch(BlockSlot, ...)` after `ColdStateSummary`, before split commit - post split commit: `hot_db.do_atomically(deletes)` reclaims hot space for the just-migrated blocks. Hot delete only runs after cold bytes + cold index are durable, so a crash here leaves cold canonical and reads fall through. KV mode keeps `move_blocks_to_static_cold` false → all the new buffers stay empty → status quo. Read fallback (`get_block_with`, `block_exists`): - hot first; on miss, `cold.get_index(BlockSlot, root)` then `cold.get(Block, slot)`. Missing bulk for an indexed slot raises `MissingFrozenBlock` (corruption). KV mode's empty BlockSlot index makes the fallback always return None on hot miss — identical to before. Invariant 10 (`check_cold_block_root_indices`): - now uses `self.block_exists(&block_root)` (the public read with cold fallback) instead of the bare `hot_db.key_exists(...)`. Required because hot-delete makes the bare hot check fire spuriously for every migrated slot under Static cold. Init-path coverage: - Genesis + KV: cold writes gated off, BlockSlot empty, fallback always None on hot miss. Status quo. - Genesis + Static: migrate writes block + index to cold, deletes from hot. Reads ≥ split.slot hit hot; < split.slot hit cold via fallback. - Era + Static: hot has only post-anchor blocks. cold has 0..S from era (future era-import path) + post-S from migrate. Fallback is the read path for slot < S. 
- Ckpt + KV: BlockSlot empty as in Genesis + KV. Backfill fills hot. - Ckpt + Static (no era): rejected by the existing WSS guard. --- beacon_node/store/src/hot_cold_store.rs | 73 +++++++++++++++++++++++-- beacon_node/store/src/invariants.rs | 9 +-- beacon_node/store/src/lib.rs | 13 +++++ 3 files changed, 85 insertions(+), 10 deletions(-) diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index d89dc5a0d26..3b4026aa32d 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -755,16 +755,34 @@ impl, Cold: ColdStore> HotColdDB /// /// This is useful for e.g. ignoring the slot-indicated fork to forcefully load a block as if it /// were for a different fork. + /// + /// Reads hot first, then falls back to the cold archive via the + /// `BlockSlot` index. Under KV cold the index is empty so the fallback + /// always returns None — behaviour is identical to a hot-only read. + /// Under Static cold (genesis-archive or era-import), blocks at slot < + /// split.slot live in cold only, and the fallback is the read path. pub fn get_block_with>( &self, block_root: &Hash256, decoder: impl FnOnce(&[u8]) -> Result, ssz::DecodeError>, ) -> Result>, Error> { - self.hot_db + if let Some(bytes) = self + .hot_db .get_bytes(DBColumn::BeaconBlock, block_root.as_slice())? - .map(|block_bytes| decoder(&block_bytes)) - .transpose() - .map_err(|e| e.into()) + { + return decoder(&bytes).map(Some).map_err(Into::into); + } + let Some(slot) = self + .cold_db + .get_index(DBColumnColdIndex::BlockSlot, *block_root)? + else { + return Ok(None); + }; + let bytes = self + .cold_db + .get(DBColumnCold::Block, slot)? + .ok_or(HotColdDBError::MissingFrozenBlock(slot))?; + decoder(&bytes).map(Some).map_err(Into::into) } pub fn get_payload_envelope( @@ -966,9 +984,23 @@ impl, Cold: ColdStore> HotColdDB } /// Determine whether a block exists in the database. 
+ /// + /// Mirrors `get_block_with`: hot first, then cold via the `BlockSlot` + /// index. pub fn block_exists(&self, block_root: &Hash256) -> Result { - self.hot_db - .key_exists(DBColumn::BeaconBlock, block_root.as_slice()) + if self + .hot_db + .key_exists(DBColumn::BeaconBlock, block_root.as_slice())? + { + return Ok(true); + } + let Some(slot) = self + .cold_db + .get_index(DBColumnColdIndex::BlockSlot, *block_root)? + else { + return Ok(false); + }; + self.cold_db.contains(DBColumnCold::Block, slot) } /// Delete a block from the store and the block cache. @@ -3605,6 +3637,14 @@ pub fn migrate_database, Cold: ColdStore>( crate::config::ColdBackendKind::Static ); let mut cold_db_block_data: Vec<(Slot, Vec)> = vec![]; + // `block_root -> slot` index for blocks moved into the cold archive, + // committed after the slot-keyed bulk so a crash leaves no dangling index + // entry. Reads on hot miss go index -> bulk. + let mut cold_block_slot_index: Vec<(Hash256, Slot)> = vec![]; + // Hot block roots whose bytes are now durably in cold; deleted from hot + // after the cold index is committed. Hot keeps blocks at slot >= + // split.slot; cold owns slot < split.slot under Static. 
+ let mut hot_block_delete_roots: Vec = vec![]; let mut last_seen_block_root: Option = None; // Iterate in descending order until the current split slot @@ -3638,6 +3678,8 @@ pub fn migrate_database, Cold: ColdStore>( )?; if block.slot() == *slot { cold_db_block_data.push((*slot, block_bytes)); + cold_block_slot_index.push((*block_root, *slot)); + hot_block_delete_roots.push(*block_root); } } } @@ -3713,6 +3755,11 @@ pub fn migrate_database, Cold: ColdStore>( DBColumnColdIndex::ColdStateSummary, cold_state_summary_index, )?; + if !cold_block_slot_index.is_empty() { + store + .cold_db + .put_index_batch(DBColumnColdIndex::BlockSlot, cold_block_slot_index)?; + } let new_split = { let mut split_guard = store.split.write(); let latest_split = *split_guard; @@ -3749,6 +3796,20 @@ pub fn migrate_database, Cold: ColdStore>( new_split }; + // Reclaim hot-DB space for blocks that are now durably in cold. Run AFTER + // the split commit: if we crash here, the next `get_block_with` for one + // of these roots will hit the cold fallback (BlockSlot index then Block + // bulk) and find them. A crash *before* this point is also safe — hot + // still has the bytes, and the next migration's idempotent re-puts cover + // any partial cold state. + if !hot_block_delete_roots.is_empty() { + let hot_delete_ops: Vec = hot_block_delete_roots + .into_iter() + .map(|root| KeyValueStoreOp::DeleteKey(DBColumn::BeaconBlock, root.as_slice().to_vec())) + .collect(); + store.hot_db.do_atomically(hot_delete_ops)?; + } + // Update the cache's view of the finalized state. 
store.update_finalized_state( finalized_state_root, diff --git a/beacon_node/store/src/invariants.rs b/beacon_node/store/src/invariants.rs index 49a7418c21c..fe2e3e802ca 100644 --- a/beacon_node/store/src/invariants.rs +++ b/beacon_node/store/src/invariants.rs @@ -607,10 +607,11 @@ impl, Cold: ColdStore> HotColdDB } let block_root = Hash256::from_slice(&root_bytes); - let block_exists = self - .hot_db - .key_exists(DBColumn::BeaconBlock, block_root.as_slice())?; - if !block_exists { + // Check both hot and (for Static cold) the cold archive via the + // BlockSlot index — under Static cold, finalized canonical + // blocks are deleted from hot once they're durable in cold, so a + // hot-only check would flag every migrated slot as an orphan. + if !self.block_exists(&block_root)? { result.add_violation(InvariantViolation::ColdBlockRootOrphan { slot, block_root }); } } diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index b756a423113..61b49a91dc8 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -146,6 +146,11 @@ impl DBColumnCold { /// Root-keyed indices owned by the cold backend. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum DBColumnColdIndex { + /// `block_root -> slot` for blocks moved into the cold archive. Populated + /// by `migrate_database` (Static cold) and era-file import. Empty under + /// KV cold. Consulted by `HotColdDB::get_block_with` to resolve root-keyed + /// reads against the slot-keyed cold archive. + BlockSlot, /// `state_root -> slot` for cold state summaries. ColdStateSummary, } @@ -153,6 +158,7 @@ pub enum DBColumnColdIndex { impl DBColumnColdIndex { pub fn db_column(self) -> DBColumn { match self { + Self::BlockSlot => DBColumn::BeaconBlockSlot, Self::ColdStateSummary => DBColumn::BeaconColdStateSummary, } } @@ -431,6 +437,12 @@ pub enum DBColumn { /// Can be removed once schema v22 is buried by a hard fork. 
#[strum(serialize = "bbr")] BeaconBlockRootsChunked, + /// `block_root -> slot` index for blocks moved into the cold archive. + /// Populated by `migrate_database` (Static cold) and era-file import. + /// Empty under KV cold. Consulted by `HotColdDB::get_block_with` to + /// resolve root-keyed reads against the slot-keyed cold archive. + #[strum(serialize = "bbs")] + BeaconBlockSlot, /// DEPRECATED. Can be removed once schema v22 is buried by a hard fork. #[strum(serialize = "bhr")] BeaconHistoricalRoots, @@ -486,6 +498,7 @@ impl DBColumn { Self::OverflowLRUCache => 33, // DEPRECATED Self::BeaconMeta | Self::BeaconBlock + | Self::BeaconBlockSlot | Self::BeaconState | Self::BeaconBlob | Self::BeaconStateSummary From a0d8ffbccba1295a7b002c832bc77f791396ce1d Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Sat, 9 May 2026 00:23:50 +0200 Subject: [PATCH 22/24] Refresh `lighthouse beacon_node` help snapshot for `--cold-backend` `e259a5157b` introduced `--cold-backend` without touching `book/src/help_bn.md`, so `cli-check` failed on every push. Regenerate the snapshot with `make cli-local`. --- book/src/help_bn.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/book/src/help_bn.md b/book/src/help_bn.md index b580bcae528..d43bc44516e 100644 --- a/book/src/help_bn.md +++ b/book/src/help_bn.md @@ -71,6 +71,10 @@ Options: --checkpoint-sync-url-timeout Set the timeout for checkpoint sync calls to remote beacon node HTTP endpoint. [default: 180] + --cold-backend + Cold (freezer) DB backend. "kv" stores cold data in the same KV as the + hot DB. "static" stores cold data in slot-keyed static files; only + supported when starting from genesis. [possible values: kv, static] -d, --datadir Used to specify a custom root data directory for lighthouse keys and databases. 
Defaults to $HOME/.lighthouse/{network} where network is From 0381575296c12703fe1c7ee43ac3a40c8d8a8cc4 Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Sat, 9 May 2026 00:39:47 +0200 Subject: [PATCH 23/24] schema_stability: include `bbs` in expected DBColumn snapshot Re-added in `bbc3badfd2` (`BeaconBlockSlot`); the hardcoded snapshot in `check_db_columns` wasn't updated, so the test asserted on a stale list. --- beacon_node/beacon_chain/tests/schema_stability.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/tests/schema_stability.rs b/beacon_node/beacon_chain/tests/schema_stability.rs index 76e1a0804f6..5c277802fdc 100644 --- a/beacon_node/beacon_chain/tests/schema_stability.rs +++ b/beacon_node/beacon_chain/tests/schema_stability.rs @@ -107,7 +107,7 @@ fn check_db_columns() { let expected_columns = vec![ "bma", "blk", "blb", "bdc", "bdi", "ste", "hsd", "hsn", "bsn", "bsd", "bss", "bs3", "bcs", "bst", "exp", "pay", "bch", "opo", "etc", "frk", "pkc", "brp", "bsx", "bsr", "bbx", "bbr", - "bhr", "brm", "dht", "cus", "otb", "bhs", "olc", "lcu", "scb", "scm", "dmy", + "bbs", "bhr", "brm", "dht", "cus", "otb", "bhs", "olc", "lcu", "scb", "scm", "dmy", ]; assert_eq!(expected_columns, current_columns); } From 13c74f944207ddfb54e4f4c83f35d2e83df07f6b Mon Sep 17 00:00:00 2001 From: dapplion <35266934+dapplion@users.noreply.github.com> Date: Sun, 10 May 2026 01:02:47 +0200 Subject: [PATCH 24/24] static_cold: batch fsyncs in put_batch Replace the per-slot fsync loop in `put_batch` with one fsync per file: items are grouped by file_id, all records appended through a BufWriter, then a single sync_all for the data file, all offsets written, single sync_all for the offset file, and a single atomic config commit per batch. Same caller-visible "batch durable on return" contract. 
For an 8192-item batch (one ERA's worth of slot-keyed writes) this drops fsync count from ~32k (4 per slot) to ~3, with measured speedups between 155x and 775x per column on /mnt/ssd NVMe. Spec updated to reflect the batched semantics. --- beacon_node/store/src/static_cold.rs | 149 ++++++++++++++++++++++++++- specs/static-cold-backend.md | 11 ++ 2 files changed, 156 insertions(+), 4 deletions(-) diff --git a/beacon_node/store/src/static_cold.rs b/beacon_node/store/src/static_cold.rs index 82ba5b93e73..1da8f2219b8 100644 --- a/beacon_node/store/src/static_cold.rs +++ b/beacon_node/store/src/static_cold.rs @@ -426,6 +426,150 @@ impl Column { Ok(()) } + /// Append `items` to the column with one fsync per file (data + offset), + /// not per slot. Whole batch is durable on return — the same caller-visible + /// contract as `put` — but with O(1) syncs per underlying file instead of + /// O(n) per item. + /// + /// The implementation walks `items` once, grouping them by `file_id`. For + /// each group it opens the data file and offset file once, appends every + /// record's bytes (collecting `(slot, offset)` pairs in memory), writes the + /// offset table, and fsyncs both files. One `write_config` after the last group commits the + /// batch. Idempotent re-put of `items[0]` at `highest_written_slot` is honored as in `put`. + fn put_batch(&self, items: Vec<(Slot, Vec)>) -> StoreResult<()> { + if items.is_empty() { + return Ok(()); + } + + // Validate ascending order up front (cheap, catches caller bugs). + for w in items.windows(2) { + if w[1].0 <= w[0].0 { + return Err(StaticColdStoreError::Invalid( + "static cold put_batch slots must be strictly ascending".into(), + )); + } + } + + let mut highest_written_slot = self.highest_written_slot.lock(); + let mut iter = items.into_iter().peekable(); + + // Idempotent re-put: if the first item is exactly highest_written_slot + // with matching bytes, drop it from the batch. 
+ if let (Some(highest), Some((first_slot, _))) = (*highest_written_slot, iter.peek()) { + if *first_slot < highest { + return Err(StaticColdStoreError::Invalid( + "static cold put_batch out of order vs highest_written_slot".into(), + )); + } + if *first_slot == highest { + let (slot, value) = iter.next().expect("peeked"); + let existing = self.read_record(slot)?.ok_or_else(|| { + StaticColdStoreError::Invalid( + "static cold missing record at highest slot".into(), + ) + })?; + if existing != value { + return Err(StaticColdStoreError::Invalid( + "static cold re-put with mismatched value".into(), + )); + } + } + } + + // Group remaining items by file_id, write each group with a single + // fsync per file. + let mut last_slot: Option = None; + let mut last_data_len: u64 = 0; + while iter.peek().is_some() { + let target_file_id = file_id(iter.peek().expect("peeked").0); + let mut group: Vec<(Slot, Vec)> = Vec::new(); + while let Some(&(slot, _)) = iter.peek() { + if file_id(slot) != target_file_id { + break; + } + group.push(iter.next().expect("peeked")); + } + + let reset_file = (*highest_written_slot).map(file_id) != Some(target_file_id); + let data_path = self.data_path(target_file_id); + let off_path = self.offset_path(target_file_id); + + // Data file: append all records, then fsync once. + let mut data_file = OpenOptions::new() + .read(true) + .append(true) + .create(true) + .open(&data_path)?; + if reset_file { + data_file.set_len(0)?; + } + if data_file.metadata()?.len() == 0 { + data_file.write_all(&VERSION_RECORD)?; + } + // BufWriter coalesces the small-record header writes (8 bytes) and + // the small payloads into larger syscalls. 
+ let mut offsets: Vec<(Slot, u64)> = Vec::with_capacity(group.len()); + { + let mut writer = std::io::BufWriter::with_capacity(1 << 20, &mut data_file); + let mut cursor = writer.get_ref().metadata()?.len(); + for (slot, value) in &group { + let payload: std::borrow::Cow<'_, [u8]> = if self.config.compression { + compress_record(value)?.into() + } else { + value.as_slice().into() + }; + let payload_len = u32::try_from(payload.len()).map_err(|_| { + StaticColdStoreError::Invalid("static cold record too large".into()) + })?; + offsets.push((*slot, cursor)); + // Inline `write_record` to avoid the `&mut File` -> BufWriter mismatch. + writer.write_all(&self.config.record_type)?; + writer.write_all(&payload_len.to_le_bytes())?; + writer.write_all(&0u16.to_le_bytes())?; + writer.write_all(&payload)?; + cursor += 8 + payload.len() as u64; + } + writer.flush()?; + } + let data_len = data_file.seek(SeekFrom::End(0))?; + data_file.sync_all()?; + + // Offset file: open, ensure full size, write all offsets in seek+write + // pairs (8 bytes each), then fsync once. + let mut off_file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(&off_path)?; + if reset_file { + off_file.set_len(0)?; + } + if off_file.metadata()?.len() < OFFSET_FILE_LEN { + off_file.set_len(OFFSET_FILE_LEN)?; + } + for (slot, offset) in &offsets { + off_file.seek(SeekFrom::Start(offset_position(*slot)))?; + off_file.write_all(&offset.to_le_bytes())?; + } + off_file.sync_all()?; + + // Track final slot/data_len for the single config commit at end of batch. + if let Some((s, _)) = group.last() { + last_slot = Some(*s); + last_data_len = data_len; + } + *highest_written_slot = last_slot; + } + + // Single atomic config commit covering the whole batch. 
+ if let Some(s) = last_slot { + self.write_config(Some(s), last_data_len)?; + } + + Ok(()) + } + fn heal_current_file(&self, slot: Slot, current_data_len: u64) -> StoreResult<()> { let file_id = file_id(slot); let data_path = self.data_path(file_id); @@ -630,10 +774,7 @@ impl crate::ColdStore for StaticColdStore { } fn put_batch(&self, c: DBColumnCold, items: Vec<(Slot, Vec)>) -> Result<(), crate::Error> { - for (slot, value) in items { - self.put(c, slot, &value)?; - } - Ok(()) + self.columns[&c].put_batch(items).map_err(Into::into) } fn contains(&self, c: DBColumnCold, slot: Slot) -> Result { diff --git a/specs/static-cold-backend.md b/specs/static-cold-backend.md index 84add548ff3..f142b73011f 100644 --- a/specs/static-cold-backend.md +++ b/specs/static-cold-backend.md @@ -53,6 +53,17 @@ the same KV implementation Lighthouse uses for the main DB at `/index/` to serve these. Crash-safety rule: slot-keyed bulk data is committed before the matching root index entry, so a crash leaves cold data without a dangling index. +### `put_batch` durability and fsync semantics + +`put_batch(items)` is durable on return for the batch as a whole — the same +caller-visible contract as N×`put` — but it performs O(1) fsyncs per +underlying file regardless of batch size, instead of the 4 fsyncs per slot +that the per-item path issues (data file, offset file, config tmp, config +dir). Within a column, slots in `items` must be strictly ascending; items +that span multiple `file_id` boundaries are handled by grouping internally, +with one data fsync and one offset fsync per touched file plus a single +atomic config commit at the end of the batch. + ## Removed - `lighthouse db prune-states` and `HotColdDB::prune_historic_states`. They