diff --git a/.claude/state-cache-memory-tracking.md b/.claude/state-cache-memory-tracking.md new file mode 100644 index 00000000000..a4915878cfb --- /dev/null +++ b/.claude/state-cache-memory-tracking.md @@ -0,0 +1,122 @@ +# State Cache Memory Tracking + +## Problem + +The state cache needs to know how much memory cached states consume to enforce +a byte budget (`--state-cache-max-mb`) and avoid OOM. States share tree nodes +via milhouse COW — the marginal cost depends on which nodes are shared. + +### Prior art + +- **sigp/lighthouse#7803** — Full `MemoryTracker` walk over all cached states. + Rejected: 450ms+ per measurement at mainnet scale, holds cache mutex. +- **sigp/lighthouse#7449, #7450** — Tracking issues for cache size measurement. +- **Spec-derived estimation** (`estimated_marginal_bytes`) — O(1) heuristic from + spec knowledge. Implemented in this branch as a fallback, with 25 tests. Tight + at epoch boundary (1.04x) but loose mid-epoch (3x). No milhouse dependency. + +## Current design: ApproxOwnedBytes + cow_bytes + +### How it works + +Each `BeaconState` carries a `Vec>` — byte counts for +chunks of tree memory it owns. States that share ancestry (via clone) share the +same `Arc` entries. Total cache memory = sum of unique entries (deduplicated by +Arc pointer) across all cached states. + +Measurement uses milhouse's `cow_bytes` (PR sigp/milhouse#100): a pairwise tree +walk that compares two trees by `Arc::ptr_eq` at each node, skipping shared +subtrees. O(dirty_nodes) with zero allocations. + +### Two-layer approach + +**Fast path (every `put_state`):** Sum `ApproxOwnedBytesList` segments across all +states. Overcounts due to repeated mutations to the same tree path, but overcounting +is safe — it triggers eviction earlier, never too late. Cost: microseconds. + +**Slow path (on finalization):** Run `cow_bytes_between(finalized, state)` for every +cached state, replacing segments with exact measurements. Corrects accumulated +overcount. Cost: ~2ms for slot-only caches, ~225ms with epoch boundary states. + +### Three measurement points + +1. **Initial finalized state** — `total_state_tree_bytes()` walks all tree nodes + once. ~25ms at 1M validators. Happens once per finalization (~every 6 min). + +2. **State loaded from disk after rebase** — `cow_bytes_between(finalized, state)` + measures unique bytes vs finalized. O(dirty_nodes). + +3. **After block/slot processing** — `TreeSnapshot` clones pre-state (cheap Arc + bumps), then `cow_bytes_between(pre, post)` after transition. Pushed as a new + `ApproxOwnedBytes` entry. + +### Performance (benchmarked at 1M validators, MainnetEthSpec) + +| Operation | Time | +|-----------|------| +| cow_bytes slot transition | **541 ns** | +| cow_bytes epoch transition | **12.8 ms** | +| total_tree_bytes (initial) | **25.1 ms** | +| MemoryTracker (for comparison) | **458 ms** | + +### Eviction + +`put_state` checks `total_approx_owned_bytes()` against `max_bytes`. If over +budget, culls states by priority (advanced → old boundary → mid-epoch → good +boundary) until under budget. The total is recomputed each check by iterating +all cached states and deduplicating `ApproxOwnedBytes` entries — ~6400 pointer +comparisons, trivial. + +### Data flow + +``` +per_slot_processing / per_block_processing: + TreeSnapshot::new(state) ← cheap clone (Arc bumps) + ... process ... + snapshot.cow_bytes(state) ← O(dirty_nodes), ~541ns slot / ~12.8ms epoch + state.approx_owned_bytes.push(delta) + +rebase_on_finalized: + state.rebase_on(finalized) + cow_bytes_between(finalized, state) ← O(dirty_nodes) + state.approx_owned_bytes = finalized.approx_owned_bytes + unique + +update_finalized_state: + total_state_tree_bytes(state) ← O(all_nodes), ~25ms, once + state.approx_owned_bytes.push(base_size) + +put_state: + total = total_approx_owned_bytes() ← deduplicate Arc pointers + if total > max_bytes: cull(...) +``` + +## What's implemented + +- `ApproxOwnedBytes` / `ApproxOwnedBytesList` on `BeaconState` (all variants) +- `cow_bytes_between()`, `total_state_tree_bytes()` in `consensus/types` +- `TreeSnapshot` in `per_slot_processing` and `per_block_processing` +- `rebase_on_finalized` resets segments to finalized's + unique cost +- `update_finalized_state` measures base size for new finalized states +- `total_approx_owned_bytes()` on `StateCache` +- Eviction wired to `total_approx_owned_bytes()` in `put_state` +- `--state-cache-max-mb` CLI flag (default: None = count-based only) +- Metrics: `store_beacon_state_cache_cow_byte_size` gauge, + `store_beacon_state_cache_evictions_total` counter +- Debug tracing on finalized base size, rebase cow_bytes, eviction events +- `MemorySize` for `BeaconState` and all subtypes (from #7803) +- `estimated_marginal_bytes` fallback with 25 tests (not used for eviction) +- milhouse `cow_bytes` PR: sigp/milhouse#100 + +## What's not tracked + +- **Non-tree caches**: committee_caches (~30-60MB Arc-shared), pubkey_cache + (~100-150MB rpds), epoch_cache (~5MB Arc). Marginal cost ~0 when shared, + but the base finalized state's caches aren't measured. +- **Scalar fields**: fork, checkpoints, eth1_data. Small, fixed per state. + +## References + +- sigp/lighthouse#7449 — Measure state cache size +- sigp/lighthouse#7450 — Prune state cache based on size +- sigp/lighthouse#7803 — Memory Aware Caching (rejected) +- sigp/milhouse#100 — cow_bytes pairwise tree walk diff --git a/Cargo.lock b/Cargo.lock index 726929e9ec9..391fbf41428 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5749,8 +5749,7 @@ dependencies = [ [[package]] name = "milhouse" version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "259dd9da2ae5e0278b95da0b7ecef9c18c309d0a2d9e6db57ed33b9e8910c5e7" +source = "git+https://github.com/dapplion/milhouse.git?branch=cow-bytes#0b9ce4a7e00e9574ac838a5d56146608f10c21c8" dependencies = [ "alloy-primitives", "arbitrary", @@ -8469,6 +8468,7 @@ dependencies = [ "ethereum_ssz", "ethereum_ssz_derive", "fixed_bytes", + "genesis", "itertools 0.14.0", "leveldb", "logging", diff --git a/Cargo.toml b/Cargo.toml index db6853d44d8..a3be2af9e4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -177,7 +177,7 @@ malloc_utils = { path = "common/malloc_utils" } maplit = "1" merkle_proof = { path = "consensus/merkle_proof" } metrics = { path = "common/metrics" } -milhouse = { version = "0.9", default-features = false, features = ["context_deserialize"] } +milhouse = { git = "https://github.com/dapplion/milhouse.git", branch = "cow-bytes", default-features = false, features = ["context_deserialize"] } mockall = "0.13" mockall_double = "0.3" mockito = "1.5.0" diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 61dccc9674d..20b6769021b 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -802,11 +802,22 @@ pub fn cli_app() -> Command { Arg::new("state-cache-size") .long("state-cache-size") .value_name("STATE_CACHE_SIZE") - .help("Specifies the size of the state cache") + .help("Specifies the maximum number of states in the state cache") .default_value("128") .action(ArgAction::Set) .display_order(0) ) + .arg( + Arg::new("state-cache-max-mb") + .long("state-cache-max-mb") + .value_name("STATE_CACHE_MAX_MB") + .help("Maximum memory budget for the state cache in megabytes. When set, the \ + cache evicts states to stay within this budget using estimated byte costs. \ + Epoch boundary states (~32MB each on mainnet) are deprioritized for \ + eviction. If unset, only count-based eviction is used.") + .action(ArgAction::Set) + .display_order(0) + ) /* * Execution Layer Integration */ diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 0a52bcef06a..c66253d6eb9 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -366,6 +366,10 @@ pub fn get_config( .map_err(|_| "state-cache-size is not a valid integer".to_string())?; } + if let Some(max_mb) = clap_utils::parse_optional::(cli_args, "state-cache-max-mb")? { + client_config.store.state_cache_max_mb = Some(max_mb); + } + if let Some(historic_state_cache_size) = clap_utils::parse_optional(cli_args, "historic-state-cache-size")? { diff --git a/beacon_node/store/Cargo.toml b/beacon_node/store/Cargo.toml index 50028fe73ff..32961ff32ca 100644 --- a/beacon_node/store/Cargo.toml +++ b/beacon_node/store/Cargo.toml @@ -41,9 +41,14 @@ zstd = { workspace = true } [dev-dependencies] beacon_chain = { workspace = true } criterion = { workspace = true } +genesis = { workspace = true } rand = { workspace = true, features = ["small_rng"] } tempfile = { workspace = true } [[bench]] name = "hdiff" harness = false + +[[bench]] +name = "state_memory" +harness = false diff --git a/beacon_node/store/benches/state_memory.rs b/beacon_node/store/benches/state_memory.rs new file mode 100644 index 00000000000..44357482d0e --- /dev/null +++ b/beacon_node/store/benches/state_memory.rs @@ -0,0 +1,133 @@ +//! Benchmarks for state memory measurement using cow_bytes (pairwise tree walk). + +use criterion::{Criterion, criterion_group, criterion_main}; +use milhouse::{List, Vector}; +use ssz_types::BitVector; +use std::hint::black_box; +use std::sync::Arc; +use types::state::*; +use types::*; + +type E = MainnetEthSpec; + +fn make_state(n: usize) -> BeaconState { + let validator = Validator { + pubkey: bls::PublicKeyBytes::empty(), + withdrawal_credentials: Hash256::ZERO, + effective_balance: 32_000_000_000, + slashed: false, + activation_eligibility_epoch: Epoch::new(0), + activation_epoch: Epoch::new(0), + exit_epoch: Epoch::new(u64::MAX), + withdrawable_epoch: Epoch::new(u64::MAX), + }; + let validators = List::new(vec![validator; n]).unwrap(); + let balances = List::new(vec![32_000_000_000u64; n]).unwrap(); + let inactivity_scores = List::new(vec![0u64; n]).unwrap(); + let participation = List::new(vec![ParticipationFlags::default(); n]).unwrap(); + let default_cc = Arc::new(CommitteeCache::default()); + let sync = Arc::new(SyncCommittee::temporary()); + + BeaconState::Altair(BeaconStateAltair { + genesis_time: 0, + genesis_validators_root: Hash256::ZERO, + slot: Slot::new(0), + fork: Fork::default(), + latest_block_header: BeaconBlockHeader::empty(), + block_roots: Vector::default(), + state_roots: Vector::default(), + historical_roots: List::default(), + eth1_data: Eth1Data::default(), + eth1_data_votes: List::default(), + eth1_deposit_index: 0, + validators, + balances, + randao_mixes: Vector::default(), + slashings: Vector::default(), + previous_epoch_participation: participation.clone(), + current_epoch_participation: participation, + justification_bits: BitVector::new(), + previous_justified_checkpoint: Checkpoint::default(), + current_justified_checkpoint: Checkpoint::default(), + finalized_checkpoint: Checkpoint::default(), + inactivity_scores, + current_sync_committee: sync.clone(), + next_sync_committee: sync, + total_active_balance: None, + progressive_balances_cache: ProgressiveBalancesCache::default(), + committee_caches: [default_cc.clone(), default_cc.clone(), default_cc], + pubkey_cache: PubkeyCache::default(), + exit_cache: ExitCache::default(), + slashings_cache: SlashingsCache::default(), + epoch_cache: EpochCache::default(), + approx_owned_bytes: ApproxOwnedBytesList::default(), + }) +} + +fn make_slot_transition(base: &BeaconState, n: usize) -> BeaconState { + let mut post = base.clone(); + // 1 proposer reward + 128 participation + roots + randao + *post.balances_mut().get_mut(0).unwrap() += 1; + *post.state_roots_mut().get_mut(0).unwrap() = Hash256::repeat_byte(0x01); + *post.block_roots_mut().get_mut(0).unwrap() = Hash256::repeat_byte(0x02); + *post.randao_mixes_mut().get_mut(0).unwrap() = Hash256::repeat_byte(0x03); + for i in 0..128.min(n) { + post.current_epoch_participation_mut() + .unwrap() + .get_mut(i) + .unwrap() + .add_flag(0) + .unwrap(); + } + post.apply_pending_mutations().unwrap(); + post +} + +fn make_epoch_transition(base: &BeaconState, n: usize) -> BeaconState { + let mut post = base.clone(); + // All balances + inactivity + participation replaced + for i in 0..n { + *post.balances_mut().get_mut(i).unwrap() += 1; + } + for i in 0..n { + *post.inactivity_scores_mut().unwrap().get_mut(i).unwrap() += 1; + } + *post.previous_epoch_participation_mut().unwrap() = + List::new(vec![ParticipationFlags::default(); n]).unwrap(); + *post.current_epoch_participation_mut().unwrap() = + List::new(vec![ParticipationFlags::default(); n]).unwrap(); + post.apply_pending_mutations().unwrap(); + post +} + +fn bench_cow_bytes(c: &mut Criterion) { + let mut group = c.benchmark_group("cow_bytes"); + group.sample_size(10); + + for n in [1_000_000, 2_000_000] { + eprintln!("Building states with {n} validators..."); + let base = make_state(n); + + // Slot transition: few dirty nodes. + let post_slot = make_slot_transition(&base, n); + group.bench_function(format!("slot_transition_{n}"), |b| { + b.iter(|| black_box(cow_bytes_between(&base, &post_slot))); + }); + + // Epoch transition: many dirty nodes. + let post_epoch = make_epoch_transition(&base, n); + group.bench_function(format!("epoch_transition_{n}"), |b| { + b.iter(|| black_box(cow_bytes_between(&base, &post_epoch))); + }); + + // Total tree bytes (for initial finalized state). + group.bench_function(format!("total_tree_bytes_{n}"), |b| { + b.iter(|| black_box(total_state_tree_bytes(&base))); + }); + } + + group.finish(); +} + +criterion_group!(benches, bench_cow_bytes); +criterion_main!(benches); diff --git a/beacon_node/store/src/config.rs b/beacon_node/store/src/config.rs index 29705283fa9..9cadc29e2ad 100644 --- a/beacon_node/store/src/config.rs +++ b/beacon_node/store/src/config.rs @@ -29,6 +29,9 @@ pub const DEFAULT_HOT_HDIFF_BUFFER_CACHE_SIZE: NonZeroUsize = new_non_zero_usize const EST_COMPRESSION_FACTOR: usize = 2; pub const DEFAULT_EPOCHS_PER_BLOB_PRUNE: u64 = 1; pub const DEFAULT_BLOB_PUNE_MARGIN_EPOCHS: u64 = 0; +/// Default maximum memory budget for the state cache in megabytes. `None` means no byte-budget +/// limit (count-based eviction only, the previous behaviour). +pub const DEFAULT_STATE_CACHE_MAX_MB: Option = None; /// Database configuration parameters. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -64,6 +67,10 @@ pub struct StoreConfig { /// The margin for blob pruning in epochs. The oldest blobs are pruned up until /// data_availability_boundary - blob_prune_margin_epochs. Default: 0. pub blob_prune_margin_epochs: u64, + /// Maximum memory budget for the state cache in megabytes. When set, the cache will evict + /// states to stay within this budget using spec-derived byte cost estimates. `None` disables + /// byte-budget eviction (count-based only). + pub state_cache_max_mb: Option, } /// Variant of `StoreConfig` that gets written to disk. Contains immutable configuration params. @@ -120,6 +127,7 @@ impl Default for StoreConfig { prune_blobs: true, epochs_per_blob_prune: DEFAULT_EPOCHS_PER_BLOB_PRUNE, blob_prune_margin_epochs: DEFAULT_BLOB_PUNE_MARGIN_EPOCHS, + state_cache_max_mb: DEFAULT_STATE_CACHE_MAX_MB, } } } diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 78dd69e55a2..3ae6c5097e9 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -243,6 +243,7 @@ impl HotColdDB, MemoryStore> { config.state_cache_size, config.state_cache_headroom, config.hot_hdiff_buffer_cache_size, + config.state_cache_max_mb.map(|mb| mb * 1_048_576), )), historic_state_cache: Mutex::new(HistoricStateCache::new( config.cold_hdiff_buffer_cache_size, @@ -297,6 +298,7 @@ impl HotColdDB, BeaconNodeBackend> { config.state_cache_size, config.state_cache_headroom, config.hot_hdiff_buffer_cache_size, + config.state_cache_max_mb.map(|mb| mb * 1_048_576), )), historic_state_cache: Mutex::new(HistoricStateCache::new( config.cold_hdiff_buffer_cache_size, @@ -515,6 +517,10 @@ impl, Cold: ItemStore> HotColdDB &metrics::STORE_BEACON_STATE_CACHE_SIZE, state_cache.len() as i64, ); + metrics::set_gauge( + &metrics::STORE_BEACON_STATE_CACHE_COW_BYTE_SIZE, + state_cache.cached_bytes() as i64, + ); metrics::set_gauge_vec( &metrics::STORE_BEACON_HDIFF_BUFFER_CACHE_SIZE, HOT_METRIC, diff --git a/beacon_node/store/src/metrics.rs b/beacon_node/store/src/metrics.rs index 93c9840586e..73e0002d0eb 100644 --- a/beacon_node/store/src/metrics.rs +++ b/beacon_node/store/src/metrics.rs @@ -269,6 +269,36 @@ pub static STORE_BEACON_STATE_CACHE_SIZE: LazyLock> = LazyLock: "Current count of items in beacon store state cache", ) }); +pub static STORE_BEACON_STATE_CACHE_ESTIMATED_BYTE_SIZE: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_state_cache_estimated_byte_size", + "Estimated memory consumed by states in the state cache (bytes)", + ) + }); +pub static STORE_BEACON_STATE_CACHE_COW_BYTE_SIZE: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "store_beacon_state_cache_cow_byte_size", + "Total unique COW bytes across all cached states (from ApproxOwnedBytes)", + ) + }); +pub static STORE_BEACON_STATE_CACHE_EVICTIONS: LazyLock> = LazyLock::new(|| { + try_create_int_counter( + "store_beacon_state_cache_evictions_total", + "Total number of states evicted from the state cache due to byte budget", + ) +}); +pub static STORE_BEACON_STATE_CACHE_SEGMENT_COUNT: LazyLock> = + LazyLock::new(|| { + try_create_histogram_with_buckets( + "store_beacon_state_cache_segment_count", + "Number of ApproxOwnedBytes segments per cached state", + Ok(vec![ + 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0, + ]), + ) + }); pub static STORE_BEACON_HISTORIC_STATE_CACHE_SIZE: LazyLock> = LazyLock::new(|| { try_create_int_gauge( diff --git a/beacon_node/store/src/state_cache.rs b/beacon_node/store/src/state_cache.rs index d016922adeb..1ce2d54c158 100644 --- a/beacon_node/store/src/state_cache.rs +++ b/beacon_node/store/src/state_cache.rs @@ -38,14 +38,15 @@ pub struct SlotMap { #[derive(Debug)] pub struct StateCache { finalized_state: Option>, - // Stores the tuple (state_root, state) as LruCache only returns the value on put and we need - // the state_root + /// Stores (state_root, state) per cached state. states: LruCache)>, block_map: BlockMap, hdiff_buffers: HotHDiffBufferCache, max_epoch: Epoch, head_block_root: Hash256, headroom: NonZeroUsize, + /// Optional byte budget. When set, eviction triggers when total COW bytes exceed this. + max_bytes: Option, } /// Cache of hdiff buffers for hot states. @@ -83,6 +84,7 @@ impl StateCache { state_capacity: NonZeroUsize, headroom: NonZeroUsize, hdiff_capacity: NonZeroUsize, + max_bytes: Option, ) -> Self { StateCache { finalized_state: None, @@ -92,6 +94,7 @@ impl StateCache { max_epoch: Epoch::new(0), head_block_root: Hash256::ZERO, headroom, + max_bytes, } } @@ -111,6 +114,12 @@ impl StateCache { self.hdiff_buffers.mem_usage() } + /// Total bytes consumed by cached states, computed by deduplicating shared + /// `ApproxOwnedBytes` segments across all states (including finalized). + pub fn cached_bytes(&self) -> usize { + self.total_approx_owned_bytes() + } + /// Return all state roots currently held in the cache, including the finalized state. pub fn state_roots(&self) -> Vec { let mut roots: Vec = self @@ -128,7 +137,7 @@ impl StateCache { &mut self, state_root: Hash256, block_root: Hash256, - state: BeaconState, + mut state: BeaconState, pre_finalized_slots_to_retain: &[Slot], ) -> Result<(), Error> { if state.slot() % E::slots_per_epoch() != 0 { @@ -179,8 +188,27 @@ impl StateCache { } } + // Measure base size for states loaded from disk or genesis (empty list). + if state.approx_owned_bytes().0.is_empty() { + let base_bytes = types::total_state_tree_bytes(&state); + tracing::debug!( + base_bytes, + slot = %state.slot(), + validators = state.validators().len(), + "measured finalized state base tree size" + ); + state.approx_owned_bytes_mut().push(base_bytes); + } + // Update finalized state. self.finalized_state = Some(FinalizedState { state_root, state }); + + // NOTE: we do NOT recompute exact costs here because cached states still share + // tree nodes with the OLD finalized state, not this new one. cow_bytes_between + // against the new finalized would see completely different trees and overcount + // massively. The slow-path recomputation needs a mechanism to know which base + // each cached state actually shares with — a future improvement. + Ok(()) } @@ -209,6 +237,18 @@ impl StateCache { && state.slot() >= finalized_state.state.slot() { state.rebase_on(&finalized_state.state, spec)?; + + // After rebase, the state shares the finalized tree. Recompute owned bytes: + // adopt the finalized state's list + measure the remaining unique cost. + let unique_bytes = types::cow_bytes_between(&finalized_state.state, state); + tracing::debug!( + unique_bytes, + slot = %state.slot(), + "rebased state cow_bytes vs finalized" + ); + state + .approx_owned_bytes_mut() + .reset_to_base(finalized_state.state.approx_owned_bytes(), unique_bytes); } Ok(()) @@ -252,7 +292,7 @@ impl StateCache { // Update the cache's idea of the max epoch. self.max_epoch = std::cmp::max(state.current_epoch(), self.max_epoch); - // If the cache is full, use the custom cull routine to make room. + // If the cache is full (by count), use the custom cull routine to make room. let mut deleted_states = if let Some(over_capacity) = self.len().checked_sub(self.capacity()) { // The `over_capacity` should always be 0, but we add it here just in case. @@ -261,6 +301,38 @@ impl StateCache { vec![] }; + // Fast path: check byte budget using approximate segment-based total. + // This may overcount (segments accumulate from repeated mutations to the same + // path), but overcounting is safe — it triggers eviction earlier, never too late. + // The slow path in update_finalized_state corrects the overcount periodically. + if let Some(max_bytes) = self.max_bytes { + let total_before = self.total_approx_owned_bytes(); + let mut evicted = 0; + while self.total_approx_owned_bytes() > max_bytes && self.len() > 0 { + let culled = self.cull(1); + if culled.is_empty() { + break; + } + evicted += culled.len(); + deleted_states.extend(culled); + } + if evicted > 0 { + let total_after = self.total_approx_owned_bytes(); + tracing::debug!( + max_bytes, + total_before, + total_after, + evicted, + remaining = self.len(), + "state cache byte budget eviction" + ); + metrics::inc_counter_by( + &metrics::STORE_BEACON_STATE_CACHE_EVICTIONS, + evicted as u64, + ); + } + } + // Insert the full state into the cache. if let Some((deleted_state_root, _)) = self.states.put(state_root, (state_root, state.clone())) @@ -357,6 +429,36 @@ impl StateCache { } } + /// Compute the total unique COW bytes across all cached states. + /// + /// Iterates all states and deduplicates `CowSegment`s by `Arc` pointer identity. + /// Shared segments (from common ancestors) are counted once. + pub fn total_approx_owned_bytes(&self) -> usize { + // Record segment counts per state for observability. + if let Some(ref fin) = self.finalized_state { + metrics::observe( + &metrics::STORE_BEACON_STATE_CACHE_SEGMENT_COUNT, + fin.state.approx_owned_bytes().0.len() as f64, + ); + } + for (_, (_, state)) in self.states.iter() { + metrics::observe( + &metrics::STORE_BEACON_STATE_CACHE_SEGMENT_COUNT, + state.approx_owned_bytes().0.len() as f64, + ); + } + + let finalized = self + .finalized_state + .as_ref() + .map(|f| f.state.approx_owned_bytes()); + let cached = self + .states + .iter() + .map(|(_, (_, state))| state.approx_owned_bytes()); + types::sum_approx_owned_bytes(finalized.into_iter().chain(cached)) + } + /// Cull approximately `count` states from the cache. /// /// States are culled LRU, with the following extra order imposed: @@ -546,3 +648,256 @@ impl HotHDiffBufferCache { .sum() } } + +#[cfg(test)] +mod tests { + use super::*; + use milhouse::List; + use ssz_types::BitVector; + use std::num::NonZeroUsize; + use std::sync::Arc; + use types::state::*; + use types::*; + + type E = MinimalEthSpec; + + fn make_test_validator() -> Validator { + Validator { + pubkey: bls::PublicKeyBytes::empty(), + withdrawal_credentials: Hash256::ZERO, + effective_balance: 32_000_000_000, + slashed: false, + activation_eligibility_epoch: Epoch::new(0), + activation_epoch: Epoch::new(0), + exit_epoch: Epoch::new(u64::MAX), + withdrawable_epoch: Epoch::new(u64::MAX), + } + } + + fn make_altair_state(n: usize, slot: Slot) -> BeaconState { + let validators = List::new(vec![make_test_validator(); n]).unwrap(); + let balances = List::new(vec![32_000_000_000u64; n]).unwrap(); + let inactivity_scores = List::new(vec![0u64; n]).unwrap(); + let participation = List::new(vec![ParticipationFlags::default(); n]).unwrap(); + let default_cc = Arc::new(CommitteeCache::default()); + let sync = Arc::new(SyncCommittee::temporary()); + + BeaconState::Altair(BeaconStateAltair { + genesis_time: 0, + genesis_validators_root: Hash256::ZERO, + slot, + fork: Fork::default(), + latest_block_header: BeaconBlockHeader::empty(), + block_roots: milhouse::Vector::default(), + state_roots: milhouse::Vector::default(), + historical_roots: List::default(), + eth1_data: Eth1Data::default(), + eth1_data_votes: List::default(), + eth1_deposit_index: 0, + validators, + balances, + randao_mixes: milhouse::Vector::default(), + slashings: milhouse::Vector::default(), + previous_epoch_participation: participation.clone(), + current_epoch_participation: participation, + justification_bits: BitVector::new(), + previous_justified_checkpoint: Checkpoint::default(), + current_justified_checkpoint: Checkpoint::default(), + finalized_checkpoint: Checkpoint::default(), + inactivity_scores, + current_sync_committee: sync.clone(), + next_sync_committee: sync, + total_active_balance: None, + progressive_balances_cache: ProgressiveBalancesCache::default(), + committee_caches: [default_cc.clone(), default_cc.clone(), default_cc], + pubkey_cache: PubkeyCache::default(), + exit_cache: ExitCache::default(), + slashings_cache: SlashingsCache::default(), + epoch_cache: EpochCache::default(), + approx_owned_bytes: ApproxOwnedBytesList::default(), + }) + } + + fn hash(byte: u8) -> Hash256 { + Hash256::repeat_byte(byte) + } + + fn new_cache(capacity: usize, max_bytes: Option) -> StateCache { + StateCache::new( + NonZeroUsize::new(capacity).unwrap(), + NonZeroUsize::new(1).unwrap(), + NonZeroUsize::new(1).unwrap(), + max_bytes, + ) + } + // ── cow_bytes_between tests ────────────────────────────────────────── + + #[test] + fn cow_bytes_clone_is_zero() { + let state = make_altair_state(256, Slot::new(1)); + let clone = state.clone(); + assert_eq!(cow_bytes_between(&state, &clone), 0); + } + + #[test] + fn cow_bytes_single_mutation() { + let base = make_altair_state(256, Slot::new(1)); + let mut derived = base.clone(); + *derived.balances_mut().get_mut(0).unwrap() += 1; + derived.apply_pending_mutations().unwrap(); + + let cow = cow_bytes_between(&base, &derived); + assert!(cow > 0, "single mutation should produce non-zero cow_bytes"); + } + + #[test] + fn cow_bytes_epoch_boundary_mutations() { + let n = 256; + let base = make_altair_state(n, Slot::new(8)); + let mut derived = base.clone(); + + // Simulate epoch: all balances + inactivity + participation replaced + for i in 0..n { + *derived.balances_mut().get_mut(i).unwrap() += 1; + } + for i in 0..n { + *derived.inactivity_scores_mut().unwrap().get_mut(i).unwrap() += 1; + } + *derived.previous_epoch_participation_mut().unwrap() = + List::new(vec![ParticipationFlags::default(); n]).unwrap(); + *derived.current_epoch_participation_mut().unwrap() = + List::new(vec![ParticipationFlags::default(); n]).unwrap(); + derived.apply_pending_mutations().unwrap(); + + let cow = cow_bytes_between(&base, &derived); + // Should be substantial — most of the tree is dirty + assert!( + cow > 10_000, + "epoch boundary should produce significant cow_bytes: {cow}" + ); + } + + // ── total_state_tree_bytes tests ────────────────────────────────────── + + #[test] + fn total_tree_bytes_nonzero() { + let state = make_altair_state(256, Slot::new(0)); + let total = total_state_tree_bytes(&state); + // 256 validators × various fields, should be in the tens of KB + assert!(total > 10_000, "total tree bytes should be > 10KB: {total}"); + } + + #[test] + fn total_tree_bytes_scales_with_validators() { + let small = total_state_tree_bytes(&make_altair_state(64, Slot::new(0))); + let large = total_state_tree_bytes(&make_altair_state(1024, Slot::new(0))); + assert!( + large > small * 4, + "1024 validators should be > 4x of 64: small={small}, large={large}" + ); + } + + // ── ApproxOwnedBytesList deduplication tests ────────────────────────── + + #[test] + fn approx_owned_bytes_dedup_across_clones() { + let mut base = ApproxOwnedBytesList::default(); + base.push(1000); + + let mut s1 = base.clone(); + s1.push(100); + + let mut s2 = base.clone(); + s2.push(200); + + // Unique segments: base(1000) + s1(100) + s2(200) = 1300 + let total = sum_approx_owned_bytes([&base, &s1, &s2].into_iter()); + assert_eq!(total, 1300); + } + + // ── StateCache integration tests ────────────────────────────────────── + + #[test] + fn finalized_state_gets_base_size() { + let mut cache = new_cache(10, None); + let state = make_altair_state(256, Slot::new(0)); + let state_root = hash(1); + + cache + .update_finalized_state(state_root, hash(2), state, &[]) + .unwrap(); + + let total = cache.total_approx_owned_bytes(); + assert!( + total > 0, + "finalized state should have non-zero total: {total}" + ); + } + + #[test] + fn put_state_adds_to_total() { + let mut cache = new_cache(10, None); + + // Set finalized + let fin = make_altair_state(64, Slot::new(0)); + cache + .update_finalized_state(hash(1), hash(2), fin, &[]) + .unwrap(); + cache.update_head_block_root(hash(10)); + + let total_before = cache.total_approx_owned_bytes(); + + // Insert a state with some COW mutations + let mut state = cache.get_by_state_root(hash(1)).unwrap(); + *state.slot_mut() = Slot::new(1); + *state.balances_mut().get_mut(0).unwrap() += 1; + state.apply_pending_mutations().unwrap(); + // Push a cow segment to simulate what per_slot_processing does + let cow = cow_bytes_between(&cache.get_by_state_root(hash(1)).unwrap(), &state); + state.approx_owned_bytes_mut().push(cow); + + cache.put_state(hash(3), hash(10), &state).unwrap(); + + let total_after = cache.total_approx_owned_bytes(); + assert!( + total_after >= total_before, + "total should not decrease after adding state: before={total_before}, after={total_after}" + ); + } + + #[test] + fn byte_budget_eviction() { + let fin = make_altair_state(64, Slot::new(0)); + let base_size = total_state_tree_bytes(&fin); + + // Set a very tight budget: just the finalized base. Any inserted state should + // trigger eviction attempts. + let mut cache = new_cache(10, Some(base_size)); + cache + .update_finalized_state(hash(1), hash(2), fin, &[]) + .unwrap(); + cache.update_head_block_root(hash(99)); + + // Insert 5 states with different block roots (not head, so evictable) + for i in 0u8..5 { + let mut state = cache.get_by_state_root(hash(1)).unwrap(); + *state.slot_mut() = Slot::new(i as u64 + 1); + *state.balances_mut().get_mut(i as usize).unwrap() += 1; + state.apply_pending_mutations().unwrap(); + let cow = cow_bytes_between(&cache.get_by_state_root(hash(1)).unwrap(), &state); + state.approx_owned_bytes_mut().push(cow); + + cache + .put_state(hash(100 + i), hash(10 + i), &state) + .unwrap(); + } + + // With a budget equal to base_size, the cache should have evicted most states. + // It may keep 1-2 (exempt), but not all 5. + assert!( + cache.len() < 5, + "eviction should have removed some states, but cache has {} states", + cache.len() + ); + } +} diff --git a/consensus/state_processing/src/per_block_processing.rs b/consensus/state_processing/src/per_block_processing.rs index 5aa610e98ea..123ce50b0d1 100644 --- a/consensus/state_processing/src/per_block_processing.rs +++ b/consensus/state_processing/src/per_block_processing.rs @@ -117,6 +117,9 @@ pub fn per_block_processing>( ctxt: &mut ConsensusContext, spec: &ChainSpec, ) -> Result<(), BlockProcessingError> { + // Snapshot state before mutations for COW tracking. + let pre = state.clone(); + let block = signed_block.message(); // Verify that the `SignedBeaconBlock` instantiation matches the fork at `signed_block.slot()`. @@ -215,6 +218,10 @@ pub fn per_block_processing>( update_progressive_balances_metrics(state.progressive_balances_cache())?; } + // Record COW bytes from this block transition. + let delta = cow_bytes_between(&pre, state); + state.approx_owned_bytes_mut().push(delta); + Ok(()) } diff --git a/consensus/state_processing/src/per_slot_processing.rs b/consensus/state_processing/src/per_slot_processing.rs index f26ea567a26..3c42e304549 100644 --- a/consensus/state_processing/src/per_slot_processing.rs +++ b/consensus/state_processing/src/per_slot_processing.rs @@ -45,6 +45,9 @@ pub fn per_slot_processing( .fork_name(spec) .map_err(Error::InconsistentStateFork)?; + // Snapshot state before mutations for COW tracking. + let pre = state.clone(); + cache_state(state, state_root)?; let summary = if state.slot() > spec.genesis_slot @@ -109,6 +112,10 @@ pub fn per_slot_processing( state.build_caches(spec)?; } + // Record COW bytes from this slot transition. + let delta = cow_bytes_between(&pre, state); + state.approx_owned_bytes_mut().push(delta); + Ok(summary) } diff --git a/consensus/state_processing/src/upgrade/altair.rs b/consensus/state_processing/src/upgrade/altair.rs index 022175ff999..ae934d8d787 100644 --- a/consensus/state_processing/src/upgrade/altair.rs +++ b/consensus/state_processing/src/upgrade/altair.rs @@ -111,6 +111,7 @@ pub fn upgrade_to_altair( exit_cache: mem::take(&mut pre.exit_cache), slashings_cache: mem::take(&mut pre.slashings_cache), epoch_cache: EpochCache::default(), + approx_owned_bytes: mem::take(&mut pre.approx_owned_bytes), }); // Fill in previous epoch participation from the pre state's pending attestations. diff --git a/consensus/state_processing/src/upgrade/bellatrix.rs b/consensus/state_processing/src/upgrade/bellatrix.rs index f23e571cd12..c292d4fb316 100644 --- a/consensus/state_processing/src/upgrade/bellatrix.rs +++ b/consensus/state_processing/src/upgrade/bellatrix.rs @@ -66,6 +66,7 @@ pub fn upgrade_to_bellatrix( exit_cache: mem::take(&mut pre.exit_cache), slashings_cache: mem::take(&mut pre.slashings_cache), epoch_cache: EpochCache::default(), + approx_owned_bytes: mem::take(&mut pre.approx_owned_bytes), }); *pre_state = post; diff --git a/consensus/state_processing/src/upgrade/capella.rs b/consensus/state_processing/src/upgrade/capella.rs index 948fa511b73..99cc8cc5a7c 100644 --- a/consensus/state_processing/src/upgrade/capella.rs +++ b/consensus/state_processing/src/upgrade/capella.rs @@ -71,6 +71,7 @@ pub fn upgrade_to_capella( exit_cache: mem::take(&mut pre.exit_cache), slashings_cache: mem::take(&mut pre.slashings_cache), epoch_cache: EpochCache::default(), + approx_owned_bytes: mem::take(&mut pre.approx_owned_bytes), }); *pre_state = post; diff --git a/consensus/state_processing/src/upgrade/deneb.rs b/consensus/state_processing/src/upgrade/deneb.rs index c21e1361a5a..dc26c8fe244 100644 --- a/consensus/state_processing/src/upgrade/deneb.rs +++ b/consensus/state_processing/src/upgrade/deneb.rs @@ -71,6 +71,7 @@ pub fn upgrade_to_deneb( exit_cache: mem::take(&mut pre.exit_cache), slashings_cache: mem::take(&mut pre.slashings_cache), epoch_cache: EpochCache::default(), + approx_owned_bytes: mem::take(&mut pre.approx_owned_bytes), }); *pre_state = post; diff --git a/consensus/state_processing/src/upgrade/electra.rs b/consensus/state_processing/src/upgrade/electra.rs index 258b28a45bd..f74a764cae3 100644 --- a/consensus/state_processing/src/upgrade/electra.rs +++ b/consensus/state_processing/src/upgrade/electra.rs @@ -168,6 +168,7 @@ pub fn upgrade_state_to_electra( exit_cache: mem::take(&mut pre.exit_cache), slashings_cache: mem::take(&mut pre.slashings_cache), epoch_cache: EpochCache::default(), + approx_owned_bytes: mem::take(&mut pre.approx_owned_bytes), }); Ok(post) } diff --git a/consensus/state_processing/src/upgrade/fulu.rs b/consensus/state_processing/src/upgrade/fulu.rs index c14c1edbec3..19e4ce44725 100644 --- a/consensus/state_processing/src/upgrade/fulu.rs +++ b/consensus/state_processing/src/upgrade/fulu.rs @@ -110,6 +110,7 @@ pub fn upgrade_state_to_fulu( exit_cache: mem::take(&mut pre.exit_cache), slashings_cache: mem::take(&mut pre.slashings_cache), epoch_cache: mem::take(&mut pre.epoch_cache), + approx_owned_bytes: mem::take(&mut pre.approx_owned_bytes), proposer_lookahead, }); Ok(post) diff --git a/consensus/state_processing/src/upgrade/gloas.rs b/consensus/state_processing/src/upgrade/gloas.rs index b39ee6048f7..764077b96fe 100644 --- a/consensus/state_processing/src/upgrade/gloas.rs +++ b/consensus/state_processing/src/upgrade/gloas.rs @@ -117,6 +117,7 @@ pub fn upgrade_state_to_gloas( exit_cache: mem::take(&mut pre.exit_cache), slashings_cache: mem::take(&mut pre.slashings_cache), epoch_cache: mem::take(&mut pre.epoch_cache), + approx_owned_bytes: mem::take(&mut pre.approx_owned_bytes), }); // [New in Gloas:EIP7732] onboard_builders_from_pending_deposits(&mut post, spec)?; diff --git a/consensus/types/src/state/approx_owned_bytes.rs b/consensus/types/src/state/approx_owned_bytes.rs new file mode 100644 index 00000000000..05dce36ee96 --- /dev/null +++ b/consensus/types/src/state/approx_owned_bytes.rs @@ -0,0 +1,269 @@ +use crate::core::EthSpec; +use crate::state::BeaconState; +use std::collections::HashSet; +use std::sync::Arc; + +/// Approximate bytes of tree memory owned by a group of states at a specific point — +/// either the base tree of a state loaded from disk, or the new COW nodes produced +/// by a state transition. +/// +/// Identity is by `Arc` pointer — states sharing the same `Arc` +/// inherited it from a common ancestor via clone. +#[derive(Debug)] +pub struct ApproxOwnedBytes { + pub bytes: usize, +} + +/// List of `ApproxOwnedBytes` carried on each `BeaconState`. +/// +/// Each entry is a chunk of tree memory: the base tree (for states loaded from disk) +/// or new nodes from a transition. States that share ancestry share the same `Arc` +/// entries — clone copies the `Vec` but shares all `Arc` pointers. +/// +/// `PartialEq` always returns true — memory tracking is not consensus-relevant. +#[derive(Clone, Debug, Default)] +pub struct ApproxOwnedBytesList(pub Vec>); + +impl PartialEq for ApproxOwnedBytesList { + fn eq(&self, _other: &Self) -> bool { + true + } +} + +impl ApproxOwnedBytesList { + pub fn push(&mut self, bytes: usize) { + if bytes > 0 { + self.0.push(Arc::new(ApproxOwnedBytes { bytes })); + } + } + + /// Replace with a base state's list plus an optional entry for unique bytes. + /// + /// Used after `rebase_on` to adopt the finalized state's entries and add the + /// remaining unique cost. + pub fn reset_to_base(&mut self, base: &ApproxOwnedBytesList, unique_bytes: usize) { + self.0 = base.0.clone(); + if unique_bytes > 0 { + self.0.push(Arc::new(ApproxOwnedBytes { + bytes: unique_bytes, + })); + } + } +} + +/// Sum the unique `ApproxOwnedBytes` across multiple states. +/// +/// Deduplicates by `Arc` pointer identity — shared entries are counted once. +pub fn sum_approx_owned_bytes<'a>(states: impl Iterator) -> usize { + let mut seen = HashSet::new(); + let mut total: usize = 0; + for list in states { + for entry in &list.0 { + if seen.insert(Arc::as_ptr(entry)) { + total = total.saturating_add(entry.bytes); + } + } + } + total +} + +/// Compute the COW bytes between two states across all tree-backed fields and caches. +/// +/// IMPORTANT: this list must be kept in sync with `BeaconState::rebase_on` which uses +/// `bimap_beacon_state_*_tree_list_fields!` macros. When a new fork adds a tree-backed +/// field, add it here too. +/// +/// NOTE: milhouse's `cow_bytes` uses `size_of::()` for leaf data, which only counts +/// stack size. If a future leaf type has heap allocations (Vec, String, etc.), they won't +/// be counted. All current beacon state leaf types are fully inline, so this is correct today. +#[allow(clippy::arithmetic_side_effects)] +pub fn cow_bytes_between(base: &BeaconState, derived: &BeaconState) -> usize { + let mut total: usize = 0; + + // Tree-backed fields (common to all forks). + total = total.saturating_add(derived.validators().cow_bytes(base.validators())); + total = total.saturating_add(derived.balances().cow_bytes(base.balances())); + total = total.saturating_add(derived.state_roots().cow_bytes(base.state_roots())); + total = total.saturating_add(derived.block_roots().cow_bytes(base.block_roots())); + total = total.saturating_add(derived.randao_mixes().cow_bytes(base.randao_mixes())); + total = total.saturating_add(derived.slashings().cow_bytes(base.slashings())); + total = total.saturating_add(derived.eth1_data_votes().cow_bytes(base.eth1_data_votes())); + total = total.saturating_add( + derived + .historical_roots() + .cow_bytes(base.historical_roots()), + ); + + // Altair+ fields. + if let (Ok(d), Ok(b)) = (derived.inactivity_scores(), base.inactivity_scores()) { + total = total.saturating_add(d.cow_bytes(b)); + } + if let (Ok(d), Ok(b)) = ( + derived.previous_epoch_participation(), + base.previous_epoch_participation(), + ) { + total = total.saturating_add(d.cow_bytes(b)); + } + if let (Ok(d), Ok(b)) = ( + derived.current_epoch_participation(), + base.current_epoch_participation(), + ) { + total = total.saturating_add(d.cow_bytes(b)); + } + + // Capella+ fields. + if let (Ok(d), Ok(b)) = (derived.historical_summaries(), base.historical_summaries()) { + total = total.saturating_add(d.cow_bytes(b)); + } + + // Electra+ fields. + if let (Ok(d), Ok(b)) = (derived.pending_deposits(), base.pending_deposits()) { + total = total.saturating_add(d.cow_bytes(b)); + } + if let (Ok(d), Ok(b)) = ( + derived.pending_partial_withdrawals(), + base.pending_partial_withdrawals(), + ) { + total = total.saturating_add(d.cow_bytes(b)); + } + if let (Ok(d), Ok(b)) = ( + derived.pending_consolidations(), + base.pending_consolidations(), + ) { + total = total.saturating_add(d.cow_bytes(b)); + } + + // Caches: count as COW if they point to different Arc allocations. + for (d, b) in derived + .committee_caches() + .iter() + .zip(base.committee_caches()) + { + if !Arc::ptr_eq(d, b) { + total = total.saturating_add(d.approx_heap_bytes()); + } + } + if let (Ok(d), Ok(b)) = ( + derived.current_sync_committee(), + base.current_sync_committee(), + ) && !Arc::ptr_eq(d, b) + { + total = total.saturating_add(std::mem::size_of_val(&**d)); + } + if let (Ok(d), Ok(b)) = (derived.next_sync_committee(), base.next_sync_committee()) + && !Arc::ptr_eq(d, b) + { + total = total.saturating_add(std::mem::size_of_val(&**d)); + } + + total +} + +/// Compute the total bytes for a state's tree-backed fields and caches (no sharing). +/// +/// IMPORTANT: must be kept in sync with `cow_bytes_between`. +#[allow(clippy::arithmetic_side_effects)] +pub fn total_state_tree_bytes(state: &BeaconState) -> usize { + let mut total: usize = 0; + + // Tree-backed fields. + total = total.saturating_add(state.validators().total_tree_bytes()); + total = total.saturating_add(state.balances().total_tree_bytes()); + total = total.saturating_add(state.state_roots().total_tree_bytes()); + total = total.saturating_add(state.block_roots().total_tree_bytes()); + total = total.saturating_add(state.randao_mixes().total_tree_bytes()); + total = total.saturating_add(state.slashings().total_tree_bytes()); + total = total.saturating_add(state.eth1_data_votes().total_tree_bytes()); + total = total.saturating_add(state.historical_roots().total_tree_bytes()); + + if let Ok(f) = state.inactivity_scores() { + total = total.saturating_add(f.total_tree_bytes()); + } + if let Ok(f) = state.previous_epoch_participation() { + total = total.saturating_add(f.total_tree_bytes()); + } + if let Ok(f) = state.current_epoch_participation() { + total = total.saturating_add(f.total_tree_bytes()); + } + if let Ok(f) = state.historical_summaries() { + total = total.saturating_add(f.total_tree_bytes()); + } + if let Ok(f) = state.pending_deposits() { + total = total.saturating_add(f.total_tree_bytes()); + } + if let Ok(f) = state.pending_partial_withdrawals() { + total = total.saturating_add(f.total_tree_bytes()); + } + if let Ok(f) = state.pending_consolidations() { + total = total.saturating_add(f.total_tree_bytes()); + } + + // Caches. + for cc in state.committee_caches() { + total = total.saturating_add(cc.approx_heap_bytes()); + } + if let Ok(sc) = state.current_sync_committee() { + total = total.saturating_add(std::mem::size_of_val(&**sc)); + } + if let Ok(sc) = state.next_sync_committee() { + total = total.saturating_add(std::mem::size_of_val(&**sc)); + } + + total +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn star_topology() { + let mut base = ApproxOwnedBytesList::default(); + base.push(500); + + let mut s1 = base.clone(); + s1.push(50); + + let mut s2 = base.clone(); + s2.push(80); + + assert_eq!(sum_approx_owned_bytes([&base, &s1, &s2].into_iter()), 630); + assert_eq!(sum_approx_owned_bytes([&s1, &s2].into_iter()), 630); + } + + #[test] + fn chain_topology() { + let mut f = ApproxOwnedBytesList::default(); + f.push(500); + + let mut a = f.clone(); + a.push(50); + + let mut b = a.clone(); + b.push(30); + + assert_eq!(sum_approx_owned_bytes([&f, &a, &b].into_iter()), 580); + assert_eq!(sum_approx_owned_bytes([&b].into_iter()), 580); + } + + #[test] + fn rebase_resets() { + let mut f = ApproxOwnedBytesList::default(); + f.push(500); + + let mut s = ApproxOwnedBytesList::default(); + s.push(999); + s.push(10); + + s.reset_to_base(&f, 80); + + assert_eq!(sum_approx_owned_bytes([&f, &s].into_iter()), 580); + } + + #[test] + fn zero_bytes_not_pushed() { + let mut s = ApproxOwnedBytesList::default(); + s.push(0); + assert!(s.0.is_empty()); + } +} diff --git a/consensus/types/src/state/beacon_state.rs b/consensus/types/src/state/beacon_state.rs index a033272b9d9..8cffcd23a90 100644 --- a/consensus/types/src/state/beacon_state.rs +++ b/consensus/types/src/state/beacon_state.rs @@ -45,9 +45,9 @@ use crate::{ FINALIZED_ROOT_INDEX_ELECTRA, NEXT_SYNC_COMMITTEE_INDEX, NEXT_SYNC_COMMITTEE_INDEX_ELECTRA, }, state::{ - BlockRootsIter, CommitteeCache, EpochCache, EpochCacheError, ExitCache, HistoricalBatch, - HistoricalSummary, ProgressiveBalancesCache, PubkeyCache, SlashingsCache, - get_active_validator_indices, + ApproxOwnedBytesList, BlockRootsIter, CommitteeCache, EpochCache, EpochCacheError, + ExitCache, HistoricalBatch, HistoricalSummary, ProgressiveBalancesCache, PubkeyCache, + SlashingsCache, get_active_validator_indices, }, sync_committee::{SyncCommittee, SyncDuty}, test_utils::TestRandom, @@ -716,6 +716,14 @@ where #[test_random(default)] #[metastruct(exclude)] pub epoch_cache: EpochCache, + /// COW memory tracking. Each entry is a segment of tree memory — the base tree + /// size or the COW bytes from a transition. Shared via `Arc` with cloned states. + #[serde(skip_serializing, skip_deserializing)] + #[ssz(skip_serializing, skip_deserializing)] + #[tree_hash(skip_hashing)] + #[test_random(default)] + #[metastruct(exclude)] + pub approx_owned_bytes: ApproxOwnedBytesList, } impl BeaconState { @@ -778,6 +786,7 @@ impl BeaconState { exit_cache: ExitCache::default(), slashings_cache: SlashingsCache::default(), epoch_cache: EpochCache::default(), + approx_owned_bytes: ApproxOwnedBytesList::default(), }) } diff --git a/consensus/types/src/state/committee_cache.rs b/consensus/types/src/state/committee_cache.rs index 2e74ab760cb..0bce50d960f 100644 --- a/consensus/types/src/state/committee_cache.rs +++ b/consensus/types/src/state/committee_cache.rs @@ -60,6 +60,18 @@ fn compare_shuffling_positions(xs: &Vec, ys: &Vec usize { + self.shuffling + .capacity() + .saturating_mul(std::mem::size_of::()) + .saturating_add( + self.shuffling_positions + .capacity() + .saturating_mul(std::mem::size_of::()), + ) + } + /// Return a new, fully initialized cache. /// /// The epoch must be within the range that the state can service: historic epochs with diff --git a/consensus/types/src/state/epoch_cache.rs b/consensus/types/src/state/epoch_cache.rs index cdea0d143df..f9f1b54b9ed 100644 --- a/consensus/types/src/state/epoch_cache.rs +++ b/consensus/types/src/state/epoch_cache.rs @@ -73,6 +73,25 @@ impl From for EpochCacheError { } impl EpochCache { + /// Approximate heap bytes consumed by this cache. + pub fn approx_heap_bytes(&self) -> usize { + self.inner + .as_ref() + .map(|inner| { + inner + .effective_balances + .capacity() + .saturating_mul(std::mem::size_of::()) + .saturating_add( + inner + .base_rewards + .capacity() + .saturating_mul(std::mem::size_of::()), + ) + }) + .unwrap_or(0) + } + pub fn new( key: EpochCacheKey, effective_balances: Vec, diff --git a/consensus/types/src/state/mod.rs b/consensus/types/src/state/mod.rs index a3bb1b8c9f0..26978bc4666 100644 --- a/consensus/types/src/state/mod.rs +++ b/consensus/types/src/state/mod.rs @@ -3,6 +3,7 @@ mod balance; mod beacon_state; #[macro_use] mod committee_cache; +mod approx_owned_bytes; mod epoch_cache; mod exit_cache; mod historical_batch; @@ -13,6 +14,10 @@ mod pubkey_cache; mod slashings_cache; pub use activation_queue::ActivationQueue; +pub use approx_owned_bytes::{ + ApproxOwnedBytes, ApproxOwnedBytesList, cow_bytes_between, sum_approx_owned_bytes, + total_state_tree_bytes, +}; pub use balance::Balance; pub use beacon_state::{ BeaconState, BeaconStateAltair, BeaconStateBase, BeaconStateBellatrix, BeaconStateCapella,