diff --git a/Cargo.lock b/Cargo.lock index ce2f3500..f7b6df2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -365,7 +365,6 @@ dependencies = [ "iddqd-test-utils", "proptest", "ref-cast", - "rustc-hash", "schemars", "serde", "serde_core", @@ -710,12 +709,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - [[package]] name = "rustix" version = "0.38.44" diff --git a/Cargo.toml b/Cargo.toml index d490a5e1..75292176 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,6 @@ iddqd = { path = "crates/iddqd", default-features = false } iddqd-test-utils = { path = "crates/iddqd-test-utils" } proptest = { version = "1.7.0", default-features = false, features = ["std"] } ref-cast = "1.0.24" -rustc-hash = { version = "2.1.1", default-features = false } schemars = "0.8.22" serde = "1.0.223" serde_core = "1.0.223" diff --git a/crates/iddqd/Cargo.toml b/crates/iddqd/Cargo.toml index 9a4ae046..66a26f2b 100644 --- a/crates/iddqd/Cargo.toml +++ b/crates/iddqd/Cargo.toml @@ -29,7 +29,6 @@ equivalent.workspace = true foldhash = { workspace = true, optional = true } hashbrown.workspace = true ref-cast = { workspace = true, optional = true } -rustc-hash.workspace = true schemars = { workspace = true, optional = true } serde_core = { workspace = true, optional = true } serde_json = { workspace = true, optional = true } @@ -50,7 +49,7 @@ default-hasher = ["dep:foldhash", "iddqd-test-utils/default-hasher"] proptest = ["dep:proptest"] schemars08 = ["dep:schemars", "dep:serde_json", "serde"] serde = ["dep:serde_core", "iddqd-test-utils/serde"] -std = ["dep:foldhash", "iddqd-test-utils/std", "rustc-hash/std"] +std = ["dep:foldhash", "iddqd-test-utils/std"] # Internal-only feature for testing that schemars/preserve_order works. internal-schemars08-preserve-order = ["schemars08", "schemars/preserve_order"] diff --git a/crates/iddqd/src/bi_hash_map/imp.rs b/crates/iddqd/src/bi_hash_map/imp.rs index 69976b66..77e1e4b3 100644 --- a/crates/iddqd/src/bi_hash_map/imp.rs +++ b/crates/iddqd/src/bi_hash_map/imp.rs @@ -812,9 +812,7 @@ impl BiHashMap { &mut self, additional: usize, ) -> Result<(), crate::errors::TryReserveError> { - self.items - .try_reserve(additional) - .map_err(crate::errors::TryReserveError::from_hashbrown)?; + self.items.try_reserve(additional)?; self.tables .k1_to_item .try_reserve(additional) @@ -863,9 +861,19 @@ impl BiHashMap { /// # } /// ``` pub fn shrink_to_fit(&mut self) { - self.items.shrink_to_fit(); - self.tables.k1_to_item.shrink_to_fit(); - self.tables.k2_to_item.shrink_to_fit(); + let remap = self.items.shrink_to_fit(); + if !remap.is_empty() { + self.tables.k1_to_item.remap_indexes(&remap); + self.tables.k2_to_item.remap_indexes(&remap); + } + let items = &self.items; + let state = &self.tables.state; + self.tables + .k1_to_item + .shrink_to_fit(|ix| state.hash_one(items[*ix].key1())); + self.tables + .k2_to_item + .shrink_to_fit(|ix| state.hash_one(items[*ix].key2())); } /// Shrinks the capacity of the map with a lower limit. 
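For orientation, the new shrink flow above (compact the item buffer, remap both key tables, then shrink the tables with a real key hasher) can be sketched with plain std types. This is an illustrative toy, not the crate's `ItemSet` or `MapHashTable`:

```rust
use std::collections::HashMap;

// Toy model of the shrink path introduced above (std types standing in
// for the crate's internals): compact the slotted item buffer, then
// rewrite the indexes stored in *both* key tables to their
// post-compaction positions before shrinking the tables themselves.
fn main() {
    // items[i] is Some(item) or a slot vacated by an earlier remove.
    let mut items: Vec<Option<(&str, &str)>> = vec![
        Some(("k1-a", "k2-a")),
        None, // hole left by a remove
        Some(("k1-b", "k2-b")),
    ];
    let mut k1_to_item: HashMap<&str, usize> =
        [("k1-a", 0), ("k1-b", 2)].into();
    let mut k2_to_item: HashMap<&str, usize> =
        [("k2-a", 0), ("k2-b", 2)].into();

    // 1. Compact: record the holes, then shift live items down.
    let holes: Vec<usize> = items
        .iter()
        .enumerate()
        .filter_map(|(i, slot)| slot.is_none().then_some(i))
        .collect();
    items.retain(Option::is_some);

    // 2. Remap: each surviving index drops by the number of holes
    //    below it.
    let remap = |old: usize| old - holes.partition_point(|&h| h < old);
    for ix in k1_to_item.values_mut().chain(k2_to_item.values_mut()) {
        *ix = remap(*ix);
    }

    assert_eq!(k1_to_item["k1-b"], 1);
    assert_eq!(items[k2_to_item["k2-b"]], Some(("k1-b", "k2-b")));
}
```

In the real code the hash tables additionally need the `state.hash_one(...)` closures shown in the diff, because hashbrown only stores the indexes and must recompute key hashes when it moves buckets during a shrink.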
It will drop @@ -910,9 +918,19 @@ impl BiHashMap { /// # } /// ``` pub fn shrink_to(&mut self, min_capacity: usize) { - self.items.shrink_to(min_capacity); - self.tables.k1_to_item.shrink_to(min_capacity); - self.tables.k2_to_item.shrink_to(min_capacity); + let remap = self.items.shrink_to(min_capacity); + if !remap.is_empty() { + self.tables.k1_to_item.remap_indexes(&remap); + self.tables.k2_to_item.remap_indexes(&remap); + } + let items = &self.items; + let state = &self.tables.state; + self.tables + .k1_to_item + .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key1())); + self.tables + .k2_to_item + .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key2())); } /// Returns an iterator over all items in the map. @@ -1031,7 +1049,7 @@ impl BiHashMap { self.tables.validate(self.len(), compactness)?; // Check that the indexes are all correct. - for (&ix, item) in self.items.iter() { + for (ix, item) in self.items.iter() { let key1 = item.key1(); let key2 = item.key2(); diff --git a/crates/iddqd/src/bi_hash_map/iter.rs b/crates/iddqd/src/bi_hash_map/iter.rs index 9d80090d..d077c6ed 100644 --- a/crates/iddqd/src/bi_hash_map/iter.rs +++ b/crates/iddqd/src/bi_hash_map/iter.rs @@ -2,12 +2,11 @@ use super::{RefMut, tables::BiHashMapTables}; use crate::{ BiHashItem, DefaultHashBuilder, support::{ - alloc::{AllocWrapper, Allocator, Global}, - item_set::ItemSet, + alloc::{Allocator, Global}, + item_set::{self, ItemSet}, }, }; use core::{hash::BuildHasher, iter::FusedIterator}; -use hashbrown::hash_map; /// An iterator over the elements of a [`BiHashMap`] by shared reference. /// Created by [`BiHashMap::iter`]. @@ -20,7 +19,7 @@ use hashbrown::hash_map; /// [`HashMap`]: std::collections::HashMap #[derive(Clone, Debug, Default)] pub struct Iter<'a, T: BiHashItem> { - inner: hash_map::Values<'a, usize, T>, + inner: item_set::Values<'a, T>, } impl<'a, T: BiHashItem> Iter<'a, T> { @@ -45,7 +44,6 @@ impl ExactSizeIterator for Iter<'_, T> { } } -// hash_map::Iter is a FusedIterator, so Iter is as well. impl FusedIterator for Iter<'_, T> {} /// An iterator over the elements of a [`BiHashMap`] by mutable reference. @@ -67,7 +65,7 @@ pub struct IterMut< A: Allocator = Global, > { tables: &'a BiHashMapTables, - inner: hash_map::ValuesMut<'a, usize, T>, + inner: item_set::ValuesMut<'a, T>, } impl<'a, T: BiHashItem, S: Clone + BuildHasher, A: Allocator> @@ -103,7 +101,6 @@ impl ExactSizeIterator } } -// hash_map::IterMut is a FusedIterator, so IterMut is as well. impl FusedIterator for IterMut<'_, T, S, A> { @@ -120,7 +117,7 @@ impl FusedIterator /// [`HashMap`]: std::collections::HashMap #[derive(Debug)] pub struct IntoIter { - inner: hash_map::IntoValues>, + inner: item_set::IntoValues, } impl IntoIter { @@ -145,5 +142,4 @@ impl ExactSizeIterator for IntoIter { } } -// hash_map::IterMut is a FusedIterator, so IterMut is as well. impl FusedIterator for IntoIter {} diff --git a/crates/iddqd/src/errors.rs b/crates/iddqd/src/errors.rs index aa254702..ef001001 100644 --- a/crates/iddqd/src/errors.rs +++ b/crates/iddqd/src/errors.rs @@ -72,7 +72,7 @@ pub struct TryReserveError { } #[derive(Clone, PartialEq, Eq, Debug)] -enum TryReserveErrorKind { +pub(crate) enum TryReserveErrorKind { /// Error due to the computed capacity exceeding the collection's maximum /// (usually `isize::MAX` bytes). CapacityOverflow, @@ -97,6 +97,29 @@ impl TryReserveError { }; Self { kind } } + + /// Converts from an `allocator_api2` `TryReserveError`. 
+ pub(crate) fn from_allocator_api2( + error: allocator_api2::collections::TryReserveError, + ) -> Self { + use allocator_api2::collections::TryReserveErrorKind as Kind; + let kind = match error.kind() { + Kind::CapacityOverflow => TryReserveErrorKind::CapacityOverflow, + Kind::AllocError { layout, .. } => { + TryReserveErrorKind::AllocError { layout } + } + }; + Self { kind } + } + + #[doc(hidden)] + pub(crate) fn __from_kind(kind: TryReserveErrorKind) -> Self { + Self { kind } + } + + pub(crate) fn kind(&self) -> &TryReserveErrorKind { + &self.kind + } } impl fmt::Display for TryReserveError { diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index 2efdcdf4..638c0bd8 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -690,9 +690,7 @@ impl IdHashMap { &mut self, additional: usize, ) -> Result<(), crate::errors::TryReserveError> { - self.items - .try_reserve(additional) - .map_err(crate::errors::TryReserveError::from_hashbrown)?; + self.items.try_reserve(additional)?; self.tables .key_to_item .try_reserve(additional) @@ -733,8 +731,15 @@ impl IdHashMap { /// # } /// ``` pub fn shrink_to_fit(&mut self) { - self.items.shrink_to_fit(); - self.tables.key_to_item.shrink_to_fit(); + let remap = self.items.shrink_to_fit(); + if !remap.is_empty() { + self.tables.key_to_item.remap_indexes(&remap); + } + let items = &self.items; + let state = &self.tables.state; + self.tables + .key_to_item + .shrink_to_fit(|ix| state.hash_one(items[*ix].key())); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -775,8 +780,15 @@ impl IdHashMap { /// # } /// ``` pub fn shrink_to(&mut self, min_capacity: usize) { - self.items.shrink_to(min_capacity); - self.tables.key_to_item.shrink_to(min_capacity); + let remap = self.items.shrink_to(min_capacity); + if !remap.is_empty() { + self.tables.key_to_item.remap_indexes(&remap); + } + let items = &self.items; + let state = &self.tables.state; + self.tables + .key_to_item + .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key())); } /// Iterates over the items in the map. @@ -880,7 +892,7 @@ impl IdHashMap { self.tables.validate(self.len(), compactness)?; // Check that the indexes are all correct. - for (&ix, item) in self.items.iter() { + for (ix, item) in self.items.iter() { let key = item.key(); let Some(ix1) = self.find_index(&key) else { return Err(ValidationError::general(format!( diff --git a/crates/iddqd/src/id_hash_map/iter.rs b/crates/iddqd/src/id_hash_map/iter.rs index d785ab6b..d77b513d 100644 --- a/crates/iddqd/src/id_hash_map/iter.rs +++ b/crates/iddqd/src/id_hash_map/iter.rs @@ -2,12 +2,11 @@ use super::{RefMut, tables::IdHashMapTables}; use crate::{ DefaultHashBuilder, IdHashItem, support::{ - alloc::{AllocWrapper, Allocator, Global}, - item_set::ItemSet, + alloc::{Allocator, Global}, + item_set::{self, ItemSet}, }, }; use core::{hash::BuildHasher, iter::FusedIterator}; -use hashbrown::hash_map; /// An iterator over the elements of a [`IdHashMap`] by shared reference. /// Created by [`IdHashMap::iter`]. @@ -20,7 +19,7 @@ use hashbrown::hash_map; /// [`HashMap`]: std::collections::HashMap #[derive(Clone, Debug, Default)] pub struct Iter<'a, T: IdHashItem> { - inner: hash_map::Values<'a, usize, T>, + inner: item_set::Values<'a, T>, } impl<'a, T: IdHashItem> Iter<'a, T> { @@ -45,7 +44,6 @@ impl ExactSizeIterator for Iter<'_, T> { } } -// hash_map::Iter is a FusedIterator, so Iter is as well. 
impl FusedIterator for Iter<'_, T> {} /// An iterator over the elements of a [`IdHashMap`] by mutable reference. @@ -67,7 +65,7 @@ pub struct IterMut< A: Allocator = Global, > { tables: &'a IdHashMapTables, - inner: hash_map::ValuesMut<'a, usize, T>, + inner: item_set::ValuesMut<'a, T>, } impl<'a, T: IdHashItem, S: BuildHasher, A: Allocator> IterMut<'a, T, S, A> { @@ -101,7 +99,6 @@ impl ExactSizeIterator } } -// hash_map::IterMut is a FusedIterator, so IterMut is as well. impl FusedIterator for IterMut<'_, T, S, A> { @@ -118,7 +115,7 @@ impl FusedIterator /// [`HashMap`]: std::collections::HashMap #[derive(Debug)] pub struct IntoIter { - inner: hash_map::IntoValues>, + inner: item_set::IntoValues, } impl IntoIter { @@ -143,5 +140,4 @@ impl ExactSizeIterator for IntoIter { } } -// hash_map::IterMut is a FusedIterator, so IterMut is as well. impl FusedIterator for IntoIter {} diff --git a/crates/iddqd/src/id_ord_map/imp.rs b/crates/iddqd/src/id_ord_map/imp.rs index dd400481..1e0b4186 100644 --- a/crates/iddqd/src/id_ord_map/imp.rs +++ b/crates/iddqd/src/id_ord_map/imp.rs @@ -431,7 +431,11 @@ impl IdOrdMap { /// assert!(map.capacity() >= 2); /// ``` pub fn shrink_to_fit(&mut self) { - self.items.shrink_to_fit(); + let remap = self.items.shrink_to_fit(); + if !remap.is_empty() { + let items = &self.items; + self.tables.key_to_item.remap_indexes(&remap, |ix| items[ix].key()); + } } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -474,7 +478,11 @@ impl IdOrdMap { /// assert!(map.capacity() >= 2); /// ``` pub fn shrink_to(&mut self, min_capacity: usize) { - self.items.shrink_to(min_capacity); + let remap = self.items.shrink_to(min_capacity); + if !remap.is_empty() { + let items = &self.items; + self.tables.key_to_item.remap_indexes(&remap, |ix| items[ix].key()); + } } /// Iterates over the items in the map. @@ -591,7 +599,7 @@ impl IdOrdMap { // Check that the indexes are all correct. - for (&ix, item) in self.items.iter() { + for (ix, item) in self.items.iter() { let key = item.key(); let ix1 = match chaos { ValidateChaos::Yes => { @@ -1364,7 +1372,7 @@ impl IdOrdMap { Q: ?Sized + Ord + Equivalent>, { self.items.iter().find_map(|(index, item)| { - (k.equivalent(&item.key())).then_some(*index) + (k.equivalent(&item.key())).then_some(index) }) } diff --git a/crates/iddqd/src/id_ord_map/iter.rs b/crates/iddqd/src/id_ord_map/iter.rs index 3524e749..4fbfbc7e 100644 --- a/crates/iddqd/src/id_ord_map/iter.rs +++ b/crates/iddqd/src/id_ord_map/iter.rs @@ -1,8 +1,11 @@ use super::{IdOrdItem, RefMut, tables::IdOrdMapTables}; use crate::support::{ - alloc::Global, borrow::DormantMutRef, btree_table, item_set::ItemSet, + alloc::Global, + borrow::DormantMutRef, + btree_table, + item_set::{ConsumingItemSet, ItemSet}, }; -use core::{hash::Hash, iter::FusedIterator}; +use core::{hash::Hash, iter::FusedIterator, marker::PhantomData}; /// An iterator over the elements of an [`IdOrdMap`] by shared reference. /// @@ -45,6 +48,39 @@ impl ExactSizeIterator for Iter<'_, T> { // btree_set::Iter is a FusedIterator, so Iter is as well. impl FusedIterator for Iter<'_, T> {} +// A raw pointer into an `ItemSet`'s slot buffer with the same +// thread-safety posture as an `&'a mut ItemSet`. +// +// We use a raw pointer rather than a reference inside `IterMut` to +// avoid reborrow invalidation under Tree Borrows — each iteration +// reborrowing `&mut self.items` would invalidate previously yielded +// `&mut T` children. 
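A self-contained sketch of the pattern this comment describes (toy types, not the crate's `IterMut`): yield `&mut T` for distinct indexes from a raw base pointer, so `next` never reborrows the whole buffer and earlier yields stay valid.

```rust
use std::marker::PhantomData;

// Minimal sketch (not the crate's IterMut): hand out `&'a mut T` for
// distinct, in-bounds indexes from a raw base pointer. Because `next`
// never reborrows the whole buffer, previously yielded references are
// not invalidated under Tree Borrows.
struct DisjointIterMut<'a, T> {
    base: *mut T,
    // Distinct, in-bounds indexes; stands in for the btree's index stream.
    indexes: std::vec::IntoIter<usize>,
    // Tie the struct to the buffer's lifetime, mirroring &'a mut [T].
    _marker: PhantomData<&'a mut [T]>,
}

impl<'a, T> Iterator for DisjointIterMut<'a, T> {
    type Item = &'a mut T;

    fn next(&mut self) -> Option<&'a mut T> {
        let ix = self.indexes.next()?;
        // SAFETY: indexes are distinct and in bounds, so every yielded
        // `&mut T` targets disjoint memory, and no reference to the
        // buffer as a whole is ever created here.
        Some(unsafe { &mut *self.base.add(ix) })
    }
}

fn main() {
    let mut data = vec![10, 20, 30];
    let iter = DisjointIterMut {
        base: data.as_mut_ptr(),
        indexes: vec![0, 2].into_iter(),
        _marker: PhantomData,
    };
    // Holding all yielded references at once is fine: they are disjoint.
    let refs: Vec<&mut i32> = iter.collect();
    for r in refs {
        *r += 1;
    }
    assert_eq!(data, [11, 20, 31]);
}
```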
Wrapping the raw pointer in a dedicated struct +// (instead of a bare field + manual `Send`/`Sync` on `IterMut`) lets +// the compiler auto-derive `IterMut`'s auto traits from the +// combination of *all* its fields, so if a future `IdOrdMapTables` or +// `btree_table::Iter` field becomes non-`Send` / non-`Sync`, +// `IterMut` follows automatically. +struct ItemSetPtr<'a, T: IdOrdItem> { + ptr: *mut Option, + // Borrow the ItemSet for `'a` so the raw pointer stays live, and + // so variance / drop-check mirror `&'a mut ItemSet`. + _marker: PhantomData<&'a mut ItemSet>, +} + +impl core::fmt::Debug for ItemSetPtr<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("ItemSetPtr").field("ptr", &self.ptr).finish() + } +} + +// SAFETY: `ItemSetPtr<'a, T>` has the same thread-safety semantics as +// `&'a mut ItemSet`, which is `Send`/`Sync` iff +// `ItemSet` is, which reduces to `T: Send` / `T: Sync` +// (since `Global: Send + Sync` unconditionally). +unsafe impl<'a, T: IdOrdItem + Send> Send for ItemSetPtr<'a, T> {} +// SAFETY: see the `Send` impl above. +unsafe impl<'a, T: IdOrdItem + Sync> Sync for ItemSetPtr<'a, T> {} + /// An iterator over the elements of a [`IdOrdMap`] by mutable reference. /// /// This iterator returns [`RefMut`] instances. @@ -58,7 +94,7 @@ pub struct IterMut<'a, T: IdOrdItem> where T::Key<'a>: Hash, { - items: &'a mut ItemSet, + items: ItemSetPtr<'a, T>, tables: &'a IdOrdMapTables, iter: btree_table::Iter<'a>, } @@ -71,10 +107,21 @@ where items: &'a mut ItemSet, tables: &'a IdOrdMapTables, ) -> Self { - Self { items, tables, iter: tables.key_to_item.iter() } + Self { + items: ItemSetPtr { ptr: items.as_mut_ptr(), _marker: PhantomData }, + tables, + iter: tables.key_to_item.iter(), + } } } +// `Send` / `Sync` are auto-derived: `ItemSetPtr<'a, T>` contributes the +// `T: Send`/`Sync` bound that `&'a mut ItemSet` would, +// `&'a IdOrdMapTables` contributes `IdOrdMapTables: Sync`, and +// `btree_table::Iter<'a>` contributes whatever its fields require. +// If any of those ever loses `Send`/`Sync`, `IterMut` follows without +// a silent manual impl masking the change. + impl<'a, T: IdOrdItem + 'a> Iterator for IterMut<'a, T> where T::Key<'a>: Hash, @@ -85,34 +132,21 @@ where fn next(&mut self) -> Option { let index = self.iter.next()?; - let item = &mut self.items[index]; - - // SAFETY: This lifetime extension from self to 'a is safe based on two - // things: - // - // 1. We never repeat indexes, i.e. for an index i, once we've handed - // out an item at i, creating `&mut T`, we'll never get the index i - // again. (This is guaranteed from the set-based nature of the - // iterator.) This means that we don't ever create a mutable alias to - // the same memory. - // - // In particular, unlike all the other places we look up data from a - // btree table, we don't pass a lookup function into - // self.iter.next(). If we did, then it is possible the lookup - // function would have been called with an old index i. But we don't - // need to do that. - // - // 2. All mutable references to data within self.items are derived from - // self.items. So, the rule described at [1] is upheld: - // - // > When creating a mutable reference, then while this reference - // > exists, the memory it points to must not get accessed (read or - // > written) through any other pointer or reference not derived from - // > this reference. 
- // - // [1]: - // https://doc.rust-lang.org/std/ptr/index.html#pointer-to-reference-conversion - let item = unsafe { core::mem::transmute::<&mut T, &'a mut T>(item) }; + // SAFETY: the btree only stores indexes that currently point at + // `Some` slots in the backing `ItemSet` (upheld by every + // btree-mutating call site in `id_ord_map`), so + // `items.ptr.add(index)` is in-bounds and the slot is + // initialized. The btree is a set, so each call to + // `self.iter.next()` yields a distinct `index`: the `&mut T`s + // handed out across iterations target disjoint memory and never + // alias. Since we never reborrow `&mut ItemSet` between + // iterations, no ancestor reborrow invalidates + // previously-yielded references. + let item: &'a mut T = unsafe { + (*self.items.ptr.add(index)) + .as_mut() + .expect("btree index points at a Some slot in ItemSet") + }; let (hash, dormant) = { let (item, dormant) = DormantMutRef::new(item); @@ -120,8 +154,8 @@ where (hash, dormant) }; - // SAFETY: item is dropped above, and self is no longer used after this - // point. + // SAFETY: item is dropped above, and self is no longer used + // after this point. let item = unsafe { dormant.awaken() }; Some(RefMut::new(self.tables.state().clone(), hash, item)) @@ -138,7 +172,6 @@ where } } -// hash_map::IterMut is a FusedIterator, so IterMut is as well. impl<'a, T: IdOrdItem + 'a> FusedIterator for IterMut<'a, T> where T::Key<'a>: Hash { @@ -152,7 +185,7 @@ impl<'a, T: IdOrdItem + 'a> FusedIterator for IterMut<'a, T> where /// [`IdOrdMap::into_iter`]: crate::IdOrdMap::into_iter #[derive(Debug)] pub struct IntoIter { - items: ItemSet, + items: ConsumingItemSet, iter: btree_table::IntoIter, } @@ -161,7 +194,10 @@ impl IntoIter { items: ItemSet, tables: IdOrdMapTables, ) -> Self { - Self { items, iter: tables.key_to_item.into_iter() } + Self { + items: items.into_consuming(), + iter: tables.key_to_item.into_iter(), + } } } @@ -171,9 +207,13 @@ impl Iterator for IntoIter { #[inline] fn next(&mut self) -> Option { let index = self.iter.next()?; + // We own `self.items` and the btree's indexes are never revisited, + // so take directly from the consuming view (O(1), no free-list + // allocation) rather than `ItemSet::remove`, which would push to + // the free list per call. let next = self .items - .remove(index) + .take(index) .unwrap_or_else(|| panic!("index {index} not found in items")); Some(next) } diff --git a/crates/iddqd/src/support/btree_table.rs b/crates/iddqd/src/support/btree_table.rs index 4c4ac1e4..a6059022 100644 --- a/crates/iddqd/src/support/btree_table.rs +++ b/crates/iddqd/src/support/btree_table.rs @@ -4,7 +4,7 @@ //! integers (that are indexes corresponding to items), but use an external //! comparator. -use super::map_hash::MapHash; +use super::{item_set::IndexRemap, map_hash::MapHash}; use crate::internal::{TableValidationError, ValidateCompact}; use alloc::{ collections::{BTreeSet, btree_set}, @@ -233,6 +233,38 @@ impl MapBTreeTable { self.items.retain(|index| f(index.0)); } + /// Rewrites every stored index via `remap`. + /// + /// Called after [`ItemSet::shrink_to_fit`] / + /// [`ItemSet::shrink_to`] compacts the backing items buffer. std's + /// `BTreeSet` does not permit in-place key mutation, so we drain + /// the old set and rebuild. Because the remap is monotonically + /// non-decreasing in input order and preserves the relative order + /// of live items, the drained sequence is already in comparator + /// order for the rebuilt set. 
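The ordering claim is easy to check in isolation. A std-only sketch with toy indexes (not the crate's `MapBTreeTable` or its external comparator):

```rust
use std::collections::BTreeSet;

// Std-only check of the ordering claim: the remap subtracts from each
// live index the number of holes below it. That shift is non-decreasing
// in the input, so draining the old set in ascending order yields
// remapped values that are already ascending for the rebuilt set.
fn main() {
    let holes = [1usize, 4];
    let remap = |old: usize| old - holes.partition_point(|&h| h < old);

    let old: BTreeSet<usize> = [0, 2, 3, 5, 6].into();
    let mut rebuilt = BTreeSet::new();
    let mut prev = None;
    for old_ix in old {
        let new_ix = remap(old_ix);
        // Ascending input stays ascending after the remap.
        assert!(prev.map_or(true, |p| p < new_ix));
        prev = Some(new_ix);
        rebuilt.insert(new_ix);
    }
    assert_eq!(rebuilt.into_iter().collect::<Vec<_>>(), vec![0, 1, 2, 3, 4]);
}
```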
+ /// + /// `lookup` must resolve *new* (post-compaction) indexes back to + /// their keys — callers arrange for items to be compacted before + /// this is invoked. + /// + /// [`ItemSet::shrink_to_fit`]: super::item_set::ItemSet::shrink_to_fit + /// [`ItemSet::shrink_to`]: super::item_set::ItemSet::shrink_to + pub(crate) fn remap_indexes(&mut self, remap: &IndexRemap, lookup: F) + where + K: Ord, + F: Fn(usize) -> K, + { + let old = core::mem::take(&mut self.items); + + let f = remap_cmp(lookup); + let guard = CmpDropGuard::new(&f); + for old_idx in old { + let new_idx = remap.remap(old_idx.0); + self.items.insert(Index::new(new_idx)); + } + drop(guard); + } + /// Clears the B-tree table, removing all items. #[inline] pub(crate) fn clear(&mut self) { @@ -325,6 +357,26 @@ where } } +fn remap_cmp<'a, K, F>(lookup: F) -> impl Fn(Index, Index) -> Ordering + 'a +where + F: 'a + Fn(usize) -> K, + K: Ord, +{ + move |a: Index, b: Index| { + if a.0 == b.0 { + return Ordering::Equal; + } + // Remap rebuild inserts only real (post-compaction) indexes — + // the sentinel is a lookup-time artifact and must not appear + // here. + debug_assert!( + a.0 != Index::SENTINEL_VALUE && b.0 != Index::SENTINEL_VALUE, + "sentinel value should not appear during remap" + ); + lookup(a.0).cmp(&lookup(b.0)) + } +} + fn insert_cmp<'a, K, Q, F>( index: usize, key: &'a Q, diff --git a/crates/iddqd/src/support/free_list.rs b/crates/iddqd/src/support/free_list.rs new file mode 100644 index 00000000..765ab8a2 --- /dev/null +++ b/crates/iddqd/src/support/free_list.rs @@ -0,0 +1,348 @@ +//! A lazily-allocated free list of `usize` indexes. +//! +//! See [`FreeList`] for the full picture. This type is an internal +//! primitive used by [`ItemSet`](super::item_set::ItemSet); it is kept +//! in its own module so the unsafe surface can be audited independently +//! of the surrounding collection logic. + +use crate::errors::{TryReserveError, TryReserveErrorKind}; +use allocator_api2::alloc::{Allocator, Layout}; +use core::{ + marker::PhantomData, + mem::{align_of, size_of}, + ptr::NonNull, +}; + +// `FreeListHeader::data_ptr` relies on the inline `usize` slots sitting +// flush against the header, i.e. no padding between them. That holds iff +// `size_of::()` is a multiple of `align_of::()`, +// which is true for every target that has `#[repr(C)] struct { usize, +// usize }` layout. +// +// Assert this at compile time so a future platform or layout change can't +// silently break `data_ptr`. +const _: () = assert!( + size_of::() % align_of::() == 0, + "FreeListHeader layout breaks the data_ptr invariant; \ + use the offset returned by `FreeListHeader::layout_for` instead", +); + +/// A free list of `usize` indexes, lazily allocated. +/// +/// The field is a single nullable pointer (`None` when no allocation has +/// happened yet), so it is one word on the stack. When an allocation is +/// present, it is laid out as a [`FreeListHeader`] followed inline by `cap` +/// `usize` slots. +/// +/// The allocator is passed in explicitly every time the list is mutated. This +/// allows us to not require that the allocator be `Clone`, and in case of a +/// non-ZST allocator like bumpalo, it means that we don't have to store a +/// second copy of it. The caller is responsible for passing in the same +/// allocator on every call (this is documented as a safety requirement). +pub(crate) struct FreeList { + ptr: Option>, + // Propagate auto traits as if we held an `A`. 
+ _marker: PhantomData, +} + +/// Header at the start of the free-list allocation. +/// +/// Indexes live inline after this struct. This is opaque to callers outside +/// this module — it's `pub(crate)` only so [`FreeList::try_reserve_total`] can +/// mention `NonNull` in its signature. +#[repr(C)] +pub(crate) struct FreeListHeader { + len: usize, + cap: usize, +} + +impl FreeListHeader { + /// Returns the allocation layout for a header followed by `cap` inline + /// `usize` slots, plus the byte offset at which the slots begin. + #[inline] + fn layout_for(cap: usize) -> (Layout, usize) { + Self::layout_for_checked(cap) + .expect("free-list layout did not overflow") + } + + /// Fallible variant of [`layout_for`] that surfaces capacity / + /// layout overflow as `TryReserveError::CapacityOverflow` rather + /// than panicking. + #[inline] + fn layout_for_checked( + cap: usize, + ) -> Result<(Layout, usize), TryReserveError> { + let overflow = || { + TryReserveError::__from_kind(TryReserveErrorKind::CapacityOverflow) + }; + let header = Layout::new::(); + let data = Layout::array::(cap).map_err(|_| overflow())?; + let (combined, offset) = header.extend(data).map_err(|_| overflow())?; + Ok((combined.pad_to_align(), offset)) + } + + /// Pointer to the inline slot array. + /// + /// # Safety + /// + /// `ptr` must have been produced by an allocation using + /// [`Self::layout_for`]. + #[inline] + unsafe fn data_ptr(ptr: NonNull) -> *mut usize { + // For `{usize, usize}`, `size_of::()` is already + // a multiple of `align_of::()`, so the first slot sits + // flush against the header. + (ptr.as_ptr() as *mut u8).add(size_of::()) as *mut usize + } +} + +impl FreeList { + #[inline] + pub(crate) const fn new() -> Self { + Self { ptr: None, _marker: PhantomData } + } + + #[inline] + pub(crate) fn as_slice(&self) -> &[usize] { + match self.ptr { + Some(ptr) => { + // SAFETY: `ptr` was allocated by `ensure_capacity`, so the + // header is initialized and the inline slots up to `len` + // are initialized. + unsafe { + let header = ptr.as_ref(); + let data = FreeListHeader::data_ptr(ptr); + core::slice::from_raw_parts(data, header.len) + } + } + None => &[], + } + } + + #[inline] + pub(crate) fn last(&self) -> Option { + self.as_slice().last().copied() + } + + #[inline] + pub(crate) fn pop(&mut self) -> Option { + let mut ptr = self.ptr?; + // SAFETY: `ptr` is live (since self.ptr was Some) and we have + // unique access via `&mut self`. + // + // There is a subtle detail here: header (a &mut FreeListHeader) is + // alive until the end of the function. The scope of the mutable borrow + // is the header. value points to data after the header, so there's no + // aliasing of mutable data. + let header = unsafe { ptr.as_mut() }; + if header.len == 0 { + return None; + } + header.len -= 1; + // SAFETY: `header.len` was just decremented to an in-bounds + // index, and that slot was initialized when pushed. + let value = + unsafe { FreeListHeader::data_ptr(ptr).add(header.len).read() }; + Some(value) + } + + /// Pushes `value`, growing the backing allocation if needed. + /// + /// # Safety + /// + /// `alloc` must be the same allocator (or a functionally equivalent + /// handle) that was used for every prior mutation of this free list. + #[inline] + pub(crate) unsafe fn push( + &mut self, + value: usize, + alloc: &T, + ) { + // SAFETY: additional (= 1) is greater than 0; the allocator contract is + // forwarded. 
+ let mut ptr = unsafe { self.ensure_capacity(1, alloc) }; + // SAFETY: `ptr` is the header returned by `ensure_capacity`, + // which guarantees room for at least `header.len + 1` slots. + unsafe { + let header = ptr.as_mut(); + FreeListHeader::data_ptr(ptr).add(header.len).write(value); + header.len += 1; + } + } + + /// Zeros the stored length without deallocating. Preserves capacity + /// so a prior `try_reserve_total` reservation survives. + #[inline] + pub(crate) fn clear(&mut self) { + if let Some(mut ptr) = self.ptr { + // SAFETY: `ptr` is live and we hold `&mut self`, so no other + // reference into the header exists. + unsafe { ptr.as_mut().len = 0 }; + } + } + + /// Deallocates any backing storage. + pub(crate) fn deallocate(&mut self, alloc: &T) { + let Some(ptr) = self.ptr.take() else { return }; + // SAFETY: this block was allocated via `alloc` (by the contract of + // `push`) using a layout computed from the recorded capacity, so + // it is sound to deallocate with the same allocator and layout. + unsafe { + let cap = ptr.as_ref().cap; + let (layout, _) = FreeListHeader::layout_for(cap); + alloc.deallocate(ptr.cast::(), layout); + } + } + + /// Current allocated capacity. Zero if the free list has never + /// allocated. + #[inline] + pub(crate) fn capacity(&self) -> usize { + match self.ptr { + // SAFETY: `ptr` is live; the header was initialized at + // allocation time and only mutated through `&mut self`. + Some(ptr) => unsafe { ptr.as_ref().cap }, + None => 0, + } + } + + /// Fallibly grows capacity so that `self.capacity() >= total`, returning a + /// pointer to the (possibly-reallocated) header. + /// + /// Growth is amortized: on a reallocation, the new capacity is the maximum + /// of `total`, twice the current capacity, and 4 (the floor for a first + /// allocation). + /// + /// Used by + /// [`ItemSet::try_reserve`](super::item_set::ItemSet::try_reserve) + /// to front-load the allocation that + /// [`ItemSet::remove`](super::item_set::ItemSet::remove) would + /// otherwise do lazily. The infallible variant + /// [`ensure_capacity`](Self::ensure_capacity) is a thin wrapper + /// that translates allocator failure into a panic, for the + /// insert/clone/remove call sites whose callers already accept + /// OOM abort. + /// + /// # Safety + /// + /// - The caller must pass the same allocator on every call for a + /// given `FreeList`. + /// - `total` must be nonzero. Under that contract the returned + /// pointer is always live. + pub(crate) unsafe fn try_reserve_total( + &mut self, + total: usize, + alloc: &T, + ) -> Result, TryReserveError> { + let current_cap = self.capacity(); + if current_cap >= total { + // SAFETY: caller guarantees `total > 0`, so + // `current_cap >= total > 0` implies `self.ptr` is `Some`. + return Ok(unsafe { self.ptr.unwrap_unchecked() }); + } + // Amortized growth: at least double the existing capacity, + // with a floor of 4 on a first allocation. Matches + // `RawVec::grow_amortized`. + let new_cap = total.max(current_cap.saturating_mul(2)).max(4); + let (layout, _) = FreeListHeader::layout_for_checked(new_cap)?; + let new_mem = alloc.allocate(layout).map_err(|_| { + TryReserveError::__from_kind(TryReserveErrorKind::AllocError { + layout, + }) + })?; + let new_ptr = new_mem.cast::(); + // Copy existing contents (if any), then swap in the new block + // and free the old one. + // + // SAFETY: `new_ptr` points at a fresh allocation sized by + // `layout`, so writing the header is in-bounds. 
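As an aside, the header-plus-inline-slots arithmetic behind `layout_for_checked` can be reproduced with std's `Layout`; this is a standalone check, not the crate's code (the crate goes through the `allocator_api2` equivalent of the same type):

```rust
use std::alloc::Layout;

// Standalone check of the header + inline slot-array layout the free
// list relies on: extend() appends the slot array after the header and
// reports the byte offset where the slots begin.
#[repr(C)]
struct Header {
    len: usize,
    cap: usize,
}

fn main() {
    let cap = 8;
    let header = Layout::new::<Header>();
    let slots = Layout::array::<usize>(cap).unwrap();
    let (combined, offset) = header.extend(slots).unwrap();
    let combined = combined.pad_to_align();

    // No padding between the header and the first slot: the header is
    // two usizes, so its size is a multiple of usize's alignment. This
    // is the invariant the const assert in this file pins down.
    assert_eq!(offset, std::mem::size_of::<Header>());
    assert_eq!(
        combined.size(),
        std::mem::size_of::<Header>() + cap * std::mem::size_of::<usize>()
    );
}
```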
If there is an + // old allocation, its layout is recorded in `old_cap` (read + // before the deallocation) and it came from the same allocator + // by the contract of `try_reserve_total` / `ensure_capacity`. + unsafe { + match self.ptr { + Some(old_ptr) => { + let old_len; + let old_cap; + { + let header = old_ptr.as_ref(); + old_len = header.len; + old_cap = header.cap; + } + new_ptr + .as_ptr() + .write(FreeListHeader { len: old_len, cap: new_cap }); + let old_data = FreeListHeader::data_ptr(old_ptr); + let new_data = FreeListHeader::data_ptr(new_ptr); + core::ptr::copy_nonoverlapping(old_data, new_data, old_len); + let (old_layout, _) = FreeListHeader::layout_for(old_cap); + alloc.deallocate(old_ptr.cast::(), old_layout); + } + None => { + new_ptr + .as_ptr() + .write(FreeListHeader { len: 0, cap: new_cap }); + } + } + } + self.ptr = Some(new_ptr); + Ok(new_ptr) + } + + /// Length of the allocated contents. + #[inline] + pub(crate) fn len(&self) -> usize { + match self.ptr { + // SAFETY: `ptr` is live; the header was initialized at + // allocation time. + Some(ptr) => unsafe { ptr.as_ref().len }, + None => 0, + } + } + + /// Ensures there is room for at least `additional` more slots and + /// returns a pointer to the (possibly-reallocated) header. + /// + /// # Safety + /// + /// - The caller must pass the same allocator on every call for a + /// given `FreeList`. + /// - `additional` must be nonzero. Under that contract, the returned + /// pointer is always live. + unsafe fn ensure_capacity( + &mut self, + additional: usize, + alloc: &T, + ) -> NonNull { + let needed = self + .len() + .checked_add(additional) + .expect("free-list capacity did not overflow"); + // SAFETY: caller guarantees `additional > 0`, so `needed > 0` + // satisfies `try_reserve_total`'s nonzero precondition. + // Allocator contract forwarded. + match unsafe { self.try_reserve_total(needed, alloc) } { + Ok(ptr) => ptr, + Err(e) => match *e.kind() { + TryReserveErrorKind::AllocError { layout } => { + alloc::alloc::handle_alloc_error(layout) + } + TryReserveErrorKind::CapacityOverflow => { + panic!( + "free-list capacity overflow: needed={needed} \ + exceeds Layout::array:: bounds" + ) + } + }, + } + } +} + +// SAFETY: `FreeList` logically owns a `Box<[usize]>` plus the phantom +// allocator `A` that the enclosing `ItemSet` uses to allocate. The raw pointer +// disables the compiler's auto-trait derivation, so we re-state the conclusion +// the compiler would otherwise reach: `FreeList` is `Send` if `A` is `Send`. +unsafe impl Send for FreeList {} +// SAFETY: See the `Send` impl above. Similarly, `FreeList` is `Sync` if `A` +// is `Sync`. +unsafe impl Sync for FreeList {} diff --git a/crates/iddqd/src/support/hash_table.rs b/crates/iddqd/src/support/hash_table.rs index d9c87c6f..ffa9962b 100644 --- a/crates/iddqd/src/support/hash_table.rs +++ b/crates/iddqd/src/support/hash_table.rs @@ -2,6 +2,7 @@ use super::{ alloc::{AllocWrapper, Allocator}, + item_set::IndexRemap, map_hash::MapHash, }; use crate::internal::{TableValidationError, ValidateCompact}; @@ -178,16 +179,37 @@ impl MapHashTable { self.items.reserve(additional, |_| 0); } + /// Rewrites every stored index via `remap`. + /// + /// Called after [`ItemSet::shrink_to_fit`] / [`ItemSet::shrink_to`] + /// compacts the backing items buffer. We store hashes of *keys* (not of + /// indexes), so rewriting an index does not invalidate its hash and no + /// rehash is needed. 
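A compact way to see the no-rehash point (a toy slot layout, not hashbrown's actual `HashTable`):

```rust
use std::collections::hash_map::RandomState;
use std::hash::BuildHasher;

// Toy stand-in for the point above: each slot caches the hash of a
// *key* and stores an index into the item buffer. Compaction rewrites
// the index in place; the cached key hash, and therefore the slot's
// bucket, is untouched, so no rehash or reinsertion happens.
fn main() {
    let state = RandomState::new();
    let hash = |k: &str| state.hash_one(k);

    // (cached key hash, index into items)
    let mut slots = vec![(hash("alpha"), 0usize), (hash("beta"), 2)];

    // Compaction turned old index 2 into new index 1.
    for (_, ix) in slots.iter_mut() {
        if *ix == 2 {
            *ix = 1;
        }
    }

    assert_eq!(slots[1].0, hash("beta")); // hash unchanged
    assert_eq!(slots[1].1, 1); // index rewritten
}
```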
+ /// + /// [`ItemSet::shrink_to_fit`]: super::item_set::ItemSet::shrink_to_fit + /// [`ItemSet::shrink_to`]: super::item_set::ItemSet::shrink_to + pub(crate) fn remap_indexes(&mut self, remap: &IndexRemap) { + for slot in self.items.iter_mut() { + *slot = remap.remap(*slot); + } + } + /// Shrinks the capacity of the hash table as much as possible. #[inline] - pub(crate) fn shrink_to_fit(&mut self) { - self.items.shrink_to_fit(|_| 0); + pub(crate) fn shrink_to_fit(&mut self, hasher: impl Fn(&usize) -> u64) { + self.items.shrink_to_fit(hasher); } /// Shrinks the capacity of the hash table with a lower limit. + /// + /// See [`Self::shrink_to_fit`] for the contract `hasher` must satisfy. #[inline] - pub(crate) fn shrink_to(&mut self, min_capacity: usize) { - self.items.shrink_to(min_capacity, |_| 0); + pub(crate) fn shrink_to( + &mut self, + min_capacity: usize, + hasher: impl Fn(&usize) -> u64, + ) { + self.items.shrink_to(min_capacity, hasher); } /// Tries to reserve capacity for at least `additional` more items. diff --git a/crates/iddqd/src/support/item_set.rs b/crates/iddqd/src/support/item_set.rs index 8f804680..c663458b 100644 --- a/crates/iddqd/src/support/item_set.rs +++ b/crates/iddqd/src/support/item_set.rs @@ -1,32 +1,138 @@ -use super::alloc::AllocWrapper; +use super::{alloc::AllocWrapper, free_list::FreeList}; use crate::{ + errors::TryReserveError, internal::{ValidateCompact, ValidationError}, support::alloc::{Allocator, Global, global_alloc}, }; +use allocator_api2::vec::Vec; use core::{ fmt, + iter::FusedIterator, ops::{Index, IndexMut}, }; -use hashbrown::{HashMap, hash_map}; -use rustc_hash::FxBuildHasher; -/// A map of items stored by integer index. -#[derive(Clone)] +/// A remap from old (pre-compaction) to new (post-compaction) indexes. +/// +/// Produced by [`ItemSet::shrink_to_fit`] and [`ItemSet::shrink_to`], +/// consumed by the outer tables (hash / btree index tables) so they +/// can rewrite their stored indexes to point at the compacted `items` +/// buffer. +/// +/// `holes` is the sorted list of pre-compaction indexes that were +/// compacted away. Under the invariants of [`ItemSet`], the holes are +/// exactly the vacant slots that existed at the time of the shrink — +/// equivalently, the pre-shrink free list in sorted order. +#[derive(Clone, Debug)] +pub(crate) struct IndexRemap { + holes: alloc::vec::Vec, +} + +impl IndexRemap { + /// Returns `true` if no indexes were compacted away (i.e. the + /// shrink was a capacity-only operation and the outer tables do + /// not need rewriting). + #[inline] + pub(crate) fn is_empty(&self) -> bool { + self.holes.is_empty() + } + + /// Translates an old (pre-compaction) index into its new + /// (post-compaction) position. + /// + /// `old` must be an index that is still live after compaction — + /// callers walk the outer tables, whose entries always point at + /// live items, so they only ever pass live indexes here. + /// + /// Runs in `O(log k)` where `k = self.holes.len()`. + #[inline] + pub(crate) fn remap(&self, old: usize) -> usize { + // `partition_point` returns the count of holes strictly less + // than `old` — each of those holes shifted `old` down by one. + let shift = self.holes.partition_point(|&h| h < old); + debug_assert!( + self.holes.binary_search(&old).is_err(), + "IndexRemap::remap called on a compacted-away index {old}" + ); + old - shift + } +} + +/// A dense, index-keyed container for items. +/// +/// # Design +/// +/// Items live in `items: Vec>`, indexed directly without any +/// hashing. 
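Two of the claims in this design note are cheap to verify standalone; the following is toy code, not the crate's `ItemSet`:

```rust
// Toy check: `get` is plain indexing into the slot buffer plus an
// is-Some test, and Option<T> adds no space for niche-carrying T.
fn main() {
    // Direct indexing, no hashing anywhere.
    let items: Vec<Option<Box<u64>>> =
        vec![Some(Box::new(1)), None, Some(Box::new(3))];
    assert!(items[1].is_none());
    assert_eq!(items[2].as_deref(), Some(&3));

    // Niche optimization: Option<Box<T>>, Option<&T>, Option<NonZero*>
    // and friends are the same size as the bare type, so the slot
    // buffer stays as dense as a Vec<T> would be.
    assert_eq!(
        std::mem::size_of::<Option<Box<u64>>>(),
        std::mem::size_of::<Box<u64>>(),
    );
}
```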
Slots freed by [`remove`](Self::remove) land on `free_list` +/// and are reused by the next +/// [`insert_at_next_index`](Self::insert_at_next_index), so a churn +/// workload stabilizes at the high-water mark of simultaneously-live +/// items rather than the cumulative insertion count. +/// +/// Using `Option` keeps the hot-path `get` a single cache-line access: +/// for any `T` whose layout has a niche (e.g. anything containing a +/// `Box`, `Vec`, `String`, `&T`, `NonZero*`, and so on), `Option` has +/// the same size as `T` and "is it present?" compiles to a null-pointer +/// test. +/// +/// The free list lives behind a single nullable pointer +/// ([`FreeList`]) which is lazily allocated on first use. That keeps +/// `ItemSet`'s own footprint to a single word beyond `items` and +/// avoids any heap traffic for build-and-read or grow-only maps. +/// +/// # Invariants +/// +/// 1. For every `i < items.len()`: `items[i]` is `Some` iff `i` is not +/// currently in the free list. +/// 2. The free list contains no duplicates and no out-of-bounds indexes. +/// +/// `items` is not eagerly compacted: a trailing remove leaves a `None` +/// slot (and the matching free-list entry) in place. Trailing vacancies +/// are only reclaimed by [`shrink_to_fit`](Self::shrink_to_fit) or +/// [`shrink_to`](Self::shrink_to). This keeps [`remove`](Self::remove) +/// uniform (a single `slot.take` + `free_list.push`) regardless of +/// position; the next [`insert_at_next_index`](Self::insert_at_next_index) +/// reuses the vacated slot via the free list at no memory cost. +/// +/// The live item count is derived — not stored — as +/// `items.len() - free_list.len()`. Under invariants 1–2 that equals +/// the number of `Some` entries in `items`. pub(crate) struct ItemSet { - // rustc-hash's FxHashMap is custom-designed for compact-ish integer keys. - items: HashMap>, - // The next index to use. This only ever goes up, not down. - // - // An alternative might be to use a free list of indexes, but that's - // unnecessarily complex. - next_index: usize, + items: Vec, AllocWrapper>, + free_list: FreeList, +} + +impl Drop for ItemSet { + fn drop(&mut self) { + // Deallocate the free list first, while we can still reach the + // allocator via `items`. `items` gets dropped automatically + // afterward. + self.free_list.deallocate(self.items.allocator()); + } +} + +impl Clone for ItemSet { + fn clone(&self) -> Self { + let mut new = + Self { items: self.items.clone(), free_list: FreeList::new() }; + for &idx in self.free_list.as_slice() { + // SAFETY: `new.free_list` is fresh, so this is the first + // push — the same-allocator contract is trivially + // satisfied on this call, and every subsequent site reuses + // `new.items.allocator()`. 
+ unsafe { + new.free_list.push(idx, new.items.allocator()); + } + } + new + } } impl fmt::Debug for ItemSet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ItemSet") - .field("items", &self.items) - .field("next_index", &self.next_index) + .field("len", &self.len()) + .field("slots", &self.items.len()) + .field("free_list", &self.free_list.as_slice()) .finish() } } @@ -34,7 +140,10 @@ impl fmt::Debug for ItemSet { impl ItemSet { #[inline] pub(crate) const fn new() -> Self { - Self::new_in(global_alloc()) + Self { + items: Vec::new_in(AllocWrapper(global_alloc())), + free_list: FreeList::new(), + } } } @@ -42,19 +151,15 @@ impl ItemSet { #[inline] pub(crate) const fn new_in(alloc: A) -> Self { Self { - items: HashMap::with_hasher_in(FxBuildHasher, AllocWrapper(alloc)), - next_index: 0, + items: Vec::new_in(AllocWrapper(alloc)), + free_list: FreeList::new(), } } pub(crate) fn with_capacity_in(capacity: usize, alloc: A) -> Self { Self { - items: HashMap::with_capacity_and_hasher_in( - capacity, - Default::default(), - AllocWrapper(alloc), - ), - next_index: 0, + items: Vec::with_capacity_in(capacity, AllocWrapper(alloc)), + free_list: FreeList::new(), } } @@ -62,26 +167,64 @@ impl ItemSet { &self.items.allocator().0 } - /// Validates the item set. + /// Returns a raw pointer to the backing slot buffer. + /// + /// Intended for iterator types that need to hand out disjoint + /// `&mut T` across iterations without reborrowing `&mut ItemSet` + /// each time (which under Tree Borrows would invalidate previously + /// yielded references). + #[inline] + #[cfg_attr(not(feature = "std"), expect(dead_code))] + pub(crate) fn as_mut_ptr(&mut self) -> *mut Option { + self.items.as_mut_ptr() + } + pub(crate) fn validate( &self, compactness: ValidateCompact, ) -> Result<(), ValidationError> { - // If the map is expected to be compact, then ensure that all keys - // between 0 and next_index are present. + let some_count = self.items.iter().filter(|s| s.is_some()).count(); + let free = self.free_list.as_slice(); + if self.items.len() - some_count != free.len() { + return Err(ValidationError::General(format!( + "ItemSet free_list size ({}) inconsistent with vacant \ + slot count ({})", + free.len(), + self.items.len() - some_count, + ))); + } + for &idx in free { + if idx >= self.items.len() { + return Err(ValidationError::General(format!( + "ItemSet free_list has out-of-range index {idx}" + ))); + } + if self.items[idx].is_some() { + return Err(ValidationError::General(format!( + "ItemSet free_list has occupied index {idx}" + ))); + } + } + let mut sorted: alloc::vec::Vec = free.to_vec(); + sorted.sort_unstable(); + for pair in sorted.windows(2) { + if pair[0] == pair[1] { + return Err(ValidationError::General(format!( + "ItemSet free_list has duplicate index {}", + pair[0], + ))); + } + } match compactness { ValidateCompact::Compact => { - for i in 0..self.next_index { - if !self.items.contains_key(&i) { - return Err(ValidationError::General(format!( - "ItemSet is not compact: missing index {i}" - ))); - } + if !free.is_empty() { + return Err(ValidationError::General(format!( + "ItemSet is not compact: free_list has {} entries", + free.len(), + ))); } } - ValidateCompact::NonCompact => { - // No real checks can be done in this case. 
- } + ValidateCompact::NonCompact => {} } Ok(()) @@ -93,139 +236,304 @@ impl ItemSet { #[inline] pub(crate) fn is_empty(&self) -> bool { - self.items.is_empty() + // `items.len() == free_list.len()` iff every slot is vacant, + // which (under invariant 1) means there are no live items. + self.items.len() == self.free_list.len() } #[inline] pub(crate) fn len(&self) -> usize { - self.items.len() + self.items.len() - self.free_list.len() } #[inline] - pub(crate) fn iter(&self) -> hash_map::Iter<'_, usize, T> { - self.items.iter() + pub(crate) fn iter(&self) -> Iter<'_, T> { + Iter::new(self) } #[inline] #[expect(dead_code)] - pub(crate) fn iter_mut(&mut self) -> hash_map::IterMut<'_, usize, T> { - self.items.iter_mut() + pub(crate) fn iter_mut(&mut self) -> IterMut<'_, T> { + IterMut::new(self) } #[inline] - pub(crate) fn values(&self) -> hash_map::Values<'_, usize, T> { - self.items.values() + pub(crate) fn values(&self) -> Values<'_, T> { + Values::new(self) } #[inline] - pub(crate) fn values_mut(&mut self) -> hash_map::ValuesMut<'_, usize, T> { - self.items.values_mut() + pub(crate) fn values_mut(&mut self) -> ValuesMut<'_, T> { + ValuesMut::new(self) } #[inline] - pub(crate) fn into_values( - self, - ) -> hash_map::IntoValues> { - self.items.into_values() + pub(crate) fn into_values(self) -> IntoValues { + IntoValues::new(self) } #[inline] pub(crate) fn get(&self, index: usize) -> Option<&T> { - self.items.get(&index) + self.items.get(index).and_then(Option::as_ref) } #[inline] pub(crate) fn get_mut(&mut self, index: usize) -> Option<&mut T> { - self.items.get_mut(&index) + self.items.get_mut(index).and_then(Option::as_mut) } - #[inline] + /// Returns mutable references to up to `N` distinct indexes. + /// + /// Returns `None` for any index that is out of bounds, vacant, or + /// that duplicates an earlier index in the array. pub(crate) fn get_disjoint_mut( &mut self, indexes: [&usize; N], ) -> [Option<&mut T>; N] { - self.items.get_many_mut(indexes) + let len = self.items.len(); + let mut valid = [false; N]; + for i in 0..N { + let idx = *indexes[i]; + if idx >= len { + continue; + } + // SAFETY: idx < len, so `items[idx]` is in bounds. + if unsafe { self.items.get_unchecked(idx) }.is_none() { + continue; + } + let mut dup = false; + for j in 0..i { + if valid[j] && *indexes[j] == idx { + dup = true; + break; + } + } + if !dup { + valid[i] = true; + } + } + + let base = self.items.as_mut_ptr(); + core::array::from_fn(|i| { + if valid[i] { + let idx = *indexes[i]; + // SAFETY: we verified idx is in bounds, the slot is + // `Some`, and no earlier valid entry shares this index. + // Therefore the `&mut` references are disjoint. + unsafe { (*base.add(idx)).as_mut() } + } else { + None + } + }) } - // This is only used by IdOrdMap. + /// Returns the index that [`insert_at_next_index`] will use on its + /// next call. + /// + /// [`insert_at_next_index`]: Self::insert_at_next_index #[cfg_attr(not(feature = "std"), expect(dead_code))] #[inline] pub(crate) fn next_index(&self) -> usize { - self.next_index + // LIFO reuse: the most recently freed slot is likeliest to be + // cache-hot. 
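The reuse policy in behavioral form, with plain `Vec`s standing in for the crate's `ItemSet` and `FreeList`:

```rust
// Behavioral sketch of slot reuse: the most recently vacated slot is
// reused first, and the free-list head previews the next insert index.
fn insert(
    items: &mut Vec<Option<&'static str>>,
    free_list: &mut Vec<usize>,
    value: &'static str,
) -> usize {
    match free_list.pop() {
        Some(ix) => {
            items[ix] = Some(value);
            ix
        }
        None => {
            items.push(Some(value));
            items.len() - 1
        }
    }
}

fn main() {
    let mut items = Vec::new();
    let mut free_list = Vec::new();
    for v in ["a", "b", "c"] {
        insert(&mut items, &mut free_list, v);
    }

    // Vacate index 1, then index 0: LIFO reuse hands back 0 first.
    items[1] = None;
    free_list.push(1);
    items[0] = None;
    free_list.push(0);

    assert_eq!(free_list.last(), Some(&0)); // next_index() analogue
    assert_eq!(insert(&mut items, &mut free_list, "d"), 0);
    assert_eq!(insert(&mut items, &mut free_list, "e"), 1);
    // Churn reuses holes instead of growing the buffer.
    assert_eq!(items.len(), 3);
}
```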
+ match self.free_list.last() { + Some(idx) => idx, + None => self.items.len(), + } } #[inline] pub(crate) fn insert_at_next_index(&mut self, value: T) -> usize { - let index = self.next_index; - self.items.insert(index, value); - self.next_index += 1; - index + if let Some(idx) = self.free_list.pop() { + debug_assert!(self.items[idx].is_none()); + self.items[idx] = Some(value); + idx + } else { + let idx = self.items.len(); + self.items.push(Some(value)); + idx + } } + /// Removes the item at `index`, if any. + /// + /// Pushes `index` onto the free list for reuse by the next + /// [`insert_at_next_index`](Self::insert_at_next_index). The push + /// may allocate if the free list hasn't been pre-sized; allocator + /// failure aborts via the global OOM handler, matching the standard + /// collections' infallible-allocation convention. Callers that need + /// a no-OOM guarantee should pre-size up front via + /// [`try_reserve`](Self::try_reserve). + /// + /// `items` is not truncated here, even for a trailing remove — the + /// vacated slot stays in place until reused by the next insert or + /// reclaimed by [`shrink_to_fit`](Self::shrink_to_fit). #[inline] pub(crate) fn remove(&mut self, index: usize) -> Option { - let entry = self.items.remove(&index); - if entry.is_some() && index == self.next_index - 1 { - // If we removed the last entry, decrement next_index. Not strictly - // necessary but a nice optimization. - // - // This does not guarantee compactness, since it's possible for the - // following set of operations to occur: - // - // 0. start at next_index = 0 - // 1. insert 0, next_index = 1 - // 2. insert 1, next_index = 2 - // 3. remove 0, next_index = 2 - // 4. remove 1, next_index = 1 (not 0, even though the map is empty) - // - // Compactness would require a heap acting as a free list. But that - // seems generally unnecessary. - self.next_index -= 1; + let slot = self.items.get_mut(index)?; + let value = slot.take()?; + // SAFETY: we're handing the same allocator that owns `items` to + // the free list, matching every other site that uses it. + unsafe { + self.free_list.push(index, self.items.allocator()); } - entry + Some(value) + } + + /// Consumes this set into an owned, invariant-free + /// [`ConsumingItemSet`]. + /// + /// Deallocates the free list (the consuming view has no use for it) + /// and hands over ownership of the items buffer. Used by + /// [`IntoValues`] and by `IdOrdMap`'s owning iterator, both of which + /// drain every live slot at most once and don't care about + /// reconstructing the set. + pub(crate) fn into_consuming(self) -> ConsumingItemSet { + // `ManuallyDrop` suppresses the `ItemSet` drop glue so we can + // disassemble the set field-by-field. + let mut set = core::mem::ManuallyDrop::new(self); + + // Deallocate the free list while `items` (and therefore its + // allocator) is still live. We resplit the borrows to satisfy + // the borrow checker: `free_list` wants `&mut`, `items` wants + // `&`. + { + let set = &mut *set; + set.free_list.deallocate(set.items.allocator()); + } + + // SAFETY: we own `set` by value and the `ManuallyDrop` wrapper prevents + // any automatic drop of its fields, so moving `items` out by + // `ptr::read` is sound. `set.items` is no longer accessed after this + // point. + let items = unsafe { core::ptr::read(&set.items) }; + ConsumingItemSet { items } } /// Clears the item set, removing all items. 
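The `ManuallyDrop` + `ptr::read` disassembly used by `into_consuming` above is a reusable pattern; here is a standalone sketch with toy types (not the crate's):

```rust
use core::{mem::ManuallyDrop, ptr};

// A struct with a Drop impl cannot have fields moved out directly, so:
// suppress its drop glue, release the field being discarded, then move
// the field we keep out by a raw read.
struct Holder {
    payload: Vec<u32>,
    scratch: Vec<u8>, // stands in for the free list
}

impl Drop for Holder {
    fn drop(&mut self) {
        // The real type releases `scratch` here.
    }
}

fn into_payload(holder: Holder) -> Vec<u32> {
    // Suppress `Holder::drop` so we can disassemble field by field.
    let mut holder = ManuallyDrop::new(holder);
    // Release the field we are not keeping while we still can.
    drop(core::mem::take(&mut holder.scratch));
    // SAFETY: `holder`'s drop glue is suppressed and `holder` is never
    // touched again, so this bitwise move cannot cause a double drop.
    unsafe { ptr::read(&holder.payload) }
}

fn main() {
    let holder = Holder { payload: vec![1, 2, 3], scratch: vec![0; 8] };
    assert_eq!(into_payload(holder), vec![1, 2, 3]);
}
```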
- #[inline] + /// + /// Preserves the capacity of both the items buffer and the free + /// list, matching the behavior of [`Vec::clear`]. A caller that + /// pre-sized via [`try_reserve`](Self::try_reserve) retains its + /// no-OOM guarantee across a `clear` and subsequent reuse. pub(crate) fn clear(&mut self) { self.items.clear(); - self.next_index = 0; + self.free_list.clear(); } - // This method assumes that value has the same ID. It also asserts that - // `index` is valid (and panics if it isn't). + // This method assumes that value has the same ID. It also asserts + // that `index` is valid (and panics if it isn't). #[inline] pub(crate) fn replace(&mut self, index: usize, value: T) -> T { - self.items - .insert(index, value) - .unwrap_or_else(|| panic!("EntrySet index not found: {index}")) + match self.items.get_mut(index) { + Some(slot @ Some(_)) => { + slot.replace(value).expect("slot was just checked to be Some") + } + _ => panic!("ItemSet index not found: {index}"), + } } - /// Reserves capacity for at least `additional` more items. #[inline] pub(crate) fn reserve(&mut self, additional: usize) { self.items.reserve(additional); } - /// Shrinks the capacity of the item set as much as possible. #[inline] - pub(crate) fn shrink_to_fit(&mut self) { + pub(crate) fn shrink_to_fit(&mut self) -> IndexRemap { + let remap = self.compact(); self.items.shrink_to_fit(); + remap } - /// Shrinks the capacity of the item set with a lower limit. #[inline] - pub(crate) fn shrink_to(&mut self, min_capacity: usize) { + pub(crate) fn shrink_to(&mut self, min_capacity: usize) -> IndexRemap { + let remap = self.compact(); self.items.shrink_to(min_capacity); + remap + } + + /// Moves every live slot down to fill `None` holes, truncates + /// `items` to its new length, and clears the free list. + /// + /// Returns an [`IndexRemap`] whose `holes` are the sorted list of + /// pre-compaction indexes whose items were shifted away. The remap + /// is empty iff no compaction happened (no holes existed). + /// + /// Items are moved via [`Vec::swap`], so no `T::drop` runs here — + /// the only `Option::::drop` calls are on the trailing `None`s + /// popped by `truncate`, and dropping `None` is a no-op. This + /// preserves the panic-safety invariant captured by + /// [`shrink_to_fit_does_not_drop_in_place`](tests::shrink_to_fit_does_not_drop_in_place). + fn compact(&mut self) -> IndexRemap { + // Snapshot the free list as a sorted `Vec` — these are + // the indexes that are about to be filled by the compaction + // below. We allocate unconditionally here rather than borrow + // from `self.free_list` because the free list needs to be + // cleared before we return, and the caller needs the sorted + // holes to outlive `self.free_list`'s state. + let mut holes: alloc::vec::Vec = + self.free_list.as_slice().to_vec(); + holes.sort_unstable(); + + // Two-pointer compaction: scan `items` forward; every `Some` + // slot gets swapped into the next write position. A `Some` + // that's already in the right place costs a single `is_some` + // check. + let mut write = 0; + for read in 0..self.items.len() { + if self.items[read].is_some() { + if write != read { + self.items.swap(write, read); + } + write += 1; + } + } + self.items.truncate(write); + self.free_list.clear(); + + IndexRemap { holes } } /// Tries to reserve capacity for at least `additional` more items. + /// + /// Reserves room in both the items buffer and the free list. 
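At the public-map level, the reservation looks roughly like the sketch below. Illustrative only: it assumes the iddqd crate with default features, and the `User` type plus its `IdHashItem` impl are modeled on the crate's own examples rather than taken from this diff.

```rust
use iddqd::{IdHashItem, IdHashMap, id_upcast};

// Hypothetical item type shaped like the crate's examples; the exact
// trait details here are assumptions, not part of this diff.
#[derive(Debug)]
struct User {
    name: String,
    age: u8,
}

impl IdHashItem for User {
    type Key<'a> = &'a str;
    fn key(&self) -> Self::Key<'_> {
        &self.name
    }
    id_upcast!();
}

fn main() {
    let mut map = IdHashMap::<User>::new();
    // Pre-size fallibly: on success, the next 1000 insertions (and any
    // interleaved removes) stay off the infallible, abort-on-OOM
    // allocation path.
    if map.try_reserve(1000).is_ok() {
        let inserted = map
            .insert_unique(User { name: "niko".to_owned(), age: 20 })
            .is_ok();
        assert!(inserted);
    }
}
```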
+ /// + /// After this call returns `Ok(())`, the next `additional` calls to + /// [`insert_at_next_index`](Self::insert_at_next_index) and any + /// interleaved [`remove`](Self::remove) calls are OOM-free. The + /// guarantee is reset by any of: + /// + /// * more than `additional` insertions past this point + /// (the items buffer grows through the infallible allocation + /// path); + /// * [`shrink_to_fit`](Self::shrink_to_fit) / + /// [`shrink_to`](Self::shrink_to_fit), which may release capacity + /// that the reservation was counting on. + /// + /// [`clear`](Self::clear) preserves the reservation. #[inline] pub(crate) fn try_reserve( &mut self, additional: usize, - ) -> Result<(), hashbrown::TryReserveError> { - self.items.try_reserve(additional) + ) -> Result<(), TryReserveError> { + self.items + .try_reserve(additional) + .map_err(TryReserveError::from_allocator_api2)?; + // Target free-list capacity: enough to hold the maximum possible + // vacancies after the caller inserts `additional` items. An + // upper bound is `self.items.len() + additional`. + let target = self.items.len().saturating_add(additional); + if target > 0 { + // SAFETY: `self.items.allocator()` is the allocator used for + // every prior free-list mutation, matching the contract; + // `target > 0` satisfies `try_reserve_total`'s nonzero + // precondition. + unsafe { + self.free_list + .try_reserve_total(target, self.items.allocator())?; + } + } + Ok(()) } } @@ -240,9 +548,9 @@ mod serde_impls { where S: Serializer, { - // Serialize just the items -- don't serialize the map keys. We'll - // rebuild the map keys on deserialization. - serializer.collect_seq(self.items.values()) + // Serialize just the items -- don't serialize the map keys. + // We'll rebuild the map keys on deserialization. + serializer.collect_seq(self.values()) } } } @@ -252,8 +560,7 @@ impl Index for ItemSet { #[inline] fn index(&self, index: usize) -> &Self::Output { - self.items - .get(&index) + self.get(index) .unwrap_or_else(|| panic!("ItemSet index not found: {index}")) } } @@ -261,8 +568,455 @@ impl Index for ItemSet { impl IndexMut for ItemSet { #[inline] fn index_mut(&mut self, index: usize) -> &mut Self::Output { - self.items - .get_mut(&index) + self.get_mut(index) .unwrap_or_else(|| panic!("ItemSet index not found: {index}")) } } + +// --- Iterators ---------------------------------------------------------- + +/// An iterator over `(index, &item)` pairs in an [`ItemSet`]. 
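One note ahead of the iterator definitions below: the recurring `remaining` field exists because filtering out the `None` holes costs the adapter chain its exact length. A std-only illustration:

```rust
// Once the None holes are filtered out, the adapter chain can no longer
// report an exact length, so ExactSizeIterator has to come from a
// counter seeded with the live-item count.
fn main() {
    let slots: Vec<Option<u32>> = vec![Some(1), None, Some(3)];

    // `flatten` only knows a conservative lower bound.
    assert_eq!(slots.iter().flatten().size_hint().0, 0);

    // Seeding a counter with the live count (in the crate:
    // items.len() - free_list.len()) restores an exact length.
    let remaining = slots.iter().filter(|slot| slot.is_some()).count();
    assert_eq!(remaining, 2);
    assert_eq!(slots.iter().flatten().count(), remaining);
}
```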
+pub(crate) struct Iter<'a, T> { + inner: core::iter::Enumerate>>, + remaining: usize, +} + +impl<'a, T> Iter<'a, T> { + fn new(set: &'a ItemSet) -> Self { + Self { inner: set.items.iter().enumerate(), remaining: set.len() } + } +} + +impl Clone for Iter<'_, T> { + fn clone(&self) -> Self { + Self { inner: self.inner.clone(), remaining: self.remaining } + } +} + +impl fmt::Debug for Iter<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Iter").field("remaining", &self.remaining).finish() + } +} + +impl Default for Iter<'_, T> { + fn default() -> Self { + let empty: &[Option] = &[]; + Self { inner: empty.iter().enumerate(), remaining: 0 } + } +} + +impl<'a, T> Iterator for Iter<'a, T> { + type Item = (usize, &'a T); + + #[inline] + fn next(&mut self) -> Option { + for (i, slot) in self.inner.by_ref() { + if let Some(v) = slot { + self.remaining -= 1; + return Some((i, v)); + } + } + None + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +impl ExactSizeIterator for Iter<'_, T> { + #[inline] + fn len(&self) -> usize { + self.remaining + } +} + +impl FusedIterator for Iter<'_, T> {} + +/// An iterator over `(index, &mut item)` pairs in an [`ItemSet`]. +pub(crate) struct IterMut<'a, T> { + inner: core::iter::Enumerate>>, + remaining: usize, +} + +impl<'a, T> IterMut<'a, T> { + fn new(set: &'a mut ItemSet) -> Self { + let remaining = set.len(); + Self { inner: set.items.iter_mut().enumerate(), remaining } + } +} + +impl fmt::Debug for IterMut<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("IterMut").field("remaining", &self.remaining).finish() + } +} + +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = (usize, &'a mut T); + + #[inline] + fn next(&mut self) -> Option { + for (i, slot) in self.inner.by_ref() { + if let Some(v) = slot { + self.remaining -= 1; + return Some((i, v)); + } + } + None + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +impl ExactSizeIterator for IterMut<'_, T> { + #[inline] + fn len(&self) -> usize { + self.remaining + } +} + +impl FusedIterator for IterMut<'_, T> {} + +/// An iterator over `&item` references in an [`ItemSet`]. +pub(crate) struct Values<'a, T> { + inner: core::slice::Iter<'a, Option>, + remaining: usize, +} + +impl<'a, T> Values<'a, T> { + fn new(set: &'a ItemSet) -> Self { + Self { inner: set.items.iter(), remaining: set.len() } + } +} + +impl Clone for Values<'_, T> { + fn clone(&self) -> Self { + Self { inner: self.inner.clone(), remaining: self.remaining } + } +} + +impl fmt::Debug for Values<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Values").field("remaining", &self.remaining).finish() + } +} + +impl Default for Values<'_, T> { + fn default() -> Self { + let empty: &[Option] = &[]; + Self { inner: empty.iter(), remaining: 0 } + } +} + +impl<'a, T> Iterator for Values<'a, T> { + type Item = &'a T; + + #[inline] + fn next(&mut self) -> Option { + let v = self.inner.by_ref().flatten().next()?; + self.remaining -= 1; + Some(v) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} + +impl ExactSizeIterator for Values<'_, T> { + #[inline] + fn len(&self) -> usize { + self.remaining + } +} + +impl FusedIterator for Values<'_, T> {} + +/// An iterator over `&mut item` references in an [`ItemSet`]. 
+pub(crate) struct ValuesMut<'a, T> {
+    inner: core::slice::IterMut<'a, Option<T>>,
+    remaining: usize,
+}
+
+impl<'a, T> ValuesMut<'a, T> {
+    fn new<A: Allocator>(set: &'a mut ItemSet<T, A>) -> Self {
+        let remaining = set.len();
+        Self { inner: set.items.iter_mut(), remaining }
+    }
+}
+
+impl<T> fmt::Debug for ValuesMut<'_, T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("ValuesMut").field("remaining", &self.remaining).finish()
+    }
+}
+
+impl<'a, T> Iterator for ValuesMut<'a, T> {
+    type Item = &'a mut T;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let v = self.inner.by_ref().flatten().next()?;
+        self.remaining -= 1;
+        Some(v)
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.remaining, Some(self.remaining))
+    }
+}
+
+impl<T> ExactSizeIterator for ValuesMut<'_, T> {
+    #[inline]
+    fn len(&self) -> usize {
+        self.remaining
+    }
+}
+
+impl<T> FusedIterator for ValuesMut<'_, T> {}
+
+/// An owning iterator over the items in an [`ItemSet`].
+pub(crate) struct IntoValues<T, A: Allocator> {
+    inner: allocator_api2::vec::IntoIter<Option<T>, AllocWrapper<A>>,
+    remaining: usize,
+}
+
+impl<T, A: Allocator> IntoValues<T, A> {
+    fn new(set: ItemSet<T, A>) -> Self {
+        // Compute `remaining` before consuming the set: `ItemSet::len()` is
+        // derived from `items.len() - free_list.len()`, and `into_consuming`
+        // deallocates the free list.
+        let remaining = set.len();
+        let consuming = set.into_consuming();
+        Self { inner: consuming.items.into_iter(), remaining }
+    }
+}
+
+impl<T, A: Allocator> fmt::Debug for IntoValues<T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("IntoValues")
+            .field("remaining", &self.remaining)
+            .finish()
+    }
+}
+
+impl<T, A: Allocator> Iterator for IntoValues<T, A> {
+    type Item = T;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let v = self.inner.by_ref().flatten().next()?;
+        self.remaining -= 1;
+        Some(v)
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.remaining, Some(self.remaining))
+    }
+}
+
+impl<T, A: Allocator> ExactSizeIterator for IntoValues<T, A> {
+    #[inline]
+    fn len(&self) -> usize {
+        self.remaining
+    }
+}
+
+impl<T, A: Allocator> FusedIterator for IntoValues<T, A> {}
+
+/// An [`ItemSet`] consumed into an owned, by-index take-only view.
+///
+/// Produced by [`ItemSet::into_consuming`]. The free list is gone and
+/// the invariants of `ItemSet` no longer apply: indexes are taken one at
+/// a time via [`take`](Self::take) and the type makes no attempt to
+/// reuse vacated slots or maintain a live-count.
+///
+/// Existing `Some` slots that are never taken are dropped by the underlying
+/// `Vec` when `ConsumingItemSet` itself is dropped, so partial consumption
+/// does not cause a memory leak.
+pub(crate) struct ConsumingItemSet<T, A: Allocator> {
+    items: Vec<Option<T>, AllocWrapper<A>>,
+}
+
+impl<T, A: Allocator> fmt::Debug for ConsumingItemSet<T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("ConsumingItemSet")
+            .field("slots", &self.items.len())
+            .finish()
+    }
+}
+
+impl<T, A: Allocator> ConsumingItemSet<T, A> {
+    /// Takes the item at `index`, leaving a `None` slot behind.
+    ///
+    /// Returns `None` if `index` is out of bounds or the slot has already
+    /// been taken. O(1) regardless of position.
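+    ///
+    /// A minimal sketch (illustrative only; assumes `set` was populated via
+    /// `insert_at_next_index` before calling [`ItemSet::into_consuming`]):
+    ///
+    /// ```ignore
+    /// let mut consuming = set.into_consuming();
+    /// assert!(consuming.take(0).is_some());          // first take succeeds
+    /// assert!(consuming.take(0).is_none());          // slot already vacated
+    /// assert!(consuming.take(usize::MAX).is_none()); // out of bounds
+    /// ```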
+    #[cfg_attr(not(feature = "std"), expect(dead_code))]
+    #[inline]
+    pub(crate) fn take(&mut self, index: usize) -> Option<T> {
+        self.items.get_mut(index)?.take()
+    }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+    use super::*;
+    use crate::internal::ValidateCompact;
+    use std::{
+        cell::Cell,
+        panic::{AssertUnwindSafe, catch_unwind},
+        rc::Rc,
+    };
+
+    /// Test item whose `Drop` impl bumps a shared counter and, when
+    /// `panic_on_drop` is set, panics; the panic propagates out of `drop`.
+    struct PanickyDrop {
+        /// Bumped on every drop call so tests can count how many drops
+        /// ran before a panicking one aborted the sequence.
+        drop_count: Rc<Cell<usize>>,
+        /// When `true`, `drop` panics.
+        panic_on_drop: bool,
+    }
+
+    impl Drop for PanickyDrop {
+        fn drop(&mut self) {
+            self.drop_count.set(self.drop_count.get() + 1);
+            if self.panic_on_drop {
+                panic!("PanickyDrop");
+            }
+        }
+    }
+
+    /// Checks that [`ItemSet::shrink_to_fit`] never drops `T` in place.
+    /// Only trailing `None` slots are popped, and `Option::<T>::drop` on
+    /// `None` is a no-op, so a panicking `T::drop` cannot unwind through
+    /// the shrink path.
+    ///
+    /// A regression here would leave the free list referencing indexes
+    /// past a truncated `items` buffer (invariant 2 violated) or half-
+    /// dropped items mid-compaction.
+    #[test]
+    fn shrink_to_fit_does_not_drop_in_place() {
+        let drop_count = Rc::new(Cell::new(0));
+        let mk = |panic_on_drop| PanickyDrop {
+            drop_count: drop_count.clone(),
+            panic_on_drop,
+        };
+
+        let mut set = ItemSet::<PanickyDrop>::new();
+        // items = [Some(0), Some(1), Some(2), Some(3), Some(4_panicky)]
+        for i in 0..5 {
+            set.insert_at_next_index(mk(i == 4));
+        }
+        // Capture the panicky item by removing it first (so its drop
+        // never fires inside any ItemSet method). Then vacate the other
+        // three non-trailing slots.
+        let panicky = set.remove(4).expect("slot was occupied");
+        for idx in [3, 2, 1] {
+            drop(set.remove(idx).expect("slot was occupied"));
+        }
+        assert_eq!(drop_count.get(), 3, "three non-panicky items dropped");
+        // items = [Some(0), None, None, None, None]
+        // free_list = [4, 3, 2, 1]
+
+        // shrink_to_fit pops the four trailing `None` slots and trims
+        // the free list. No `T::drop` runs here.
+        set.shrink_to_fit();
+        assert_eq!(
+            drop_count.get(),
+            3,
+            "shrink_to_fit pops only None slots; no T::drop runs"
+        );
+        set.validate(ValidateCompact::NonCompact)
+            .expect("ItemSet invariants hold after shrink_to_fit");
+        assert_eq!(set.len(), 1);
+
+        // Drop the captured panicky item outside any ItemSet method.
+        // The panic is caught at this site; the set's state is
+        // untouched by that drop.
+        let caught = catch_unwind(AssertUnwindSafe(move || drop(panicky)));
+        assert!(caught.is_err(), "PanickyDrop panics on drop");
+        set.validate(ValidateCompact::NonCompact).expect(
+            "ItemSet invariants still hold after the captured-value drop panic",
+        );
+    }
+
+    /// Checks that [`ItemSet::shrink_to_fit`] compacts away *middle*
+    /// holes (not only trailing ones) and returns an [`IndexRemap`]
+    /// that maps every surviving index to its new position.
+    ///
+    /// Before the `IndexRemap` rework, shrink only popped trailing
+    /// `None`s, so a map with gaps in the middle retained the dead
+    /// slots forever. A regression here would either leave holes in
+    /// the compacted `items` buffer, return an incorrect remap, or
+    /// drop `T` in place (breaking the panic-safety invariant in
+    /// [`shrink_to_fit_does_not_drop_in_place`]).
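+    ///
+    /// For the scenario exercised below, slots 1 and 3 are vacated, so the
+    /// surviving indexes 0, 2, and 4 must map to 0, 1, and 2, and the
+    /// compacted buffer ends up holding exactly three `Some` slots.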
+    #[test]
+    fn shrink_to_fit_compacts_middle_holes() {
+        let drop_count = Rc::new(Cell::new(0));
+        let mk = || PanickyDrop {
+            drop_count: drop_count.clone(),
+            panic_on_drop: false,
+        };
+
+        let mut set = ItemSet::<PanickyDrop>::new();
+        // items = [Some(0), Some(1), Some(2), Some(3), Some(4)]
+        let indexes: Vec<_, _> =
+            (0..5).map(|_| set.insert_at_next_index(mk())).collect();
+        assert_eq!(indexes.as_slice(), &[0, 1, 2, 3, 4]);
+
+        // Drop-count check baseline: none have dropped yet.
+        assert_eq!(drop_count.get(), 0);
+
+        // Vacate two middle slots. After this:
+        //   items = [Some(0), None, Some(2), None, Some(4)]
+        //   free_list = [1, 3] (order is LIFO)
+        drop(set.remove(1).expect("slot was occupied"));
+        drop(set.remove(3).expect("slot was occupied"));
+        assert_eq!(drop_count.get(), 2, "removed items dropped at remove time");
+
+        // Compact.
+        let drop_count_before = drop_count.get();
+        let remap = set.shrink_to_fit();
+        assert_eq!(
+            drop_count.get(),
+            drop_count_before,
+            "shrink_to_fit moves items; no T::drop runs during compaction"
+        );
+
+        // Now: items = [Some(_), Some(_), Some(_)],
+        // free_list is empty, and the remap records holes [1, 3].
+        assert_eq!(set.len(), 3);
+        set.validate(ValidateCompact::Compact)
+            .expect("ItemSet should be fully compact after shrink_to_fit");
+
+        assert!(!remap.is_empty());
+        assert_eq!(remap.remap(0), 0);
+        assert_eq!(remap.remap(2), 1);
+        assert_eq!(remap.remap(4), 2);
+    }
+
+    /// Shrinking a set with no holes is a no-op as far as the remap is
+    /// concerned: `IndexRemap::is_empty()` is `true` and outer callers
+    /// skip the table rewrite.
+    #[test]
+    fn shrink_to_fit_without_holes_returns_empty_remap() {
+        let mut set = ItemSet::<i32>::new();
+        for i in 0..4 {
+            set.insert_at_next_index(i);
+        }
+        let remap = set.shrink_to_fit();
+        assert!(remap.is_empty());
+        set.validate(ValidateCompact::Compact)
+            .expect("a hole-free set is trivially compact after shrink_to_fit");
+    }
+}
diff --git a/crates/iddqd/src/support/mod.rs b/crates/iddqd/src/support/mod.rs
index 2f2ff930..a122c500 100644
--- a/crates/iddqd/src/support/mod.rs
+++ b/crates/iddqd/src/support/mod.rs
@@ -5,6 +5,7 @@ pub(crate) mod btree_table;
 #[cfg(feature = "daft")]
 pub(crate) mod daft_utils;
 pub(crate) mod fmt_utils;
+pub(crate) mod free_list;
 pub(crate) mod hash_builder;
 pub(crate) mod hash_table;
 pub(crate) mod item_set;
diff --git a/crates/iddqd/src/tri_hash_map/imp.rs b/crates/iddqd/src/tri_hash_map/imp.rs
index 1e0c25c6..ea91eeec 100644
--- a/crates/iddqd/src/tri_hash_map/imp.rs
+++ b/crates/iddqd/src/tri_hash_map/imp.rs
@@ -914,9 +914,7 @@ impl TriHashMap {
 &mut self,
 additional: usize,
 ) -> Result<(), crate::errors::TryReserveError> {
- self.items
- .try_reserve(additional)
- .map_err(crate::errors::TryReserveError::from_hashbrown)?;
+ self.items.try_reserve(additional)?;
 self.tables
 .k1_to_item
 .try_reserve(additional)
@@ -984,10 +982,23 @@ impl TriHashMap {
 /// # }
 /// ```
 pub fn shrink_to_fit(&mut self) {
- self.items.shrink_to_fit();
- self.tables.k1_to_item.shrink_to_fit();
- self.tables.k2_to_item.shrink_to_fit();
- self.tables.k3_to_item.shrink_to_fit();
+ let remap = self.items.shrink_to_fit();
+ if !remap.is_empty() {
+ self.tables.k1_to_item.remap_indexes(&remap);
+ self.tables.k2_to_item.remap_indexes(&remap);
+ self.tables.k3_to_item.remap_indexes(&remap);
+ }
+ let items = &self.items;
+ let state = &self.tables.state;
+ self.tables
+ .k1_to_item
+ .shrink_to_fit(|ix| state.hash_one(items[*ix].key1()));
+ self.tables
+ .k2_to_item
+ .shrink_to_fit(|ix| 
state.hash_one(items[*ix].key2())); + self.tables + .k3_to_item + .shrink_to_fit(|ix| state.hash_one(items[*ix].key3())); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -1047,10 +1058,23 @@ impl TriHashMap { /// # } /// ``` pub fn shrink_to(&mut self, min_capacity: usize) { - self.items.shrink_to(min_capacity); - self.tables.k1_to_item.shrink_to(min_capacity); - self.tables.k2_to_item.shrink_to(min_capacity); - self.tables.k3_to_item.shrink_to(min_capacity); + let remap = self.items.shrink_to(min_capacity); + if !remap.is_empty() { + self.tables.k1_to_item.remap_indexes(&remap); + self.tables.k2_to_item.remap_indexes(&remap); + self.tables.k3_to_item.remap_indexes(&remap); + } + let items = &self.items; + let state = &self.tables.state; + self.tables + .k1_to_item + .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key1())); + self.tables + .k2_to_item + .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key2())); + self.tables + .k3_to_item + .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key3())); } /// Iterates over the items in the map. @@ -1196,7 +1220,7 @@ impl TriHashMap { self.tables.validate(self.len(), compactness)?; // Check that the indexes are all correct. - for (&ix, item) in self.items.iter() { + for (ix, item) in self.items.iter() { let key1 = item.key1(); let key2 = item.key2(); let key3 = item.key3(); diff --git a/crates/iddqd/src/tri_hash_map/iter.rs b/crates/iddqd/src/tri_hash_map/iter.rs index 8ebe6ff4..da2cbe4e 100644 --- a/crates/iddqd/src/tri_hash_map/iter.rs +++ b/crates/iddqd/src/tri_hash_map/iter.rs @@ -2,12 +2,11 @@ use super::{RefMut, tables::TriHashMapTables}; use crate::{ DefaultHashBuilder, TriHashItem, support::{ - alloc::{AllocWrapper, Allocator, Global}, - item_set::ItemSet, + alloc::{Allocator, Global}, + item_set::{self, ItemSet}, }, }; use core::{hash::BuildHasher, iter::FusedIterator}; -use hashbrown::hash_map; /// An iterator over the elements of a [`TriHashMap`] by shared reference. /// Created by [`TriHashMap::iter`]. @@ -20,7 +19,7 @@ use hashbrown::hash_map; /// [`HashMap`]: std::collections::HashMap #[derive(Clone, Debug, Default)] pub struct Iter<'a, T: TriHashItem> { - inner: hash_map::Values<'a, usize, T>, + inner: item_set::Values<'a, T>, } impl<'a, T: TriHashItem> Iter<'a, T> { @@ -45,7 +44,6 @@ impl ExactSizeIterator for Iter<'_, T> { } } -// hash_map::Iter is a FusedIterator, so Iter is as well. impl FusedIterator for Iter<'_, T> {} /// An iterator over the elements of a [`TriHashMap`] by mutable reference. @@ -67,7 +65,7 @@ pub struct IterMut< A: Allocator = Global, > { tables: &'a TriHashMapTables, - inner: hash_map::ValuesMut<'a, usize, T>, + inner: item_set::ValuesMut<'a, T>, } impl<'a, T: TriHashItem, S: Clone + BuildHasher, A: Allocator> @@ -103,7 +101,6 @@ impl ExactSizeIterator } } -// hash_map::IterMut is a FusedIterator, so IterMut is as well. impl FusedIterator for IterMut<'_, T, S, A> { @@ -120,7 +117,7 @@ impl FusedIterator /// [`HashMap`]: std::collections::HashMap #[derive(Debug)] pub struct IntoIter { - inner: hash_map::IntoValues>, + inner: item_set::IntoValues, } impl IntoIter { @@ -145,5 +142,4 @@ impl ExactSizeIterator for IntoIter { } } -// hash_map::IterMut is a FusedIterator, so IterMut is as well. 
impl FusedIterator for IntoIter {} diff --git a/crates/iddqd/tests/integration/bi_hash_map.rs b/crates/iddqd/tests/integration/bi_hash_map.rs index c38ebcea..1fd45766 100644 --- a/crates/iddqd/tests/integration/bi_hash_map.rs +++ b/crates/iddqd/tests/integration/bi_hash_map.rs @@ -44,11 +44,10 @@ fn debug_impls() { assert_eq!( format!("{map:?}"), - // This is a small-enough map that the order of iteration is - // deterministic. + // Iteration is in insertion order. "{{k1: 1, k2: 'a'}: SimpleItem { key1: 1, key2: 'a' }, \ - {k1: 10, k2: 'c'}: SimpleItem { key1: 10, key2: 'c' }, \ - {k1: 20, k2: 'b'}: SimpleItem { key1: 20, key2: 'b' }}", + {k1: 20, k2: 'b'}: SimpleItem { key1: 20, key2: 'b' }, \ + {k1: 10, k2: 'c'}: SimpleItem { key1: 10, key2: 'c' }}", ); assert_eq!( format!("{:?}", map.get1_mut(&1).unwrap()), @@ -67,7 +66,7 @@ fn debug_impls_borrowed() { assert_eq!( format!("{before:?}"), - r#"{{k1: "a", k2: [98, 48]}: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, {k1: "c", k2: [98, 50]}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }, {k1: "b", k2: [98, 49]}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }}"# + r#"{{k1: "a", k2: [98, 48]}: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, {k1: "b", k2: [98, 49]}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }, {k1: "c", k2: [98, 50]}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }}"# ); #[cfg(feature = "daft")] @@ -84,7 +83,7 @@ fn debug_impls_borrowed() { let diff = before.diff(&after).by_unique(); assert_eq!( format!("{diff:?}"), - r#"Diff { common: {{k1: "a", k2: [98, 48]}: IdLeaf { before: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, after: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" } }}, added: {{k1: "d", k2: [98, 52]}: BorrowedItem { key1: "d", key2: [98, 52], key3: "path4" }, {k1: "c", k2: [98, 51]}: BorrowedItem { key1: "c", key2: [98, 51], key3: "path3" }}, removed: {{k1: "c", k2: [98, 50]}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }, {k1: "b", k2: [98, 49]}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }} }"# + r#"Diff { common: {{k1: "a", k2: [98, 48]}: IdLeaf { before: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, after: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" } }}, added: {{k1: "c", k2: [98, 51]}: BorrowedItem { key1: "c", key2: [98, 51], key3: "path3" }, {k1: "d", k2: [98, 52]}: BorrowedItem { key1: "d", key2: [98, 52], key3: "path4" }}, removed: {{k1: "b", k2: [98, 49]}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }, {k1: "c", k2: [98, 50]}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }} }"# ); } } @@ -242,6 +241,13 @@ enum Operation { #[weight(2)] RetainModulo(#[strategy(0..3_u8)] u8, #[strategy(1..4_u8)] u8, bool), Clear, + // Rare because full compaction + table rewrite is expensive, but + // at weight 1 each these fire often enough per 1024-op run to + // exercise the remap path. + #[weight(1)] + ShrinkToFit, + #[weight(1)] + ShrinkTo(#[strategy(0..256_usize)] usize), } impl Operation { @@ -259,8 +265,12 @@ impl Operation { | Operation::RetainModulo(_, _, _) => { CompactnessChange::NoLongerCompact } - // Clear always makes the map compact (empty). - Operation::Clear => CompactnessChange::BecomesCompact, + // Clear always makes the map compact (empty). Shrink + // fully compacts the backing store, restoring the + // `Compact` invariant. 
+ Operation::Clear + | Operation::ShrinkToFit + | Operation::ShrinkTo(_) => CompactnessChange::BecomesCompact, } } } @@ -371,6 +381,14 @@ fn proptest_ops( naive_map.clear(); map.validate(compactness).expect("map should be valid"); } + Operation::ShrinkToFit => { + map.shrink_to_fit(); + map.validate(compactness).expect("map should be valid"); + } + Operation::ShrinkTo(min_capacity) => { + map.shrink_to(min_capacity); + map.validate(compactness).expect("map should be valid"); + } } // Check that the iterators work correctly. diff --git a/crates/iddqd/tests/integration/id_hash_map.rs b/crates/iddqd/tests/integration/id_hash_map.rs index f6529b9f..d2298646 100644 --- a/crates/iddqd/tests/integration/id_hash_map.rs +++ b/crates/iddqd/tests/integration/id_hash_map.rs @@ -38,9 +38,8 @@ fn debug_impls() { assert_eq!( format!("{map:?}"), - // This is a small-enough map that the order of iteration is - // deterministic. - r#"{1: SimpleItem { key: 1 }, 10: SimpleItem { key: 10 }, 20: SimpleItem { key: 20 }}"# + // Iteration is in insertion order. + r#"{1: SimpleItem { key: 1 }, 20: SimpleItem { key: 20 }, 10: SimpleItem { key: 10 }}"# ); assert_eq!( format!("{:?}", map.get_mut(&1).unwrap()), @@ -59,7 +58,7 @@ fn debug_impls_borrowed() { assert_eq!( format!("{before:?}"), - r#"{"a": BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, "c": BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }, "b": BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }}"# + r#"{"a": BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, "b": BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }, "c": BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }}"# ); #[cfg(feature = "daft")] @@ -195,6 +194,14 @@ enum Operation { #[weight(2)] RetainModulo(#[strategy(0..3_u8)] u8, #[strategy(1..4_u8)] u8, bool), Clear, + // Shrink operations are intentionally rare: each one is expensive + // (full compaction + table rewrite) and dilutes per-op coverage if + // frequent, but at weight 1 each they fire often enough per + // 1024-op run to exercise the remap path. + #[weight(1)] + ShrinkToFit, + #[weight(1)] + ShrinkTo(#[strategy(0..256_usize)] usize), } impl Operation { @@ -211,8 +218,12 @@ impl Operation { | Operation::RetainModulo(_, _, _) => { CompactnessChange::NoLongerCompact } - // Clear always makes the map compact (empty). - Operation::Clear => CompactnessChange::BecomesCompact, + // Clear always makes the map compact (empty). Shrink + // operations fully compact the backing store, restoring + // the `Compact` invariant. + Operation::Clear + | Operation::ShrinkToFit + | Operation::ShrinkTo(_) => CompactnessChange::BecomesCompact, } } } @@ -301,6 +312,19 @@ fn proptest_ops( naive_map.clear(); map.validate(compactness).expect("map should be valid"); } + Operation::ShrinkToFit => { + map.shrink_to_fit(); + // The naive map has no shrink operation; contents stay + // unchanged. Compactness is validated below via the + // standard `map.validate(compactness)` call, which — + // thanks to the compactness-change tracker — now + // expects `Compact`. + map.validate(compactness).expect("map should be valid"); + } + Operation::ShrinkTo(min_capacity) => { + map.shrink_to(min_capacity); + map.validate(compactness).expect("map should be valid"); + } } // Check that the iterators work correctly. 
diff --git a/crates/iddqd/tests/integration/id_ord_map.rs b/crates/iddqd/tests/integration/id_ord_map.rs index 44a9f9ef..473ebd22 100644 --- a/crates/iddqd/tests/integration/id_ord_map.rs +++ b/crates/iddqd/tests/integration/id_ord_map.rs @@ -243,6 +243,13 @@ enum Operation { RetainModulo(#[strategy(0..3_u8)] u8, #[strategy(1..4_u8)] u8, bool), // clear is set to a lower weight since it makes the map empty. Clear, + // Rare because shrink rebuilds the BTreeSet index — but at weight + // 1 each these fire often enough per 1024-op run to exercise the + // btree remap path. + #[weight(1)] + ShrinkToFit, + #[weight(1)] + ShrinkTo(#[strategy(0..256_usize)] usize), } impl Operation { @@ -264,8 +271,12 @@ impl Operation { | Operation::RetainModulo(_, _, _) => { CompactnessChange::NoLongerCompact } - // Clear always makes the map compact (empty). - Operation::Clear => CompactnessChange::BecomesCompact, + // Clear always makes the map compact (empty). Shrink + // fully compacts the backing store, restoring the + // `Compact` invariant. + Operation::Clear + | Operation::ShrinkToFit + | Operation::ShrinkTo(_) => CompactnessChange::BecomesCompact, } } } @@ -434,6 +445,16 @@ fn proptest_ops( map.validate(compactness, ValidateChaos::No) .expect("map should be valid"); } + Operation::ShrinkToFit => { + map.shrink_to_fit(); + map.validate(compactness, ValidateChaos::No) + .expect("map should be valid"); + } + Operation::ShrinkTo(min_capacity) => { + map.shrink_to(min_capacity); + map.validate(compactness, ValidateChaos::No) + .expect("map should be valid"); + } } // Check that the iterators work correctly. diff --git a/crates/iddqd/tests/integration/tri_hash_map.rs b/crates/iddqd/tests/integration/tri_hash_map.rs index 0759dcd1..73f759c8 100644 --- a/crates/iddqd/tests/integration/tri_hash_map.rs +++ b/crates/iddqd/tests/integration/tri_hash_map.rs @@ -51,11 +51,10 @@ fn debug_impls() { assert_eq!( format!("{map:?}"), - // This is a small-enough map that the order of iteration is - // deterministic. + // Iteration is in insertion order. 
"{{k1: 1, k2: 'a', k3: 0}: SimpleItem { key1: 1, key2: 'a', key3: 0 }, \ - {k1: 10, k2: 'c', k3: 2}: SimpleItem { key1: 10, key2: 'c', key3: 2 }, \ - {k1: 20, k2: 'b', k3: 1}: SimpleItem { key1: 20, key2: 'b', key3: 1 }}", + {k1: 20, k2: 'b', k3: 1}: SimpleItem { key1: 20, key2: 'b', key3: 1 }, \ + {k1: 10, k2: 'c', k3: 2}: SimpleItem { key1: 10, key2: 'c', key3: 2 }}", ); assert_eq!( format!("{:?}", map.get1_mut(&1).unwrap()), @@ -74,7 +73,7 @@ fn debug_impls_borrowed() { assert_eq!( format!("{before:?}"), - r#"{{k1: "a", k2: [98, 48], k3: "path0"}: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, {k1: "c", k2: [98, 50], k3: "path2"}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }, {k1: "b", k2: [98, 49], k3: "path1"}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }}"# + r#"{{k1: "a", k2: [98, 48], k3: "path0"}: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, {k1: "b", k2: [98, 49], k3: "path1"}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }, {k1: "c", k2: [98, 50], k3: "path2"}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }}"# ); #[cfg(feature = "daft")] @@ -91,7 +90,7 @@ fn debug_impls_borrowed() { let diff = before.diff(&after).by_unique(); assert_eq!( format!("{diff:?}"), - r#"Diff { common: {{k1: "a", k2: [98, 48], k3: "path0"}: IdLeaf { before: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, after: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" } }}, added: {{k1: "d", k2: [98, 52], k3: "path4"}: BorrowedItem { key1: "d", key2: [98, 52], key3: "path4" }, {k1: "c", k2: [98, 51], k3: "path3"}: BorrowedItem { key1: "c", key2: [98, 51], key3: "path3" }}, removed: {{k1: "c", k2: [98, 50], k3: "path2"}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }, {k1: "b", k2: [98, 49], k3: "path1"}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }} }"# + r#"Diff { common: {{k1: "a", k2: [98, 48], k3: "path0"}: IdLeaf { before: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" }, after: BorrowedItem { key1: "a", key2: [98, 48], key3: "path0" } }}, added: {{k1: "c", k2: [98, 51], k3: "path3"}: BorrowedItem { key1: "c", key2: [98, 51], key3: "path3" }, {k1: "d", k2: [98, 52], k3: "path4"}: BorrowedItem { key1: "d", key2: [98, 52], key3: "path4" }}, removed: {{k1: "b", k2: [98, 49], k3: "path1"}: BorrowedItem { key1: "b", key2: [98, 49], key3: "path1" }, {k1: "c", k2: [98, 50], k3: "path2"}: BorrowedItem { key1: "c", key2: [98, 50], key3: "path2" }} }"# ); } } @@ -271,6 +270,13 @@ enum Operation { #[weight(2)] RetainModulo(#[strategy(0..3_u8)] u8, #[strategy(1..4_u8)] u8, bool), Clear, + // Rare because full compaction + table rewrite is expensive, but + // at weight 1 each these fire often enough per 1024-op run to + // exercise the remap path. + #[weight(1)] + ShrinkToFit, + #[weight(1)] + ShrinkTo(#[strategy(0..256_usize)] usize), } impl Operation { @@ -290,8 +296,12 @@ impl Operation { | Operation::RetainModulo(_, _, _) => { CompactnessChange::NoLongerCompact } - // Clear always makes the map compact (empty). - Operation::Clear => CompactnessChange::BecomesCompact, + // Clear always makes the map compact (empty). Shrink + // fully compacts the backing store, restoring the + // `Compact` invariant. 
+ Operation::Clear + | Operation::ShrinkToFit + | Operation::ShrinkTo(_) => CompactnessChange::BecomesCompact, } } } @@ -415,6 +425,14 @@ fn proptest_ops( naive_map.clear(); map.validate(compactness).expect("map should be valid"); } + Operation::ShrinkToFit => { + map.shrink_to_fit(); + map.validate(compactness).expect("map should be valid"); + } + Operation::ShrinkTo(min_capacity) => { + map.shrink_to(min_capacity); + map.validate(compactness).expect("map should be valid"); + } } // Check that the iterators work correctly. diff --git a/crates/iddqd/tests/snapshots/map_sizes.txt b/crates/iddqd/tests/snapshots/map_sizes.txt index c9077c6a..af1d0932 100644 --- a/crates/iddqd/tests/snapshots/map_sizes.txt +++ b/crates/iddqd/tests/snapshots/map_sizes.txt @@ -1,10 +1,10 @@ -IdHashMap: 80 -IdHashMap: 88 +IdHashMap: 72 +IdHashMap: 80 -BiHashMap: 112 -BiHashMap: 120 +BiHashMap: 104 +BiHashMap: 112 -TriHashMap: 144 -TriHashMap: 152 +TriHashMap: 136 +TriHashMap: 144 -IdOrdMap: 72 +IdOrdMap: 64