diff --git a/gix-ref/Cargo.toml b/gix-ref/Cargo.toml index 0ef687cccf0..c2c3dde9a69 100644 --- a/gix-ref/Cargo.toml +++ b/gix-ref/Cargo.toml @@ -26,7 +26,7 @@ serde = ["dep:serde", "gix-hash/serde", "gix-actor/serde", "gix-object/serde"] parallel = ["gix-features/parallel"] [dependencies] -gix-features = { version = "^0.48.0", path = "../gix-features", features = ["walkdir"] } +gix-features = { version = "^0.48.0", path = "../gix-features", features = ["walkdir", "once_cell"] } gix-fs = { version = "^0.21.1", path = "../gix-fs" } gix-path = { version = "^0.12.0", path = "../gix-path" } gix-hash = { version = "^0.25.0", path = "../gix-hash" } diff --git a/gix-ref/src/store/packed/buffer.rs b/gix-ref/src/store/packed/buffer.rs index 9c1a757c7fc..b2839fdd62e 100644 --- a/gix-ref/src/store/packed/buffer.rs +++ b/gix-ref/src/store/packed/buffer.rs @@ -67,6 +67,8 @@ pub mod open { data: backing, path, object_hash, + lookup_count: std::sync::atomic::AtomicUsize::new(0), + name_index: gix_features::threading::OnceCell::new(), }) } diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index 915e725029a..1e12c594f21 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -1,7 +1,16 @@ +use std::sync::atomic::Ordering; + use gix_object::bstr::{BStr, BString, ByteSlice}; use crate::{FullNameRef, PartialNameRef, store_impl::packed}; +/// Number of `try_find_full_name` calls served by binary search before +/// the next call eagerly builds a name → offset index for the rest of the +/// buffer's lifetime. The threshold trades a small one-time build cost +/// against `log₂(n)`-per-call binary searches; below it, single-shot +/// lookups (typical CLI commands) pay nothing extra. +pub(super) const INDEX_BUILD_AFTER_LOOKUPS: usize = 8; + /// packed-refs specific functionality impl packed::Buffer { /// Find a reference with the given `name` and return it. @@ -37,6 +46,23 @@ impl packed::Buffer { } pub(crate) fn try_find_full_name(&self, name: &FullNameRef) -> Result>, Error> { + // Fast path: a built index turns the lookup into a single HashMap + // probe (plus an `decode::reference` re-parse at the matched offset). + if let Some(index) = self.name_index.get() { + return self.lookup_via_index(name, index); + } + // Cold path: count this lookup. After enough binary searches we + // amortize the index build cost, so build it now and serve this call + // from the index too. + let prev_lookups = self.lookup_count.fetch_add(1, Ordering::Relaxed); + if prev_lookups + 1 >= INDEX_BUILD_AFTER_LOOKUPS { + let index = self.name_index.get_or_init(|| self.build_name_index()); + return self.lookup_via_index(name, index); + } + self.try_find_full_name_via_binary_search(name) + } + + fn try_find_full_name_via_binary_search(&self, name: &FullNameRef) -> Result>, Error> { match self.binary_search_by(name.as_bstr()) { Ok(line_start) => { let mut input = &self.as_ref()[line_start..]; @@ -54,6 +80,66 @@ impl packed::Buffer { } } + fn lookup_via_index( + &self, + name: &FullNameRef, + index: &packed::NameIndex, + ) -> Result>, Error> { + match index.by_name.get(name.as_bstr()) { + Some(&offset) => { + let mut input = &self.as_ref()[offset..]; + Ok(Some( + packed::decode::reference(&mut input, self.object_hash).map_err(|_| Error::Parse)?, + )) + } + None => { + if index.encountered_parse_failure { + Err(Error::Parse) + } else { + Ok(None) + } + } + } + } + + /// Walk the entire buffer once, collecting `(name → record-start offset)` + /// pairs for every well-formed record. Malformed lines are skipped to + /// the next newline but flip [`packed::NameIndex::encountered_parse_failure`] + /// so the lookup path still reports them via [`Error::Parse`] on a miss. + fn build_name_index(&self) -> packed::NameIndex { + let body = self.as_ref(); + let body_start = body.as_ptr() as usize; + let mut by_name = std::collections::HashMap::with_capacity(estimate_record_count(body)); + let mut encountered_parse_failure = false; + let mut cursor = body; + loop { + if cursor.is_empty() { + break; + } + let record_offset = cursor.as_ptr() as usize - body_start; + let mut after = cursor; + match packed::decode::reference(&mut after, self.object_hash) { + Ok(r) => { + by_name.insert(r.name.as_bstr().to_owned(), record_offset); + cursor = after; + } + Err(_) => { + encountered_parse_failure = true; + // Skip to the next line so a single malformed record + // doesn't prevent us from indexing the rest. + cursor = match cursor.iter().position(|&b| b == b'\n') { + Some(pos) => &cursor[pos + 1..], + None => &[], + }; + } + } + } + packed::NameIndex { + by_name, + encountered_parse_failure, + } + } + /// Find a reference with the given `name` and return it. pub fn find<'a, Name, E>(&self, name: Name) -> Result, existing::Error> where @@ -137,6 +223,17 @@ pub mod existing { } } +/// A rough upper-bound estimate on the number of records in a packed-refs +/// buffer, used only to preallocate the HashMap built by +/// [`packed::Buffer::build_name_index`]. The shortest possible record is +/// `\n` which is at least `hex_len + 4` bytes (1-char name). +/// Overestimating is fine — the HashMap just grows naturally. +fn estimate_record_count(body: &[u8]) -> usize { + // Conservative average of ~50 bytes/record gives a sensible initial + // capacity without scanning the file. + body.len() / 50 +} + pub(crate) fn transform_full_name_for_lookup(name: &FullNameRef) -> Option<&FullNameRef> { match name.category_and_short_name() { Some((c, sn)) => { diff --git a/gix-ref/src/store/packed/mod.rs b/gix-ref/src/store/packed/mod.rs index 6da4d7b7e37..bd040cda865 100644 --- a/gix-ref/src/store/packed/mod.rs +++ b/gix-ref/src/store/packed/mod.rs @@ -1,5 +1,6 @@ -use std::path::PathBuf; +use std::{collections::HashMap, path::PathBuf, sync::atomic::AtomicUsize}; +use gix_features::threading::OnceCell; use gix_hash::ObjectId; use gix_object::bstr::{BStr, BString}; use memmap2::Mmap; @@ -26,6 +27,35 @@ pub struct Buffer { offset: usize, /// The path from which we were loaded path: PathBuf, + /// Number of [`Self::try_find_full_name`] calls served by the binary + /// search so far. After it crosses [`find::INDEX_BUILD_AFTER_LOOKUPS`] + /// the next lookup eagerly builds [`name_index`] for O(1) lookups, so + /// single-shot callers (typical CLI commands) don't pay the build cost. + pub(crate) lookup_count: AtomicUsize, + /// Lazily populated map from ref name to the offset of its record in + /// [`AsRef::as_ref`]. Built once per buffer snapshot and consulted on + /// every subsequent lookup, bypassing the per-call binary search. + pub(crate) name_index: OnceCell, +} + +/// A name → record-offset index for fast `try_find_full_name` lookups. +/// +/// Stored alongside [`Buffer`] inside a [`OnceCell`] so that once built it +/// is shared by every clone of the snapshot. Records that don't parse are +/// still detected: their presence flips [`Self::encountered_parse_failure`] +/// so a miss against a corrupt packed-refs file surfaces as +/// [`find::Error::Parse`] rather than `Ok(None)`, matching the binary-search +/// path's behavior. +#[derive(Debug)] +pub(crate) struct NameIndex { + /// Maps a ref's full name to the byte offset of its record's first + /// byte in [`Buffer::as_ref`] (i.e. the hash byte, not the line break + /// before it). + pub(crate) by_name: HashMap, + /// True if any record in the file failed to parse while the index was + /// being built. A missed lookup is reported as `Error::Parse` when this + /// is set, mirroring the binary-search path's parse-failure flag. + pub(crate) encountered_parse_failure: bool, } struct Edit { diff --git a/gix-ref/tests/refs/packed/find.rs b/gix-ref/tests/refs/packed/find.rs index 48bd7493ccf..d94751adbef 100644 --- a/gix-ref/tests/refs/packed/find.rs +++ b/gix-ref/tests/refs/packed/find.rs @@ -190,6 +190,194 @@ bogus refs/tags/gix-actor-v0.1.0 Ok(()) } +/// Build a packed-refs body with `count` valid records whose names are +/// `refs/heads/auto-NNNN`. The body is sorted, satisfying the "sorted" +/// header so `packed::Buffer::open` keeps the original byte layout. +fn synthetic_packed_refs(count: usize) -> Vec { + let mut out: Vec = b"# pack-refs with: peeled fully-peeled sorted\n".to_vec(); + for i in 0..count { + let line = format!("{:040x} refs/heads/auto-{i:04}\n", i as u128 + 1); + out.extend_from_slice(line.as_bytes()); + } + out +} + +/// Mixed corpus: `count` valid records plus a malformed line spliced in +/// the middle. Returned names are sorted so the file remains valid. +fn synthetic_packed_refs_with_corruption(count: usize) -> Vec { + let mut out: Vec = b"# pack-refs with: peeled fully-peeled sorted\n".to_vec(); + for i in 0..count { + let line = format!("{:040x} refs/heads/auto-{i:04}\n", i as u128 + 1); + out.extend_from_slice(line.as_bytes()); + if i == count / 2 { + // A line with the wrong shape (no space after the would-be hash) — + // `decode::reference` will reject it while leaving the surrounding + // records intact. + out.extend_from_slice(b"bogus refs/heads/auto-malformed\n"); + } + } + out +} + +#[test] +fn many_lookups_keep_returning_correct_results_across_the_index_threshold() -> crate::Result { + // Cross the lazy-index build threshold and verify every lookup — both + // the binary-search ones below it and the index-served ones above it — + // returns the same record we'd iterate. + let body = synthetic_packed_refs(64); + let (_keep, path) = write_packed_refs_with(&body)?; + let buf = packed::Buffer::open(path, 1024, HASH_KIND)?; + + let names: Vec<_> = (0..64).map(|i| format!("refs/heads/auto-{i:04}")).collect(); + for name in &names { + let found = buf.try_find(name.as_str())?.expect("ref is present"); + assert_eq!(found.name.as_bstr(), name.as_str(), "name round-trips"); + } + // A miss against a clean index reports `Ok(None)`, not `Err(Parse)`. + assert!( + buf.try_find("refs/heads/does-not-exist")?.is_none(), + "missing refs return Ok(None) when no malformed records are present" + ); + Ok(()) +} + +#[test] +fn index_path_surfaces_parse_failures_on_miss() -> crate::Result { + // Build a body large enough that `try_find` will trip the index-build + // threshold, then verify that a miss surfaces `Error::Parse` because a + // malformed record was encountered while building the index — matching + // the binary-search path's `encountered_parse_failure` semantics. + let body = synthetic_packed_refs_with_corruption(32); + let (_keep, path) = write_packed_refs_with(&body)?; + let buf = packed::Buffer::open(path, 1024, HASH_KIND)?; + + // First do enough valid lookups to push past `INDEX_BUILD_AFTER_LOOKUPS` + // and force the index to be built. + for i in 0..16 { + let name = format!("refs/heads/auto-{i:04}"); + let found = buf.try_find(name.as_str())?.expect("ref exists"); + assert_eq!(found.name.as_bstr(), name.as_str()); + } + + // Now ask for a name that doesn't exist. Because the malformed record + // is in the file, the index records `encountered_parse_failure=true`, + // and a miss surfaces as a parse error rather than `Ok(None)`. + assert_eq!( + buf.try_find("refs/heads/never-there") + .expect_err("miss against a corrupt file must surface as Error::Parse") + .to_string(), + "The reference could not be parsed", + "index lookup matches the binary-search path's behavior on miss when corruption exists", + ); + Ok(()) +} + +/// Profiling helper: simulate a wide-refs fetch's `update_refs` loop and +/// break down where the time goes. Run with: +/// +/// ```text +/// cargo test -p gix-ref --release --features sha1 --test refs \ +/// -- --ignored --nocapture loose_stat_overhead_profile +/// ``` +/// +/// Reports total time for two strategies against a ~150k-packed-ref repo: +/// A. `store.try_find(name)` — the current `try_find_reference` path, +/// which `stat`s the loose-ref file before falling through to packed. +/// B. Pre-built `HashSet` of loose names → bypass the stat when the name +/// isn't in it, then `packed.try_find(name)` directly. +/// +/// The delta (A − B) is the loose-stat overhead that a hypothetical +/// `file::Store`-level loose-name cache (or a caller-side enumeration like +/// the original #2605) would reclaim on top of this PR's packed-buffer +/// index. Useful for deciding whether such a follow-up is worth its +/// complexity, since the answer is workload-dependent (warm vs cold cache, +/// loose-ref count, filesystem characteristics). +#[test] +#[ignore = "profiling only; expensive and times vary across machines"] +fn loose_stat_overhead_profile() -> crate::Result { + use std::collections::HashSet; + use std::time::Instant; + + let store = store_at("make_repository_with_lots_of_packed_refs.sh")?; + let packed = store.open_packed_buffer()?.expect("packed-refs present"); + + // Collect all 150k packed ref names — these are what fetch's update_refs + // would iterate over. + let collect_start = Instant::now(); + let packed_names: Vec<_> = packed + .iter()? + .filter_map(Result::ok) + .map(|r| r.name.to_owned()) + .collect(); + eprintln!( + "Collected {} packed ref names in {:?}", + packed_names.len(), + collect_start.elapsed() + ); + + // Strategy A — current behavior, with loose-stat per call. + let warmup = Instant::now(); + for name in packed_names.iter().take(1000) { + let _ = store.try_find(name.as_ref())?; + } + eprintln!("Warmup (1000 lookups): {:?}", warmup.elapsed()); + + let a_start = Instant::now(); + let mut a_found = 0usize; + for name in &packed_names { + if store.try_find(name.as_ref())?.is_some() { + a_found += 1; + } + } + let a_elapsed = a_start.elapsed(); + eprintln!( + "Strategy A (store.try_find — per-call loose stat): {:?}, found {} of {} ({:.1} µs/lookup)", + a_elapsed, + a_found, + packed_names.len(), + a_elapsed.as_secs_f64() * 1_000_000.0 / packed_names.len() as f64, + ); + + // Strategy B — simulated loose-index: pre-enumerate loose names, skip stat + // when name isn't present. + let loose_set_start = Instant::now(); + let loose_set: HashSet<_> = store.loose_iter()?.filter_map(Result::ok).map(|r| r.name).collect(); + eprintln!( + "Built loose-name set ({} entries) in {:?}", + loose_set.len(), + loose_set_start.elapsed() + ); + + let b_start = Instant::now(); + let mut b_found = 0usize; + for name in &packed_names { + if loose_set.contains(name) { + // Take slow path for shadowed entries. + if store.try_find(name.as_ref())?.is_some() { + b_found += 1; + } + } else if packed.try_find(name.as_ref())?.is_some() { + b_found += 1; + } + } + let b_elapsed = b_start.elapsed(); + eprintln!( + "Strategy B (loose-set short-circuit + packed direct): {:?}, found {} of {} ({:.1} µs/lookup)", + b_elapsed, + b_found, + packed_names.len(), + b_elapsed.as_secs_f64() * 1_000_000.0 / packed_names.len() as f64, + ); + + eprintln!( + "Loose-stat overhead reclaimable by an in-Store loose-name cache: {:?} ({:.1}% of A)", + a_elapsed.saturating_sub(b_elapsed), + (a_elapsed.saturating_sub(b_elapsed).as_secs_f64() / a_elapsed.as_secs_f64()) * 100.0, + ); + assert_eq!(a_found, b_found, "both strategies find the same number of refs"); + Ok(()) +} + #[test] fn find_speed() -> crate::Result { let store = store_at("make_repository_with_lots_of_packed_refs.sh")?;