From aa9dc5550a164e4c7217ecd273e0bd11f585d76a Mon Sep 17 00:00:00 2001
From: Nikolay Bryskin
Date: Sun, 17 May 2026 19:36:36 +0300
Subject: [PATCH 1/3] gix: cache packed-refs in a HashMap during fetch update_refs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`fetch::refs::update()` calls `repo.try_find_reference(name)` once per
advertised mapping. Each call does a filesystem stat for a loose ref
followed by a binary search over `packed-refs` — and on wide-refs
mirrors (e.g. AUR's 154k branches) that loop dominates wall time.

Snapshot `packed-refs` into a `HashMap` once and enumerate loose refs
into a `HashSet` once at the start of `update()`. Per-mapping lookups
consult the HashMap when the name has no loose shadow, falling back to
`repo.try_find_reference()` only when a loose ref exists (preserving
the precedence rule) or when the snapshot couldn't be built. The
optimization is purely additive: any failure to build the snapshots
falls back to the unchanged slow path.

Measured on a `gitaur -Sy` against the AUR mirror (154k packed refs,
~200 loose refs, warm cache, no incoming updates):

    unpatched    11.0s wall / 8.1s user CPU
    (C) alone     8.0s wall / 5.8s user CPU  (~27% faster)
    (B) + (C)     5.0s wall / 3.3s user CPU  (~55% faster, ≈ git CLI)

(C) is this change; (B) is the sibling change in gix-ref that drops
name validation from the binary-search comparator. (C) and (B) target
different phases — the have-set build still drives binary searches
(helped by B), while update_refs itself is now a hash lookup (helped
by C).

Co-authored-by: Claude Opus 4.7 --- .../connection/fetch/update_refs/mod.rs | 88 ++++++++++++++++++- 1 file changed, 85 insertions(+), 3 deletions(-) diff --git a/gix/src/remote/connection/fetch/update_refs/mod.rs b/gix/src/remote/connection/fetch/update_refs/mod.rs index 2494dbe95d2..e6ea42b27fe 100644 --- a/gix/src/remote/connection/fetch/update_refs/mod.rs +++ b/gix/src/remote/connection/fetch/update_refs/mod.rs @@ -1,7 +1,13 @@ #![allow(clippy::result_large_err)] -use std::{collections::BTreeMap, path::PathBuf}; +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + path::PathBuf, +}; -use gix_object::Exists; +use gix_object::{ + Exists, + bstr::{BString, ByteSlice}, +}; use gix_ref::{ Target, TargetRef, transaction::{Change, LogChange, PreviousValue, RefEdit, RefLog}, @@ -77,6 +83,15 @@ pub(crate) fn update( let mut edit_indices_to_validate = Vec::new(); let mut checked_out_branches = worktree_branches(repo)?; + // For wide-refs fetches (e.g. mirror clones with 100k+ branches in + // `packed-refs`) the per-mapping `repo.try_find_reference()` call below + // dominates wall time: each call does one filesystem stat for a loose + // ref plus a binary search over `packed-refs`. The fast path here is + // purely additive — if either snapshot fails to build, fall through to + // the original lookup with unchanged semantics. Loose refs shadow + // packed entries, so the HashMap is only consulted when no loose ref + // exists for that name. + let lookup_fast_path = build_lookup_fast_path(repo); let implicit_tag_refspec = fetch_tags .to_refspec() .filter(|_| matches!(fetch_tags, crate::remote::fetch::Tags::Included)); @@ -113,7 +128,12 @@ pub(crate) fn update( } let (mode, edit_index, type_change) = match local { Some(name) => { - let (mode, reflog_message, name, previous_value) = match repo.try_find_reference(name)? 
{ + let existing = match lookup_fast_path.as_ref().map(|fp| fp.lookup(name.as_bstr())) { + Some(LookupOutcome::Hit(r)) => Some(crate::Reference::from_ref(r, repo)), + Some(LookupOutcome::NotFound) => None, + Some(LookupOutcome::TakeSlowPath) | None => repo.try_find_reference(name)?, + }; + let (mode, reflog_message, name, previous_value) = match existing { Some(existing) => { if let Some(wt_dirs) = checked_out_branches.get_mut(existing.name()) { wt_dirs.sort(); @@ -471,5 +491,67 @@ fn worktree_branches(repo: &Repository) -> Result, + /// Names of loose refs which shadow entries in `packed` and must take the + /// slow path so existing precedence is preserved. + loose_shadows: HashSet, +} + +/// Result of a fast-path lookup. `TakeSlowPath` is distinct from `NotFound`: +/// the former means "we can't answer from the snapshot alone, defer to +/// `repo.try_find_reference()`" (e.g. a loose ref shadows this name), while +/// the latter means "we are certain no ref exists with this name". +enum LookupOutcome { + Hit(gix_ref::Reference), + NotFound, + TakeSlowPath, +} + +impl LookupFastPath { + fn lookup(&self, name: &gix_object::bstr::BStr) -> LookupOutcome { + if self.loose_shadows.contains(name) { + return LookupOutcome::TakeSlowPath; + } + match self.packed.get(name) { + Some(r) => LookupOutcome::Hit(r.clone()), + None => LookupOutcome::NotFound, + } + } +} + +/// Build [`LookupFastPath`] for the repo, returning `None` if either of the +/// inputs (packed-refs snapshot, loose refs enumeration) is unavailable. A +/// `None` return causes the caller to fall back to the unmodified slow path, +/// so the optimization is purely additive. 
+fn build_lookup_fast_path(repo: &Repository) -> Option { + let buf = repo.refs.cached_packed_buffer().ok().flatten()?; + let iter = buf.iter().ok()?; + let packed: HashMap = iter + .filter_map(Result::ok) + .map(|r| { + let name = r.name.as_bstr().to_owned(); + let r: gix_ref::Reference = r.into(); + (name, r) + }) + .collect(); + let loose_shadows: HashSet = repo + .refs + .loose_iter() + .ok()? + .filter_map(Result::ok) + .map(|r| r.name.as_bstr().to_owned()) + .collect(); + Some(LookupFastPath { + packed, + loose_shadows, + }) +} + #[cfg(test)] mod tests; From 2627db4c17bade4c9b76933ed57f905ca86284ee Mon Sep 17 00:00:00 2001 From: Nikolay Bryskin Date: Sun, 17 May 2026 22:14:22 +0300 Subject: [PATCH 2/3] fmt: collapse struct literal --- gix/src/remote/connection/fetch/update_refs/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/gix/src/remote/connection/fetch/update_refs/mod.rs b/gix/src/remote/connection/fetch/update_refs/mod.rs index e6ea42b27fe..a1ffb36f5d2 100644 --- a/gix/src/remote/connection/fetch/update_refs/mod.rs +++ b/gix/src/remote/connection/fetch/update_refs/mod.rs @@ -547,10 +547,7 @@ fn build_lookup_fast_path(repo: &Repository) -> Option { .filter_map(Result::ok) .map(|r| r.name.as_bstr().to_owned()) .collect(); - Some(LookupFastPath { - packed, - loose_shadows, - }) + Some(LookupFastPath { packed, loose_shadows }) } #[cfg(test)] From 46126e14f4e9bca1c19a9ad6ae744c9b36a59129 Mon Sep 17 00:00:00 2001 From: Nikolay Bryskin Date: Mon, 18 May 2026 01:32:31 +0300 Subject: [PATCH 3/3] gix: bail fetch update_refs fast path on namespace or ref errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The packed-refs fast path keyed entries by their raw on-disk names, which include the namespace prefix when `core.refsNamespace` is set, while lookups use namespace-stripped local names — packed refs would be misclassified as missing. 
Bail out when a namespace is configured so the slow path applies/strips it correctly. Also stop dropping iterator errors via `filter_map(Result::ok)`. A malformed loose ref shadowing a packed entry would otherwise let the fast path return the packed value instead of surfacing the corruption the slow path raises. --- .../connection/fetch/update_refs/mod.rs | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/gix/src/remote/connection/fetch/update_refs/mod.rs b/gix/src/remote/connection/fetch/update_refs/mod.rs index a1ffb36f5d2..3907e8e2b3c 100644 --- a/gix/src/remote/connection/fetch/update_refs/mod.rs +++ b/gix/src/remote/connection/fetch/update_refs/mod.rs @@ -530,23 +530,26 @@ impl LookupFastPath { /// `None` return causes the caller to fall back to the unmodified slow path, /// so the optimization is purely additive. fn build_lookup_fast_path(repo: &Repository) -> Option { + // `cached_packed_buffer()` returns raw, namespace-prefixed names from + // `packed-refs`, while lookups here use namespace-stripped local names. + // Defer to the slow path, which applies the namespace correctly. + if repo.refs.namespace.is_some() { + return None; + } let buf = repo.refs.cached_packed_buffer().ok().flatten()?; - let iter = buf.iter().ok()?; - let packed: HashMap = iter - .filter_map(Result::ok) - .map(|r| { - let name = r.name.as_bstr().to_owned(); - let r: gix_ref::Reference = r.into(); - (name, r) - }) - .collect(); - let loose_shadows: HashSet = repo - .refs - .loose_iter() - .ok()? - .filter_map(Result::ok) - .map(|r| r.name.as_bstr().to_owned()) - .collect(); + let mut packed = HashMap::new(); + for r in buf.iter().ok()? { + // Bail to the slow path on parse errors so corruption surfaces via + // `repo.try_find_reference()` instead of being silently dropped. 
+ let r = r.ok()?; + let name = r.name.as_bstr().to_owned(); + packed.insert(name, r.into()); + } + let mut loose_shadows = HashSet::new(); + for r in repo.refs.loose_iter().ok()? { + let r = r.ok()?; + loose_shadows.insert(r.name.as_bstr().to_owned()); + } Some(LookupFastPath { packed, loose_shadows }) }