From a8f747457e3c1d4e9507a24581e23efc5386ccfa Mon Sep 17 00:00:00 2001 From: Nikolay Bryskin Date: Sun, 17 May 2026 18:25:02 +0300 Subject: [PATCH 1/3] gix-ref: skip name validation in packed-refs binary search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `packed::Buffer::binary_search_by` calls `decode::reference` on every comparison step (~17 per lookup on a 154k-ref store), and each decode runs `gix_validate::reference::name` -> `gix_validate::tag::name_inner` plus full hex validation on the hash. The binary search only needs the name bytes for ordered comparison; the final match in `try_find_full_name` already re-parses the record through `decode::reference` and surfaces any parse error. Introduce `decode::name_at_record_start`, which scans for the `` shape and returns just the name slice without validating either piece, and use it from the binary search. Parse-failure detection is preserved via the same flag the previous implementation set. On a wide-refs incremental fetch (gitaur AUR mirror, 154k branches) this is the single largest CPU sink — `gix_validate::tag::name_inner` shows up at ~14% of total fetch CPU in `samply`, all of it from this loop. End-to-end on the same fetch the change brings wall time from ~11.0s to ~7.7s (~30%) and user CPU from ~8.1s to ~5.1s (~37%). Co-authored-by: Claude Opus 4.7 --- gix-ref/src/store/packed/decode.rs | 21 +++++++++++++++++++++ gix-ref/src/store/packed/find.rs | 18 ++++++++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/gix-ref/src/store/packed/decode.rs b/gix-ref/src/store/packed/decode.rs index 6ae93b2c97e..87ebc6b09a2 100644 --- a/gix-ref/src/store/packed/decode.rs +++ b/gix-ref/src/store/packed/decode.rs @@ -96,5 +96,26 @@ pub fn reference<'a>(input: &mut &'a [u8], object_hash: gix_hash::Kind) -> Resul Ok(packed::Reference { name, target, object }) } +/// Extract the name bytes from a packed-refs record without validating the +/// name as a [`FullNameRef`][crate::FullNameRef] or the hash as hex. +/// +/// Used by the binary search in [`super::Buffer::binary_search_by`], where +/// only the name bytes are needed for ordered comparison — the final match +/// is re-parsed through [`reference`] which validates fully. Returns `None` +/// when the record does not have the expected `` +/// shape, which the caller can treat as a parse failure. +pub(crate) fn name_at_record_start(input: &[u8], object_hash: gix_hash::Kind) -> Option<&[u8]> { + let hex_len = object_hash.len_in_hex(); + if input.get(hex_len) != Some(&b' ') { + return None; + } + let after = &input[hex_len + 1..]; + let end = after.iter().position(|&b| b == b'\r' || b == b'\n')?; + if end == 0 { + return None; + } + Some(&after[..end]) +} + #[cfg(test)] mod tests; diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index 915e725029a..eaf7bab2cfc 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -89,13 +89,19 @@ impl packed::Buffer { let mut encountered_parse_failure = false; a.binary_search_by_key(&full_name.as_ref(), |b: &u8| { let ofs = std::ptr::from_ref::(b) as usize - a.as_ptr() as usize; - let mut line = &a[search_start_of_record(ofs)..]; - packed::decode::reference(&mut line, self.object_hash) - .map(|r| r.name.as_bstr().as_bytes()) - .inspect_err(|_err| { + let line = &a[search_start_of_record(ofs)..]; + // The binary search only needs the name bytes for ordered + // comparison; skip ref-name and hex-hash validation here and let + // the final match site re-parse the record via `decode::reference` + // (which validates fully). On a 154k-ref packed-refs this saves + // ~17× redundant `gix_validate::reference::name` calls per query. + match packed::decode::name_at_record_start(line, self.object_hash) { + Some(name) => name, + None => { encountered_parse_failure = true; - }) - .unwrap_or(&[]) + &[] + } + } }) .map(search_start_of_record) .map_err(|pos| (encountered_parse_failure, search_start_of_record(pos))) From 0cf898015a32777f3f2f7d494aa4e242cbb22055 Mon Sep 17 00:00:00 2001 From: Nikolay Bryskin Date: Mon, 18 May 2026 12:02:33 +0300 Subject: [PATCH 2/3] gix-ref: unit tests for packed::decode::name_at_record_start Cover the comparator's contract directly: valid records terminated by `\n` or `\r`, the empty-name and missing-space rejections, truncated input, inputs shorter than the hash, SHA-256 records, and the "only the first line" rule that lets the match-site decoder handle the optional peeled `^` follow-up. The new helper sits inside the hot binary-search loop, so pinning its shape contract independently of `decode::reference` guards against accidental drift if either is touched later. Co-authored-by: Claude Opus 4.7 --- gix-ref/src/store/packed/decode/tests.rs | 102 +++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/gix-ref/src/store/packed/decode/tests.rs b/gix-ref/src/store/packed/decode/tests.rs index 92f9770f5a9..deceffc4dbb 100644 --- a/gix-ref/src/store/packed/decode/tests.rs +++ b/gix-ref/src/store/packed/decode/tests.rs @@ -86,6 +86,108 @@ mod reference { } } +mod name_at_record_start { + use crate::store_impl::packed::decode; + + const SHA1: gix_hash::Kind = gix_hash::Kind::Sha1; + const SHA256: gix_hash::Kind = gix_hash::Kind::Sha256; + + #[test] + fn extracts_name_terminated_by_lf() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"refs/heads/main".as_slice()), + ); + } + + #[test] + fn extracts_name_terminated_by_cr() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main\r\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"refs/heads/main".as_slice()), + "CR is treated as a name terminator, matching `until_line_end_without_separator`", + ); + } + + #[test] + fn does_not_validate_name_or_hash_contents() { + let input = b"ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ not!a!valid!refname\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"not!a!valid!refname".as_slice()), + "the binary-search comparator only needs name bytes; validation happens at the match site", + ); + } + + #[test] + fn rejects_missing_space_after_hash() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75-refs/heads/main\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "lines that do not have the expected shape are reported as a parse failure", + ); + } + + #[test] + fn rejects_truncated_input_without_newline() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "missing line terminator means we can't bound the name", + ); + } + + #[test] + fn rejects_empty_name() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 \n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "an empty name is treated as a shape mismatch so the slow path can surface it as a parse error", + ); + } + + #[test] + fn rejects_input_shorter_than_hash() { + assert_eq!(decode::name_at_record_start(b"", SHA1), None); + assert_eq!(decode::name_at_record_start(b"short", SHA1), None); + assert_eq!( + decode::name_at_record_start(b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75", SHA1), + None, + "input must be at least to be considered", + ); + } + + #[test] + fn sha256_record() { + let input = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa refs/heads/main\n"; + assert_eq!( + decode::name_at_record_start(input, SHA256), + Some(b"refs/heads/main".as_slice()), + ); + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "with a SHA1 hash kind the SHA256-sized prefix is not followed by a space at the expected offset", + ); + } + + #[test] + fn ignores_trailing_peeled_line() { + let input = + b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/tags/v1\n^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"refs/tags/v1".as_slice()), + "only the first line is considered; the peeled line is parsed by the match-site decoder", + ); + } +} + mod header { use gix_object::bstr::ByteSlice; From 9b86d7cdd7c097fcab5190dcb3ebfd92ec963e0f Mon Sep 17 00:00:00 2001 From: Nikolay Bryskin Date: Mon, 18 May 2026 14:20:14 +0300 Subject: [PATCH 3/3] gix-ref: clarify comparator and helper docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two small polish changes to the packed-refs binary-search hot path: - Reword the comment in `binary_search_by` to talk about `log₂(n)` redundant `gix_validate::reference::name` calls rather than the concrete "~17×" figure tied to a 154k-ref store, so the comment ages with the codebase rather than with a specific repo's ref count. - Expand the doc on `decode::name_at_record_start` to make the parse-failure contract explicit: returning `None` is the signal for the caller to flip its parse-failure flag, which is what lets a search miss against a corrupt packed-refs surface as `Error::Parse` instead of `Ok(None)`. Co-authored-by: Claude Opus 4.7 --- gix-ref/src/store/packed/decode.rs | 11 ++++++++--- gix-ref/src/store/packed/find.rs | 5 +++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/gix-ref/src/store/packed/decode.rs b/gix-ref/src/store/packed/decode.rs index 87ebc6b09a2..ccaffc9bfa8 100644 --- a/gix-ref/src/store/packed/decode.rs +++ b/gix-ref/src/store/packed/decode.rs @@ -101,9 +101,14 @@ pub fn reference<'a>(input: &mut &'a [u8], object_hash: gix_hash::Kind) -> Resul /// /// Used by the binary search in [`super::Buffer::binary_search_by`], where /// only the name bytes are needed for ordered comparison — the final match -/// is re-parsed through [`reference`] which validates fully. Returns `None` -/// when the record does not have the expected `` -/// shape, which the caller can treat as a parse failure. +/// is re-parsed through [`reference`] which validates fully. +/// +/// Returns `None` when the record does not have the expected +/// `` shape (including an empty name or input +/// shorter than the hash plus separator). The caller is expected to treat +/// `None` as a parse failure and flip its parse-failure flag so a search +/// miss can be surfaced as [`super::Error::Parse`] rather than silently +/// reported as `Ok(None)`. pub(crate) fn name_at_record_start(input: &[u8], object_hash: gix_hash::Kind) -> Option<&[u8]> { let hex_len = object_hash.len_in_hex(); if input.get(hex_len) != Some(&b' ') { diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index eaf7bab2cfc..2cd94f270d8 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -93,8 +93,9 @@ impl packed::Buffer { // The binary search only needs the name bytes for ordered // comparison; skip ref-name and hex-hash validation here and let // the final match site re-parse the record via `decode::reference` - // (which validates fully). On a 154k-ref packed-refs this saves - // ~17× redundant `gix_validate::reference::name` calls per query. + // (which validates fully). This saves the `log₂(n)` per-query + // `gix_validate::reference::name` calls the previous comparator + // ran, which dominate fetch CPU on wide-refs mirrors. match packed::decode::name_at_record_start(line, self.object_hash) { Some(name) => name, None => {