diff --git a/gix-ref/src/store/packed/decode.rs b/gix-ref/src/store/packed/decode.rs index 6ae93b2c97e..ccaffc9bfa8 100644 --- a/gix-ref/src/store/packed/decode.rs +++ b/gix-ref/src/store/packed/decode.rs @@ -96,5 +96,31 @@ pub fn reference<'a>(input: &mut &'a [u8], object_hash: gix_hash::Kind) -> Resul Ok(packed::Reference { name, target, object }) } +/// Extract the name bytes from a packed-refs record without validating the +/// name as a [`FullNameRef`][crate::FullNameRef] or the hash as hex. +/// +/// Used by the binary search in [`super::Buffer::binary_search_by`], where +/// only the name bytes are needed for ordered comparison — the final match +/// is re-parsed through [`reference`] which validates fully. +/// +/// Returns `None` when the record does not have the expected +/// `` shape (including an empty name or input +/// shorter than the hash plus separator). The caller is expected to treat +/// `None` as a parse failure and flip its parse-failure flag so a search +/// miss can be surfaced as [`super::Error::Parse`] rather than silently +/// reported as `Ok(None)`. +pub(crate) fn name_at_record_start(input: &[u8], object_hash: gix_hash::Kind) -> Option<&[u8]> { + let hex_len = object_hash.len_in_hex(); + if input.get(hex_len) != Some(&b' ') { + return None; + } + let after = &input[hex_len + 1..]; + let end = after.iter().position(|&b| b == b'\r' || b == b'\n')?; + if end == 0 { + return None; + } + Some(&after[..end]) +} + #[cfg(test)] mod tests; diff --git a/gix-ref/src/store/packed/decode/tests.rs b/gix-ref/src/store/packed/decode/tests.rs index 92f9770f5a9..deceffc4dbb 100644 --- a/gix-ref/src/store/packed/decode/tests.rs +++ b/gix-ref/src/store/packed/decode/tests.rs @@ -86,6 +86,108 @@ mod reference { } } +mod name_at_record_start { + use crate::store_impl::packed::decode; + + const SHA1: gix_hash::Kind = gix_hash::Kind::Sha1; + const SHA256: gix_hash::Kind = gix_hash::Kind::Sha256; + + #[test] + fn extracts_name_terminated_by_lf() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"refs/heads/main".as_slice()), + ); + } + + #[test] + fn extracts_name_terminated_by_cr() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main\r\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"refs/heads/main".as_slice()), + "CR is treated as a name terminator, matching `until_line_end_without_separator`", + ); + } + + #[test] + fn does_not_validate_name_or_hash_contents() { + let input = b"ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ not!a!valid!refname\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"not!a!valid!refname".as_slice()), + "the binary-search comparator only needs name bytes; validation happens at the match site", + ); + } + + #[test] + fn rejects_missing_space_after_hash() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75-refs/heads/main\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "lines that do not have the expected shape are reported as a parse failure", + ); + } + + #[test] + fn rejects_truncated_input_without_newline() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "missing line terminator means we can't bound the name", + ); + } + + #[test] + fn rejects_empty_name() { + let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 \n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "an empty name is treated as a shape mismatch so the slow path can surface it as a parse error", + ); + } + + #[test] + fn rejects_input_shorter_than_hash() { + assert_eq!(decode::name_at_record_start(b"", SHA1), None); + assert_eq!(decode::name_at_record_start(b"short", SHA1), None); + assert_eq!( + decode::name_at_record_start(b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75", SHA1), + None, + "input must be at least to be considered", + ); + } + + #[test] + fn sha256_record() { + let input = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa refs/heads/main\n"; + assert_eq!( + decode::name_at_record_start(input, SHA256), + Some(b"refs/heads/main".as_slice()), + ); + assert_eq!( + decode::name_at_record_start(input, SHA1), + None, + "with a SHA1 hash kind the SHA256-sized prefix is not followed by a space at the expected offset", + ); + } + + #[test] + fn ignores_trailing_peeled_line() { + let input = + b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/tags/v1\n^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\n"; + assert_eq!( + decode::name_at_record_start(input, SHA1), + Some(b"refs/tags/v1".as_slice()), + "only the first line is considered; the peeled line is parsed by the match-site decoder", + ); + } +} + mod header { use gix_object::bstr::ByteSlice; diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index 915e725029a..2cd94f270d8 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -89,13 +89,20 @@ impl packed::Buffer { let mut encountered_parse_failure = false; a.binary_search_by_key(&full_name.as_ref(), |b: &u8| { let ofs = std::ptr::from_ref::(b) as usize - a.as_ptr() as usize; - let mut line = &a[search_start_of_record(ofs)..]; - packed::decode::reference(&mut line, self.object_hash) - .map(|r| r.name.as_bstr().as_bytes()) - .inspect_err(|_err| { + let line = &a[search_start_of_record(ofs)..]; + // The binary search only needs the name bytes for ordered + // comparison; skip ref-name and hex-hash validation here and let + // the final match site re-parse the record via `decode::reference` + // (which validates fully). This saves the `log₂(n)` per-query + // `gix_validate::reference::name` calls the previous comparator + // ran, which dominate fetch CPU on wide-refs mirrors. + match packed::decode::name_at_record_start(line, self.object_hash) { + Some(name) => name, + None => { encountered_parse_failure = true; - }) - .unwrap_or(&[]) + &[] + } + } }) .map(search_start_of_record) .map_err(|pos| (encountered_parse_failure, search_start_of_record(pos)))