Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions gix-ref/src/store/packed/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,31 @@ pub fn reference<'a>(input: &mut &'a [u8], object_hash: gix_hash::Kind) -> Resul
Ok(packed::Reference { name, target, object })
}

/// Extract the name bytes from a packed-refs record without validating the
/// name as a [`FullNameRef`][crate::FullNameRef] or the hash as hex.
///
/// Used by the binary search in [`super::Buffer::binary_search_by`], where
/// only the name bytes are needed for ordered comparison — the final match
/// is re-parsed through [`reference`] which validates fully.
///
/// Returns `None` when the record does not have the expected
/// `<hash><space><name><newline>` shape (including an empty name or input
/// shorter than the hash plus separator). The caller is expected to treat
/// `None` as a parse failure and flip its parse-failure flag so a search
/// miss can be surfaced as [`super::Error::Parse`] rather than silently
/// reported as `Ok(None)`.
pub(crate) fn name_at_record_start(input: &[u8], object_hash: gix_hash::Kind) -> Option<&[u8]> {
let hex_len = object_hash.len_in_hex();
if input.get(hex_len) != Some(&b' ') {
return None;
}
let after = &input[hex_len + 1..];
let end = after.iter().position(|&b| b == b'\r' || b == b'\n')?;
if end == 0 {
return None;
}
Some(&after[..end])
}

#[cfg(test)]
mod tests;
102 changes: 102 additions & 0 deletions gix-ref/src/store/packed/decode/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,108 @@ mod reference {
}
}

mod name_at_record_start {
use crate::store_impl::packed::decode;

const SHA1: gix_hash::Kind = gix_hash::Kind::Sha1;
const SHA256: gix_hash::Kind = gix_hash::Kind::Sha256;

#[test]
fn extracts_name_terminated_by_lf() {
let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main\n";
assert_eq!(
decode::name_at_record_start(input, SHA1),
Some(b"refs/heads/main".as_slice()),
);
}

#[test]
fn extracts_name_terminated_by_cr() {
let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main\r\n";
assert_eq!(
decode::name_at_record_start(input, SHA1),
Some(b"refs/heads/main".as_slice()),
"CR is treated as a name terminator, matching `until_line_end_without_separator`",
);
}

#[test]
fn does_not_validate_name_or_hash_contents() {
let input = b"ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ not!a!valid!refname\n";
assert_eq!(
decode::name_at_record_start(input, SHA1),
Some(b"not!a!valid!refname".as_slice()),
"the binary-search comparator only needs name bytes; validation happens at the match site",
);
}

#[test]
fn rejects_missing_space_after_hash() {
let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75-refs/heads/main\n";
assert_eq!(
decode::name_at_record_start(input, SHA1),
None,
"lines that do not have the expected shape are reported as a parse failure",
);
}

#[test]
fn rejects_truncated_input_without_newline() {
let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/main";
assert_eq!(
decode::name_at_record_start(input, SHA1),
None,
"missing line terminator means we can't bound the name",
);
}

#[test]
fn rejects_empty_name() {
let input = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 \n";
assert_eq!(
decode::name_at_record_start(input, SHA1),
None,
"an empty name is treated as a shape mismatch so the slow path can surface it as a parse error",
);
}

#[test]
fn rejects_input_shorter_than_hash() {
assert_eq!(decode::name_at_record_start(b"", SHA1), None);
assert_eq!(decode::name_at_record_start(b"short", SHA1), None);
assert_eq!(
decode::name_at_record_start(b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75", SHA1),
None,
"input must be at least <hash><space> to be considered",
);
}

#[test]
fn sha256_record() {
let input = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa refs/heads/main\n";
assert_eq!(
decode::name_at_record_start(input, SHA256),
Some(b"refs/heads/main".as_slice()),
);
assert_eq!(
decode::name_at_record_start(input, SHA1),
None,
"with a SHA1 hash kind the SHA256-sized prefix is not followed by a space at the expected offset",
);
}

#[test]
fn ignores_trailing_peeled_line() {
let input =
b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/tags/v1\n^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\n";
assert_eq!(
decode::name_at_record_start(input, SHA1),
Some(b"refs/tags/v1".as_slice()),
"only the first line is considered; the peeled line is parsed by the match-site decoder",
);
}
}

mod header {
use gix_object::bstr::ByteSlice;

Expand Down
19 changes: 13 additions & 6 deletions gix-ref/src/store/packed/find.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,20 @@ impl packed::Buffer {
let mut encountered_parse_failure = false;
a.binary_search_by_key(&full_name.as_ref(), |b: &u8| {
let ofs = std::ptr::from_ref::<u8>(b) as usize - a.as_ptr() as usize;
let mut line = &a[search_start_of_record(ofs)..];
packed::decode::reference(&mut line, self.object_hash)
.map(|r| r.name.as_bstr().as_bytes())
.inspect_err(|_err| {
let line = &a[search_start_of_record(ofs)..];
// The binary search only needs the name bytes for ordered
// comparison; skip ref-name and hex-hash validation here and let
// the final match site re-parse the record via `decode::reference`
// (which validates fully). This saves the `log₂(n)` per-query
// `gix_validate::reference::name` calls the previous comparator
// ran, which dominate fetch CPU on wide-refs mirrors.
match packed::decode::name_at_record_start(line, self.object_hash) {
Some(name) => name,
None => {
encountered_parse_failure = true;
})
.unwrap_or(&[])
&[]
Comment on lines +99 to +103
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Surface malformed packed-refs records as parse failures

This match only treats shape mismatches as parse failures, so records with the right <hash><space><name><newline> layout but invalid hash/refname content are now considered comparable and no longer flip encountered_parse_failure. In a sorted packed-refs file containing such a malformed line (that is not the exact lookup target), try_find_full_name can return Ok(None) instead of Error::Parse, which hides on-disk corruption that the previous decode::reference-based comparison reported whenever the binary search touched that record.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pre-PR detection here was already opportunistic, not a guarantee: encountered_parse_failure only flipped for records the binary search happened to touch (~O(log n) per query), so corruption in untouched lines was never reported by try_find_full_name either. Skipping the full gix_validate::reference::name call on those comparisons is the whole point of the change.

Full validation still runs in the paths that matter:

  • on a match, try_find_full_name re-parses through decode::reference and returns Err(Parse) on bad content
  • full iteration (buf.iter(), used by loose_iter / iter_packed) validates every record

Leaving as-is.

}
}
})
.map(search_start_of_record)
.map_err(|pos| (encountered_parse_failure, search_start_of_record(pos)))
Expand Down
Loading