Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 37 additions & 36 deletions gix-pack/src/data/entry/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ impl data::Entry {
header: object,
decompressed_size: size,
data_offset: pack_offset + consumed as u64,
encoded_header_size: consumed.try_into().expect("pack entry headers fit into u16"),
})
}

Expand Down Expand Up @@ -96,6 +97,7 @@ impl data::Entry {
header: object,
decompressed_size: size,
data_offset: pack_offset + consumed as u64,
encoded_header_size: consumed.try_into().expect("pack entry headers fit into u16"),
})
}
}
Expand All @@ -121,12 +123,6 @@ fn streaming_parse_header_info(read: &mut dyn io::Read) -> Result<(u8, u64, usiz
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "pack entry header overflowed"))?;
shift += 7;
}
if i != encoded_pack_entry_header_size(size) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"pack entry header uses a non-canonical size encoding",
));
}
Ok((type_id, size, i))
}

Expand All @@ -149,11 +145,6 @@ fn parse_header_info(data: &[u8]) -> Result<(u8, u64, usize), Error> {
size = size.checked_add(component).ok_or(Error::Overflow)?;
shift += 7;
}
if i != encoded_pack_entry_header_size(size) {
return Err(Error::Corrupt {
message: "pack entry header uses a non-canonical size encoding",
});
}
Ok((type_id, size, i))
}

Expand All @@ -178,35 +169,45 @@ fn parse_leb64(data: &[u8]) -> Result<(u64, usize), Error> {
Ok((value, i))
}

/// Return the canonical byte length of a pack-entry size header for `size`.
///
/// We use this to reject overlong size encodings during parsing.
/// That matters for our delta resolution implementation, which later reconstructs an entry's
/// pack offset from `data_offset - header_size()`. If we accepted non-canonical encodings here,
/// `header_size()` would compute the canonical length while `data_offset` would reflect the
/// actually consumed bytes, breaking that invariant and allowing malformed delta entries to point
/// back to themselves or otherwise walk the wrong base objects.
fn encoded_pack_entry_header_size(mut size: u64) -> usize {
let mut bytes = 1;
size >>= 4;
while size != 0 {
bytes += 1;
size >>= 7;
}
bytes
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn rejects_non_canonical_pack_entry_header_encoding() {
assert!(matches!(
data::Entry::from_bytes(&[0xed, 0x00], 0, gix_hash::Kind::Sha1.len_in_bytes()),
Err(Error::Corrupt {
message: "pack entry header uses a non-canonical size encoding"
})
));
fn accepts_non_canonical_pack_entry_header_encoding() {
let pack_offset = 42;
let entry = data::Entry::from_bytes(&[0xb3, 0x00], pack_offset, gix_hash::Kind::Sha1.len_in_bytes())
.expect("non-canonical size encodings are accepted by git");

assert_eq!(entry.header, data::entry::Header::Blob);
assert_eq!(entry.decompressed_size, 3);
assert_eq!(entry.header_size(), 2);
assert_eq!(entry.pack_offset(), pack_offset);
assert_eq!(entry.data_offset, pack_offset + 2);
}

#[test]
fn non_canonical_pack_entry_header_keeps_ofs_delta_base_offsets_correct() {
let pack_offset = 100;
let base_distance = 5;
let entry = data::Entry::from_bytes(
&[0xe4, 0x00, base_distance],
pack_offset,
gix_hash::Kind::Sha1.len_in_bytes(),
)
.expect("non-canonical ofs-delta size encodings are accepted by git");

assert_eq!(
entry.header,
data::entry::Header::OfsDelta {
base_distance: base_distance.into()
}
);
assert_eq!(entry.header_size(), 3);
assert_eq!(entry.pack_offset(), pack_offset);
assert_eq!(
entry.checked_base_pack_offset(base_distance.into()),
Some(pack_offset - u64::from(base_distance))
);
}
}
9 changes: 6 additions & 3 deletions gix-pack/src/data/entry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ impl Entry {
/// Compute the pack offset to the base entry of the object represented by this entry, or
/// return `None` if the distance would underflow or is invalid.
pub fn checked_base_pack_offset(&self, distance: u64) -> Option<data::Offset> {
let pack_offset = self.data_offset - self.header_size() as u64;
Header::verified_base_pack_offset(pack_offset, distance)
Header::verified_base_pack_offset(self.pack_offset(), distance)
}

/// Compute the pack offset to the base entry of the object represented by this entry.
Expand All @@ -52,7 +51,11 @@ impl Entry {
}
/// The amount of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`]
pub fn header_size(&self) -> usize {
self.header.size(self.decompressed_size)
if self.encoded_header_size == 0 {
self.header.size(self.decompressed_size)
} else {
self.encoded_header_size.into()
}
}
Comment thread
0WD0 marked this conversation as resolved.
}

Expand Down
10 changes: 10 additions & 0 deletions gix-pack/src/data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@ pub struct Entry {
pub decompressed_size: u64,
/// absolute offset to compressed object data in the pack, just behind the entry's header
pub data_offset: Offset,
/// The amount of bytes used to encode the entry header in the pack.
///
/// Git accepts non-canonical size encodings in pack entry headers, so this records the actual
/// byte length instead of recomputing the canonical length from the decoded size.
///
/// A value of `0` means the actual length is unknown, causing [`Entry::header_size()`] to
/// recompute the canonical length from the decoded size. This is useful for deserializing data
/// produced by older versions.
#[cfg_attr(feature = "serde", serde(default))]
pub encoded_header_size: u16,
}

mod file;
Expand Down
42 changes: 42 additions & 0 deletions gix-pack/tests/pack/data/fuzzed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,48 @@ fn oversized_pack_entry_header_is_reported_without_panicking() {
);
}

#[test]
fn non_canonical_pack_entry_header_is_accepted() {
fn deflate(bytes: &[u8]) -> Vec<u8> {
let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
out.write_all(bytes).expect("writing to deflater succeeds");
out.flush().expect("flushing deflater succeeds");
out.into_inner()
}

let mut bytes = data::header::encode(data::Version::V2, 1).to_vec();
bytes.extend_from_slice(&[0xb3, 0x00]);
bytes.extend_from_slice(&deflate(b"20\n"));
bytes.extend_from_slice(&[0; 20]);

let file = data::File::from_data(
bytes.as_slice(),
PathBuf::from("non-canonical-header.pack"),
gix_hash::Kind::Sha1,
)
.expect("pack header is syntactically valid");
let entry = file
.entry(12)
.expect("git-compatible non-canonical entry header is accepted");
assert_eq!(entry.header_size(), 2, "the actual header size is retained");
assert_eq!(entry.pack_offset(), 12, "the entry start remains recoverable");

let mut out = Vec::new();
let outcome = file
.decode_entry(
entry,
&mut out,
&mut Default::default(),
&|_, _| None,
&mut gix_pack::cache::Never,
)
.expect("non-canonical entry decodes like a canonical git object");

assert_eq!(out, b"20\n");
assert_eq!(outcome.kind, gix_object::Kind::Blob);
assert_eq!(outcome.object_size, 3);
}

/// Reproducer for the large-allocation fuzz case: attacker-controlled object sizes must not cause
/// `decode_entry()` to attempt multi-gigabyte allocations.
#[test]
Expand Down
Loading