GitoxideLabs · _WD_ (0WD0) · May 22, 2026
diff --git a/gix-pack/src/data/entry/decode.rs b/gix-pack/src/data/entry/decode.rs
@@ -58,6 +58,7 @@ impl data::Entry {
             header: object,
             decompressed_size: size,
             data_offset: pack_offset + consumed as u64,
+            encoded_header_size: consumed.try_into().expect("pack entry headers fit into u16"),
         })
     }
 
@@ -96,6 +97,7 @@ impl data::Entry {
             header: object,
             decompressed_size: size,
             data_offset: pack_offset + consumed as u64,
+            encoded_header_size: consumed.try_into().expect("pack entry headers fit into u16"),
         })
     }
 }
@@ -121,12 +123,6 @@ fn streaming_parse_header_info(read: &mut dyn io::Read) -> Result<(u8, u64, usiz
             .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "pack entry header overflowed"))?;
         shift += 7;
     }
-    if i != encoded_pack_entry_header_size(size) {
-        return Err(io::Error::new(
-            io::ErrorKind::InvalidData,
-            "pack entry header uses a non-canonical size encoding",
-        ));
-    }
     Ok((type_id, size, i))
 }
 
@@ -149,11 +145,6 @@ fn parse_header_info(data: &[u8]) -> Result<(u8, u64, usize), Error> {
         size = size.checked_add(component).ok_or(Error::Overflow)?;
         shift += 7;
     }
-    if i != encoded_pack_entry_header_size(size) {
-        return Err(Error::Corrupt {
-            message: "pack entry header uses a non-canonical size encoding",
-        });
-    }
     Ok((type_id, size, i))
 }
 
@@ -178,35 +169,45 @@ fn parse_leb64(data: &[u8]) -> Result<(u64, usize), Error> {
     Ok((value, i))
 }
 
-/// Return the canonical byte length of a pack-entry size header for `size`.
-///
-/// We use this to reject overlong size encodings during parsing.
-/// That matters for our delta resolution implementation, which later reconstructs an entry's
-/// pack offset from `data_offset - header_size()`. If we accepted non-canonical encodings here,
-/// `header_size()` would compute the canonical length while `data_offset` would reflect the
-/// actually consumed bytes, breaking that invariant and allowing malformed delta entries to point
-/// back to themselves or otherwise walk the wrong base objects.
-fn encoded_pack_entry_header_size(mut size: u64) -> usize {
-    let mut bytes = 1;
-    size >>= 4;
-    while size != 0 {
-        bytes += 1;
-        size >>= 7;
-    }
-    bytes
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
 
     #[test]
-    fn rejects_non_canonical_pack_entry_header_encoding() {
-        assert!(matches!(
-            data::Entry::from_bytes(&[0xed, 0x00], 0, gix_hash::Kind::Sha1.len_in_bytes()),
-            Err(Error::Corrupt {
-                message: "pack entry header uses a non-canonical size encoding"
-            })
-        ));
+    fn accepts_non_canonical_pack_entry_header_encoding() {
+        let pack_offset = 42;
+        let entry = data::Entry::from_bytes(&[0xb3, 0x00], pack_offset, gix_hash::Kind::Sha1.len_in_bytes())
+            .expect("non-canonical size encodings are accepted by git");
+
+        assert_eq!(entry.header, data::entry::Header::Blob);
+        assert_eq!(entry.decompressed_size, 3);
+        assert_eq!(entry.header_size(), 2);
+        assert_eq!(entry.pack_offset(), pack_offset);
+        assert_eq!(entry.data_offset, pack_offset + 2);
+    }
+
+    #[test]
+    fn non_canonical_pack_entry_header_keeps_ofs_delta_base_offsets_correct() {
+        let pack_offset = 100;
+        let base_distance = 5;
+        let entry = data::Entry::from_bytes(
+            &[0xe4, 0x00, base_distance],
+            pack_offset,
+            gix_hash::Kind::Sha1.len_in_bytes(),
+        )
+        .expect("non-canonical ofs-delta size encodings are accepted by git");
+
+        assert_eq!(
+            entry.header,
+            data::entry::Header::OfsDelta {
+                base_distance: base_distance.into()
+            }
+        );
+        assert_eq!(entry.header_size(), 3);
+        assert_eq!(entry.pack_offset(), pack_offset);
+        assert_eq!(
+            entry.checked_base_pack_offset(base_distance.into()),
+            Some(pack_offset - u64::from(base_distance))
+        );
     }
 }
diff --git a/gix-pack/src/data/entry/mod.rs b/gix-pack/src/data/entry/mod.rs
@@ -33,8 +33,7 @@ impl Entry {
     /// Compute the pack offset to the base entry of the object represented by this entry, or
     /// return `None` if the distance would underflow or is invalid.
     pub fn checked_base_pack_offset(&self, distance: u64) -> Option<data::Offset> {
-        let pack_offset = self.data_offset - self.header_size() as u64;
-        Header::verified_base_pack_offset(pack_offset, distance)
+        Header::verified_base_pack_offset(self.pack_offset(), distance)
     }
 
     /// Compute the pack offset to the base entry of the object represented by this entry.
@@ -52,7 +51,11 @@ impl Entry {
     }
     /// The amount of bytes used to describe this entry in the pack. The header starts at [`Self::pack_offset()`]
     pub fn header_size(&self) -> usize {
-        self.header.size(self.decompressed_size)
+        if self.encoded_header_size == 0 {
+            self.header.size(self.decompressed_size)
+        } else {
+            self.encoded_header_size.into()
+        }
     }
 }
 

diff --git a/gix-pack/src/data/mod.rs b/gix-pack/src/data/mod.rs
@@ -20,6 +20,16 @@ pub struct Entry {
     pub decompressed_size: u64,
     /// absolute offset to compressed object data in the pack, just behind the entry's header
     pub data_offset: Offset,
+    /// The amount of bytes used to encode the entry header in the pack.
+    ///
+    /// Git accepts non-canonical size encodings in pack entry headers, so this records the actual
+    /// byte length instead of recomputing the canonical length from the decoded size.
+    ///
+    /// A value of `0` means the actual length is unknown, causing [`Entry::header_size()`] to
+    /// recompute the canonical length from the decoded size. This is useful for deserializing data
+    /// produced by older versions.
+    #[cfg_attr(feature = "serde", serde(default))]
+    pub encoded_header_size: u16,
 }
 
 mod file;

diff --git a/gix-pack/tests/pack/data/fuzzed.rs b/gix-pack/tests/pack/data/fuzzed.rs
@@ -55,6 +55,48 @@ fn oversized_pack_entry_header_is_reported_without_panicking() {
     );
 }
 
+#[test]
+fn non_canonical_pack_entry_header_is_accepted() {
+    fn deflate(bytes: &[u8]) -> Vec<u8> {
+        let mut out = gix_features::zlib::stream::deflate::Write::new(Vec::new());
+        out.write_all(bytes).expect("writing to deflater succeeds");
+        out.flush().expect("flushing deflater succeeds");
+        out.into_inner()
+    }
+
+    let mut bytes = data::header::encode(data::Version::V2, 1).to_vec();
+    bytes.extend_from_slice(&[0xb3, 0x00]);
+    bytes.extend_from_slice(&deflate(b"20\n"));
+    bytes.extend_from_slice(&[0; 20]);
+
+    let file = data::File::from_data(
+        bytes.as_slice(),
+        PathBuf::from("non-canonical-header.pack"),
+        gix_hash::Kind::Sha1,
+    )
+    .expect("pack header is syntactically valid");
+    let entry = file
+        .entry(12)
+        .expect("git-compatible non-canonical entry header is accepted");
+    assert_eq!(entry.header_size(), 2, "the actual header size is retained");
+    assert_eq!(entry.pack_offset(), 12, "the entry start remains recoverable");
+
+    let mut out = Vec::new();
+    let outcome = file
+        .decode_entry(
+            entry,
+            &mut out,
+            &mut Default::default(),
+            &|_, _| None,
+            &mut gix_pack::cache::Never,
+        )
+        .expect("non-canonical entry decodes like a canonical git object");
+
+    assert_eq!(out, b"20\n");
+    assert_eq!(outcome.kind, gix_object::Kind::Blob);
+    assert_eq!(outcome.object_size, 3);
+}
+
 /// Reproducer for the large-allocation fuzz case: attacker-controlled object sizes must not cause
 /// `decode_entry()` to attempt multi-gigabyte allocations.
 #[test]