microsoft · mcroomp · Jan 2, 2025 · Nov 25, 2024 · Jan 2, 2025 · Jan 5, 2025
diff --git a/lib/src/helpers.rs b/lib/src/helpers.rs
@@ -61,20 +61,20 @@ pub fn has_ff(v: u64) -> bool {
 }
 
 #[inline(always)]
-pub const fn devli(s: u8, value: u16) -> i16 {
-    let shifted = 1 << s;
+pub const fn devli(s: u32, value: u32) -> i16 {
+    let shifted = (1 << s) - 1;
 
-    if value & (shifted >> 1) != 0 {
+    if (value << 1) > shifted {
         value as i16
     } else {
-        value.wrapping_add(2).wrapping_add(!shifted) as i16
+        value.wrapping_sub(shifted) as i16
     }
 }
 
 /// check to make sure the behavior hasn't changed even with the optimization
 #[test]
 fn devli_test() {
-    for s in 0u8..15 {
+    for s in 0u32..15 {
         for value in 0..(1 << s) {
             assert_eq!(
                 devli(s, value),

diff --git a/lib/src/jpeg/bit_reader.rs b/lib/src/jpeg/bit_reader.rs
@@ -8,7 +8,7 @@ use std::io::BufRead;
 
 use super::jpeg_code;
 use crate::helpers::has_ff;
-use crate::lepton_error::{ExitCode, err_exit_code};
+use crate::lepton_error::{ExitCode, Result, err_exit_code};
 use crate::{LeptonError, StreamPosition};
 
 // Implemenation of bit reader on top of JPEG data stream as read by a reader
@@ -59,25 +59,21 @@ impl<R: BufRead + StreamPosition> BitReader<R> {
 
 impl<R: BufRead> BitReader<R> {
     #[inline(always)]
-    pub fn read(&mut self, bits_to_read: u32) -> std::io::Result<u16> {
-        if bits_to_read == 0 {
-            return Ok(0);
-        }
-
+    pub fn read(&mut self, bits_to_read: u32) -> Result<u32> {
         if self.bits_left < bits_to_read {
-            self.fill_register(bits_to_read)?;
+            self.fill_register_slow(bits_to_read)?;
         }
 
         let retval =
-            (self.bits >> (self.bits_left - bits_to_read) & ((1 << bits_to_read) - 1)) as u16;
+            ((self.bits >> (self.bits_left - bits_to_read)) as u32) & ((1 << bits_to_read) - 1);
         self.bits_left -= bits_to_read;
         return Ok(retval);
     }
 
     #[inline(always)]
-    pub fn peek(&self) -> (u8, u32) {
+    pub fn peek(&self) -> (u32, u32) {
         (
-            ((self.bits.wrapping_shl(64 - self.bits_left)) >> 56) as u8,
+            ((self.bits.wrapping_shl(64 - self.bits_left)) >> 56) as u32,
             self.bits_left,
         )
     }
@@ -87,44 +83,95 @@ impl<R: BufRead> BitReader<R> {
         self.bits_left -= bits;
     }
 
+    /// Fills the register as much as possible with the current buffer in
+    /// a non-destructive manner. This requires the BufRead implementation to
+    /// have enough space available.
     #[inline(always)]
-    pub fn fill_register(&mut self, bits_to_read: u32) -> Result<(), std::io::Error> {
+    pub fn optimistic_fill(&mut self) {
+        if self.bits_left < 8 {
+            self.optimistic_fill_slow();
+        }
+    }
+
+    /// Assuming that there are less than 8 bits in our buffer, fill the rest of the buffer
+    /// with as many bits as possible, leaving the possibility of unwinding via the undo_read_ahead
+    /// function in certain corner cases.
+    #[inline(never)]
+    #[cold]
+    fn optimistic_fill_slow(&mut self) {
+        // for correctness, we need to have less than 8 bits left, otherwise we can't
+        // consume the read_ahead_bytes and successfully undo the read_ahead if we have to.
+        debug_assert!(self.bits_left < 8);
+
         // first consume the read_ahead bytes that we have now consumed
         // (otherwise we wouldn't have been called)
         self.inner.consume(self.read_ahead_bytes as usize);
 
-        let fb = self.inner.fill_buf()?;
+        if let Ok(fb) = self.inner.fill_buf() {
+            // if we have 8 bytes and there is no 0xff in them, then we can just read the bits directly as big endian
+            if fb.len() < 8 {
+                self.read_ahead_bytes = 0;
+                return;
+            }
 
-        // if we have 8 bytes and there is no 0xff in them, then we can just read the bits directly as big endian
-        let mut v;
-        if fb.len() < 8 || {
-            v = u64::from_le_bytes(fb[..8].try_into().unwrap());
-            has_ff(v)
-        } {
-            self.read_ahead_bytes = 0;
-            return self.fill_register_slow(bits_to_read);
-        }
+            let mut v = u64::from_le_bytes(fb[..8].try_into().unwrap());
+            if has_ff(v) {
+                // this is the expensive path, but rarer where there are 0xff bytes in the buffer
+                let mut bytes_left = 8;
+                self.read_ahead_bytes = 0;
 
-        v = v.to_be();
+                while bytes_left >= 2 {
+                    if v & 0xff == 0xff {
+                        if v & 0xff00 != 0 {
+                            // reset marker or end of scan, just exit the loop and let fill_register handle it
+                            break;
+                        }
 
-        // only fill 63 bits not 64 to avoid having to special case
-        // of self.bits << 64 which is a nop
-        let bytes_to_read = (63 - self.bits_left) / 8;
+                        self.bits = (self.bits << 8) | 0xff;
+                        self.bits_left += 8;
+                        self.read_ahead_bytes += 2;
+
+                        v >>= 16;
+                        bytes_left -= 2;
+                    } else {
+                        self.bits = (self.bits << 8) | (v & 0xff);
+                        self.bits_left += 8;
+                        self.read_ahead_bytes += 1;
 
-        self.bits = self.bits << (bytes_to_read * 8) | v >> (64 - bytes_to_read * 8);
-        self.bits_left += bytes_to_read * 8;
-        self.read_ahead_bytes = (self.bits_left - bits_to_read) / 8;
+                        v >>= 8;
+                        bytes_left -= 1;
+                    }
+                }
+            } else {
+                // no 0xff bytes, just read the bits all at a time and reverse endian to get them in the right order.
+                v = v.to_be();
 
-        self.inner
-            .consume((bytes_to_read - self.read_ahead_bytes) as usize);
+                // only fill 63 bits not 64 to avoid having to special case
+                // of self.bits << 64 which is a nop
+                let bytes_to_read = (63 - self.bits_left) / 8;
 
-        return Ok(());
+                self.bits = self.bits << (bytes_to_read * 8) | v >> (64 - bytes_to_read * 8);
+                self.bits_left += bytes_to_read * 8;
+                self.read_ahead_bytes = self.bits_left / 8;
+            }
+        }
     }
 
+    /// Fills the register up to the number of bits requested, with the assumption that these
+    /// will be immediately consumed.
+    ///
+    /// This function ends up being called very infrequently since almost all of the time the optimistic_fill ensures
+    /// that there are enough bits to work with. Effectively this function ends up only being called in corner cases
+    /// where we are near the end of a BufRead block, at the end of the file or about to hit a reset marker.
     #[cold]
-    fn fill_register_slow(&mut self, bits_to_read: u32) -> Result<(), std::io::Error> {
-        loop {
+    #[inline(never)]
+    fn fill_register_slow(&mut self, bits_to_read: u32) -> Result<()> {
+        self.inner.consume(self.read_ahead_bytes as usize);
+        self.read_ahead_bytes = 0;
+
+        while self.bits_left < bits_to_read {
             let fb = self.inner.fill_buf()?;
+
             if let &[b, ..] = fb {
                 self.inner.consume(1);
 
@@ -176,10 +223,6 @@ impl<R: BufRead> BitReader<R> {
 
                 // continue since we still might need to read more 0 bits
             }
-
-            if self.bits_left >= bits_to_read {
-                break;
-            }
         }
         Ok(())
     }
@@ -190,10 +233,7 @@ impl<R: BufRead> BitReader<R> {
 
     /// used to verify whether this image is using 1s or 0s as fill bits.
     /// Returns whether the fill bit was 1 or so or unknown (None)
-    pub fn read_and_verify_fill_bits(
-        &mut self,
-        pad_bit: &mut Option<u8>,
-    ) -> Result<(), LeptonError> {
+    pub fn read_and_verify_fill_bits(&mut self, pad_bit: &mut Option<u8>) -> Result<()> {
         self.undo_read_ahead();
 
         // if there are bits left, we need to see whether they
@@ -222,7 +262,7 @@ impl<R: BufRead> BitReader<R> {
                 }
                 Some(x) => {
                     // if we already saw a padding, then it should match
-                    let expected = u16::from(x) & all_one;
+                    let expected = u32::from(x) & all_one;
                     if actual != expected {
                         return err_exit_code(
                             ExitCode::InvalidPadding,
@@ -239,7 +279,7 @@ impl<R: BufRead> BitReader<R> {
         return Ok(());
     }
 
-    pub fn verify_reset_code(&mut self) -> Result<(), LeptonError> {
+    pub fn verify_reset_code(&mut self) -> Result<()> {
         // we reached the end of a MCU, so we need to find a reset code and the huffman codes start get padded out, but the spec
         // doesn't specify whether the padding should be 1s or 0s, so we ensure that at least the file is consistant so that we
         // can recode it again just by remembering the pad bit.
@@ -285,9 +325,15 @@ impl<R: BufRead> BitReader<R> {
     /// the only bits that are left are part of the current byte.
     pub fn undo_read_ahead(&mut self) {
         while self.bits_left >= 8 && self.read_ahead_bytes > 0 {
+            // if it was an 0xff then rewind 2 bytes since this was an escape code
+            if self.bits & 0xff == 0xff {
+                self.read_ahead_bytes -= 2;
+            } else {
+                self.read_ahead_bytes -= 1;
+            }
+
             self.bits_left -= 8;
             self.bits >>= 8;
-            self.read_ahead_bytes -= 1;
         }
 
         if self.read_ahead_bytes > 0 {

diff --git a/lib/src/jpeg/bit_writer.rs b/lib/src/jpeg/bit_writer.rs
@@ -192,7 +192,7 @@ mod tests {
             let mut r = BitReader::new(Cursor::new(&buf));
 
             for i in 1..2048 {
-                assert_eq!(i, r.read(u32_bit_length(i as u32) as u32).unwrap());
+                assert_eq!(i, r.read(u32_bit_length(i) as u32).unwrap());
             }
 
             let mut pad = Some(0xff);
@@ -231,7 +231,7 @@ mod tests {
             if rng.gen_range(0..100) == 0 {
                 test_data.push(Action::Pad(0xff));
             } else {
-                test_data.push(Action::Write(v as u16, bits as u8));
+                test_data.push(Action::Write(v, bits));
             }
         }
         test_data.push(Action::Pad(0xff));
@@ -264,16 +264,16 @@ mod tests {
                         let (peekcode, peekbits) = r.peek();
                         let num_valid_bits = peekbits.min(8).min(u32::from(numbits));
 
-                        let mask = (0xff00 >> num_valid_bits) as u8;
+                        let mask = 0xff00u32 >> num_valid_bits;
 
                         assert_eq!(
-                            expected_peek_byte & mask,
+                            expected_peek_byte as u32 & mask,
                             peekcode & mask,
                             "peek unexpected result"
                         );
 
                         assert_eq!(
-                            code,
+                            u32::from(code),
                             r.read(numbits as u32).unwrap(),
                             "read unexpected result"
                         );