From febd84560cb512266f624cc706cc9a348d5c0a20 Mon Sep 17 00:00:00 2001 From: Kristof Date: Thu, 2 Jan 2025 10:14:34 +0100 Subject: [PATCH 1/5] merge --- src/helpers.rs | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 871b54ab..c2d41e7b 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -102,29 +102,10 @@ pub fn has_ff(v: u64) -> bool { pub const fn devli(s: u8, value: u16) -> i16 { let shifted = 1 << s; - if value & (shifted >> 1) != 0 { + if value >= (shifted >> 1) { value as i16 } else { - value.wrapping_add(2).wrapping_add(!shifted) as i16 - } -} - -/// check to make sure the behavior hasn't changed even with the optimization -#[test] -fn devli_test() { - for s in 0u8..15 { - for value in 0..(1 << s) { - assert_eq!( - devli(s, value) as i16, - if s == 0 { - value as i16 - } else if value < (1 << (s as u16 - 1)) { - value as i16 + (-1 << s as i16) + 1 - } else { - value as i16 - } - ); - } + (value + !(shifted - 1) + 1) as i16 } } From 3f40d98c883248d8cdb193e162e77e2d7fa1ab84 Mon Sep 17 00:00:00 2001 From: Kristof Date: Mon, 25 Nov 2024 16:19:35 +0100 Subject: [PATCH 2/5] added test --- src/helpers.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/helpers.rs b/src/helpers.rs index c2d41e7b..c4734aba 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -109,6 +109,25 @@ pub const fn devli(s: u8, value: u16) -> i16 { } } +/// check to make sure the behavior hasn't changed even with the optimization +#[test] +fn devli_test() { + for s in 0u8..15 { + for value in 0..(1 << s) { + assert_eq!( + devli(s, value) as i16, + if s == 0 { + value as i16 + } else if value < (1 << (s as u16 - 1)) { + value as i16 + (-1 << s as i16) + 1 + } else { + value as i16 + } + ); + } + } +} + #[inline(always)] pub const fn b_short(v1: u8, v2: u8) -> u16 { ((v1 as u16) << 8) + v2 as u16 From d0dfe821ffbed5fe0e5509d09444103813281aab Mon Sep 17 00:00:00 2001 From: Kristof Date: Thu, 2 Jan 2025 10:41:33 +0100 Subject: [PATCH 3/5] use 32 bit math generally --- src/helpers.rs | 10 +++++----- src/jpeg/bit_reader.rs | 6 +++--- src/jpeg/bit_writer.rs | 2 +- src/jpeg/jpeg_read.rs | 16 ++++++++-------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index c4734aba..bb68b2fc 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -99,20 +99,20 @@ pub fn has_ff(v: u64) -> bool { } #[inline(always)] -pub const fn devli(s: u8, value: u16) -> i16 { - let shifted = 1 << s; +pub const fn devli(s: u32, value: u32) -> i16 { + let shifted = (1 << s) - 1; - if value >= (shifted >> 1) { + if (value << 1) > shifted { value as i16 } else { - (value + !(shifted - 1) + 1) as i16 + value.wrapping_sub(shifted) as i16 } } /// check to make sure the behavior hasn't changed even with the optimization #[test] fn devli_test() { - for s in 0u8..15 { + for s in 0u32..15 { for value in 0..(1 << s) { assert_eq!( devli(s, value) as i16, diff --git a/src/jpeg/bit_reader.rs b/src/jpeg/bit_reader.rs index b0447fdf..516038f2 100644 --- a/src/jpeg/bit_reader.rs +++ b/src/jpeg/bit_reader.rs @@ -59,7 +59,7 @@ impl BitReader { impl BitReader { #[inline(always)] - pub fn read(&mut self, bits_to_read: u32) -> std::io::Result { + pub fn read(&mut self, bits_to_read: u32) -> std::io::Result { if bits_to_read == 0 { return Ok(0); } @@ -69,7 +69,7 @@ impl BitReader { } let retval = - (self.bits >> (self.bits_left - bits_to_read) & ((1 << bits_to_read) - 1)) as u16; + ((self.bits >> (self.bits_left - bits_to_read)) as u32) & ((1 << bits_to_read) - 1); self.bits_left -= bits_to_read; return Ok(retval); } @@ -224,7 +224,7 @@ impl BitReader { } Some(x) => { // if we already saw a padding, then it should match - let expected = u16::from(x) & all_one; + let expected = u32::from(x) & all_one; if actual != expected { return err_exit_code(ExitCode::InvalidPadding, format!("padding of {0} bits should be set to 1 actual={1:b} expected={2:b}", num_bits_to_read, actual, expected).as_str()); } diff --git a/src/jpeg/bit_writer.rs b/src/jpeg/bit_writer.rs index e080bc2e..5be92bd6 100644 --- a/src/jpeg/bit_writer.rs +++ b/src/jpeg/bit_writer.rs @@ -271,7 +271,7 @@ fn roundtrip_randombits() { ); assert_eq!( - code, + code as u32, r.read(numbits as u32).unwrap(), "read unexpected result" ); diff --git a/src/jpeg/jpeg_read.rs b/src/jpeg/jpeg_read.rs index 05c89ce7..aa7059ed 100644 --- a/src/jpeg/jpeg_read.rs +++ b/src/jpeg/jpeg_read.rs @@ -683,7 +683,7 @@ fn next_huff_code(bit_reader: &mut BitReader, ctree: &HuffTree) - let mut node: u16 = 0; while node < 256 { - node = ctree.node[usize::from(node)][usize::from(bit_reader.read(1)?)]; + node = ctree.node[usize::from(node)][bit_reader.read(1)? as usize]; } if node == 0xffff { @@ -740,9 +740,9 @@ fn read_coef( // analyse code if hc != 0 { let z = usize::from(lbits(hc, 4)); - let literal_bits = rbits(hc, 4); + let literal_bits = u32::from(rbits(hc, 4)); - let value = bit_reader.read(u32::from(literal_bits))?; + let value = bit_reader.read(literal_bits)?; Ok(Some((z, devli(literal_bits, value)))) } else { Ok(None) @@ -773,8 +773,8 @@ fn decode_ac_prg_fs( if (l == 15) || (r > 0) { // decode run/level combination let mut z = l; - let s = r; - let n = bit_reader.read(u32::from(s))?; + let s = u32::from(r); + let n = bit_reader.read(s)?; if (z + bpos) > to { return err_exit_code(ExitCode::UnsupportedJpeg, "run is too long"); } @@ -791,7 +791,7 @@ fn decode_ac_prg_fs( // decode eobrun let s = l; let n = bit_reader.read(u32::from(s))?; - state.eobrun = decode_eobrun_bits(s, n); + state.eobrun = decode_eobrun_bits(s, n) as u16; state.eobrun -= 1; // decrement eobrun ( for this one ) @@ -869,7 +869,7 @@ fn decode_ac_prg_sa( eob = bpos; let s = l; let n = bit_reader.read(u32::from(s))?; - state.eobrun = decode_eobrun_bits(s, n); + state.eobrun = decode_eobrun_bits(s, n) as u16; // since we hit EOB, the rest can be done with the zero block decoder decode_eobrun_sa(bit_reader, block, state, bpos, to)?; @@ -906,6 +906,6 @@ fn decode_eobrun_sa( /// decoding for decoding eobrun lengths. The encoding chops off the most significant /// bit since it is always 1, so we need to add it back. -fn decode_eobrun_bits(s: u8, n: u16) -> u16 { +fn decode_eobrun_bits(s: u8, n: u32) -> u32 { n + (1 << s) } From d954fe03d6256a7e82f4c71afa297f3fd85f5066 Mon Sep 17 00:00:00 2001 From: Kristof Date: Sun, 5 Jan 2025 18:08:33 +0100 Subject: [PATCH 4/5] improve comments --- src/jpeg/bit_reader.rs | 130 ++++++++++++++++++++++++++++------------- src/jpeg/bit_writer.rs | 2 +- src/jpeg/jpeg_read.rs | 52 ++++++++--------- 3 files changed, 112 insertions(+), 72 deletions(-) diff --git a/src/jpeg/bit_reader.rs b/src/jpeg/bit_reader.rs index 516038f2..70672fac 100644 --- a/src/jpeg/bit_reader.rs +++ b/src/jpeg/bit_reader.rs @@ -8,7 +8,7 @@ use std::io::{BufRead, Seek}; use super::jpeg_code; use crate::helpers::has_ff; -use crate::lepton_error::{err_exit_code, ExitCode}; +use crate::lepton_error::{err_exit_code, ExitCode, Result}; use crate::LeptonError; // Implemenation of bit reader on top of JPEG data stream as read by a reader @@ -59,13 +59,9 @@ impl BitReader { impl BitReader { #[inline(always)] - pub fn read(&mut self, bits_to_read: u32) -> std::io::Result { - if bits_to_read == 0 { - return Ok(0); - } - + pub fn read(&mut self, bits_to_read: u32) -> Result { if self.bits_left < bits_to_read { - self.fill_register(bits_to_read)?; + self.fill_register_slow(bits_to_read)?; } let retval = @@ -75,9 +71,9 @@ impl BitReader { } #[inline(always)] - pub fn peek(&self) -> (u8, u32) { + pub fn peek(&self) -> (u32, u32) { ( - ((self.bits.wrapping_shl(64 - self.bits_left)) >> 56) as u8, + ((self.bits.wrapping_shl(64 - self.bits_left)) >> 56) as u32, self.bits_left, ) } @@ -87,44 +83,95 @@ impl BitReader { self.bits_left -= bits; } + /// Fills the register as much as possible with the current buffer in + /// a non-destructive manner. This requires the BufRead implementation to + /// have enough space available. #[inline(always)] - pub fn fill_register(&mut self, bits_to_read: u32) -> Result<(), std::io::Error> { + pub fn optimistic_fill(&mut self) { + if self.bits_left < 8 { + self.optimistic_fill_slow(); + } + } + + /// Assuming that there are less than 8 bits in our buffer, fill the rest of the buffer + /// with as many bits as possible, leaving the possibility of unwinding via the undo_read_ahead + /// function in certain corner cases. + #[inline(never)] + #[cold] + fn optimistic_fill_slow(&mut self) { + // for correctness, we need to have less than 8 bits left, otherwise we can't + // consume the read_ahead_bytes and successfully undo the read_ahead if we have to. + debug_assert!(self.bits_left < 8); + // first consume the read_ahead bytes that we have now consumed // (otherwise we wouldn't have been called) self.inner.consume(self.read_ahead_bytes as usize); - let fb = self.inner.fill_buf()?; + if let Ok(fb) = self.inner.fill_buf() { + // if we have 8 bytes and there is no 0xff in them, then we can just read the bits directly as big endian + if fb.len() < 8 { + self.read_ahead_bytes = 0; + return; + } - // if we have 8 bytes and there is no 0xff in them, then we can just read the bits directly as big endian - let mut v; - if fb.len() < 8 || { - v = u64::from_le_bytes(fb[..8].try_into().unwrap()); - has_ff(v) - } { - self.read_ahead_bytes = 0; - return self.fill_register_slow(bits_to_read); - } + let mut v = u64::from_le_bytes(fb[..8].try_into().unwrap()); + if has_ff(v) { + // this is the expensive path, but rarer where there are 0xff bytes in the buffer + let mut bytes_left = 8; + self.read_ahead_bytes = 0; - v = v.to_be(); + while bytes_left >= 2 { + if v & 0xff == 0xff { + if v & 0xff00 != 0 { + // escape sequence or reset marker, just exit the loop and let fill_register handle it + break; + } - // only fill 63 bits not 64 to avoid having to special case - // of self.bits << 64 which is a nop - let bytes_to_read = (63 - self.bits_left) / 8; + self.bits = (self.bits << 8) | 0xff; + self.bits_left += 8; + self.read_ahead_bytes += 2; + + v >>= 16; + bytes_left -= 2; + } else { + self.bits = (self.bits << 8) | (v & 0xff); + self.bits_left += 8; + self.read_ahead_bytes += 1; - self.bits = self.bits << (bytes_to_read * 8) | v >> (64 - bytes_to_read * 8); - self.bits_left += bytes_to_read * 8; - self.read_ahead_bytes = (self.bits_left - bits_to_read) / 8; + v >>= 8; + bytes_left -= 1; + } + } + } else { + // no 0xff bytes, just read the bits all at a time and reverse endian to get them in the right order. + v = v.to_be(); - self.inner - .consume((bytes_to_read - self.read_ahead_bytes) as usize); + // only fill 63 bits not 64 to avoid having to special case + // of self.bits << 64 which is a nop + let bytes_to_read = (63 - self.bits_left) / 8; - return Ok(()); + self.bits = self.bits << (bytes_to_read * 8) | v >> (64 - bytes_to_read * 8); + self.bits_left += bytes_to_read * 8; + self.read_ahead_bytes = self.bits_left / 8; + } + } } + /// Fills the register up to the number of bits requested, with the assumption that these + /// will be immediately consumed. + /// + /// This function ends up being called very infrequently since almost all of the time the optimistic_fill ensures + /// that there are enough bits to work with. Effectively this function ends up only being called in corner cases + /// where we are near the end of a BufRead block, at the end of the file or about to hit a reset marker. #[cold] - fn fill_register_slow(&mut self, bits_to_read: u32) -> Result<(), std::io::Error> { - loop { + #[inline(never)] + fn fill_register_slow(&mut self, bits_to_read: u32) -> Result<()> { + self.inner.consume(self.read_ahead_bytes as usize); + self.read_ahead_bytes = 0; + + while self.bits_left < bits_to_read { let fb = self.inner.fill_buf()?; + if let &[b, ..] = fb { self.inner.consume(1); @@ -177,10 +224,6 @@ impl BitReader { // continue since we still might need to read more 0 bits } - - if self.bits_left >= bits_to_read { - break; - } } Ok(()) } @@ -191,10 +234,7 @@ impl BitReader { /// used to verify whether this image is using 1s or 0s as fill bits. /// Returns whether the fill bit was 1 or so or unknown (None) - pub fn read_and_verify_fill_bits( - &mut self, - pad_bit: &mut Option, - ) -> Result<(), LeptonError> { + pub fn read_and_verify_fill_bits(&mut self, pad_bit: &mut Option) -> Result<()> { self.undo_read_ahead(); // if there are bits left, we need to see whether they @@ -235,7 +275,7 @@ impl BitReader { return Ok(()); } - pub fn verify_reset_code(&mut self) -> Result<(), LeptonError> { + pub fn verify_reset_code(&mut self) -> Result<()> { // we reached the end of a MCU, so we need to find a reset code and the huffman codes start get padded out, but the spec // doesn't specify whether the padding should be 1s or 0s, so we ensure that at least the file is consistant so that we // can recode it again just by remembering the pad bit. @@ -281,9 +321,15 @@ impl BitReader { /// the only bits that are left are part of the current byte. pub fn undo_read_ahead(&mut self) { while self.bits_left >= 8 && self.read_ahead_bytes > 0 { + // if it was an 0xff then rewind 2 bytes since this was an escape code + if self.bits & 0xff == 0xff { + self.read_ahead_bytes -= 2; + } else { + self.read_ahead_bytes -= 1; + } + self.bits_left -= 8; self.bits >>= 8; - self.read_ahead_bytes -= 1; } if self.read_ahead_bytes > 0 { diff --git a/src/jpeg/bit_writer.rs b/src/jpeg/bit_writer.rs index 5be92bd6..16ef49a6 100644 --- a/src/jpeg/bit_writer.rs +++ b/src/jpeg/bit_writer.rs @@ -266,7 +266,7 @@ fn roundtrip_randombits() { assert_eq!( expected_peek_byte & mask, - peekcode & mask, + (peekcode as u8) & mask, "peek unexpected result" ); diff --git a/src/jpeg/jpeg_read.rs b/src/jpeg/jpeg_read.rs index aa7059ed..b9c7f9c7 100644 --- a/src/jpeg/jpeg_read.rs +++ b/src/jpeg/jpeg_read.rs @@ -678,7 +678,8 @@ pub(super) fn decode_block_seq( } /// Reads and decodes next Huffman code from BitReader using the provided tree -#[inline(always)] +#[cold] +#[inline(never)] fn next_huff_code(bit_reader: &mut BitReader, ctree: &HuffTree) -> Result { let mut node: u16 = 0; @@ -693,6 +694,7 @@ fn next_huff_code(bit_reader: &mut BitReader, ctree: &HuffTree) - } } +#[inline(always)] fn read_dc(bit_reader: &mut BitReader, tree: &HuffTree) -> Result { let (z, coef) = read_coef(bit_reader, tree)?.unwrap_or((0, 0)); if z != 0 { @@ -710,32 +712,7 @@ fn read_coef( bit_reader: &mut BitReader, tree: &HuffTree, ) -> Result> { - // if the code we found is smaller or equal to the number of bits left, take the shortcut - let hc; - - loop { - // peek ahead to see if we can decode the symbol immediately - // given what has already been read into the bitreader - let (peek_value, peek_len) = bit_reader.peek(); - - // use lookup table to figure out the first code in this byte and how long it is - let (code, code_len) = tree.peek_code[peek_value as usize]; - - if u32::from(code_len) <= peek_len { - // found code directly, so advance by the number of bits immediately - hc = code; - bit_reader.advance(u32::from(code_len)); - break; - } else if peek_len < 8 { - // peek code works with up to 8 bits at a time. If we had less - // than this, then we need to read more bits into the bitreader - bit_reader.fill_register(8)?; - } else { - // take slow path since we have a code that is bigger than 8 bits (but pretty rare) - hc = next_huff_code(bit_reader, tree)?; - break; - } - } + let hc = read_code(bit_reader, tree)?; // analyse code if hc != 0 { @@ -749,6 +726,23 @@ fn read_coef( } } +#[inline(always)] +fn read_code(bit_reader: &mut BitReader, tree: &HuffTree) -> Result { + bit_reader.optimistic_fill(); + let (peek_value, peek_len) = bit_reader.peek(); + let (code, code_len) = tree.peek_code[peek_value as usize]; + + if u32::from(code_len) <= peek_len { + // found code directly, so advance by the number of bits immediately + bit_reader.advance(u32::from(code_len)); + + Ok(code) + } else { + // take slow path since we have a code that is bigger than 8 bits (but pretty rare) + next_huff_code(bit_reader, tree) + } +} + /// progressive AC decoding (first pass) fn decode_ac_prg_fs( bit_reader: &mut BitReader, @@ -764,7 +758,7 @@ fn decode_ac_prg_fs( let mut bpos = from; while bpos <= to { // decode next - let hc = next_huff_code(bit_reader, actree)?; + let hc = read_code(bit_reader, actree)?; let l = lbits(hc, 4); let r = rbits(hc, 4); @@ -820,7 +814,7 @@ fn decode_ac_prg_sa( // decode AC succesive approximation bits while bpos <= to { // decode next - let hc = next_huff_code(bit_reader, actree)?; + let hc = read_code(bit_reader, actree)?; let l = lbits(hc, 4); let r = rbits(hc, 4); From 5e01c3d5d77c3e7ca7a2ad1cec2d18745cc4c11f Mon Sep 17 00:00:00 2001 From: Kristof Date: Sun, 5 Jan 2025 18:09:25 +0100 Subject: [PATCH 5/5] fix comment --- src/jpeg/bit_reader.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jpeg/bit_reader.rs b/src/jpeg/bit_reader.rs index 70672fac..0a5c1949 100644 --- a/src/jpeg/bit_reader.rs +++ b/src/jpeg/bit_reader.rs @@ -123,7 +123,7 @@ impl BitReader { while bytes_left >= 2 { if v & 0xff == 0xff { if v & 0xff00 != 0 { - // escape sequence or reset marker, just exit the loop and let fill_register handle it + // reset marker or end of scan, just exit the loop and let fill_register handle it break; }