Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions src/structs/div.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use std::cmp::max;

/// Fast division of |divident| < (1 << precision) by i32 divisor D.
// We need for edge DCT coefs prediction precision of 32 - 13 = 19 bits
// for quantization table values 0 < D < 65536.

/// Usage for n / D:
// let mult: i64 = recip(D);
// let shift = mult & 0xFF;
// let mut res = ((n as i64) * (mult & !0xFF)) >> shift;
// res += if res < 0 { 1 } else { 0 };
/// or variant lower through absolute values.

// We use some excessive precision here, effectively precision for division
// of i32 by (q << 13) is 32 - 13 = 19 bit, but we use 20 instead
const DIV_PRECISION: i32 = 20;

// Modified from https://web.archive.org/web/20160927064638/https://raw.githubusercontent.com/ridiculousfish/libdivide/master/divide_by_constants_codegen_reference.c
// Comments from original author ridiculousfish
// See also https://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
/*
Reference implementations of computing and using the "magic number" approach to dividing
by constants, including codegen instructions. The unsigned division incorporates the
"round down" optimization per ridiculous_fish.

This is free and unencumbered software. Any copyright is dedicated to the Public Domain.
*/
pub fn recip(d: i32) -> i32 {
if d == 0 || d == 1 {
let mult = (1 << DIV_PRECISION) + 1;
let shift = DIV_PRECISION + 8;
return (mult << 8) | shift;
}

// Absolute value of D
let abs_d = d.unsigned_abs() as u64;

// The initial power of 2 is one less than the first one that can possibly work
// "two31" in Warren
let mut exponent = DIV_PRECISION + 1; //precision - 1;
let initial_power_of_2 = 1u64 << exponent;

// Compute the absolute value of our "test numerator,"
// which is the largest dividend whose remainder with d is d-1.
// This is called anc in Warren.
let tmp: u64 = (1u64 << DIV_PRECISION) + if d < 0 { 1 } else { 0 };
let abs_test_numer: u64 = max(tmp - 1 - tmp % abs_d, abs_d - 1);

// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
let mut quotient1: u64 = initial_power_of_2 / abs_test_numer;
let mut remainder1: u64 = initial_power_of_2 % abs_test_numer;
let mut quotient2: u64 = initial_power_of_2 / abs_d;
let mut remainder2: u64 = initial_power_of_2 % abs_d;
let mut delta: u64;

// Begin our loop
loop {
// Update the exponent
exponent += 1;

// Update quotient1 and remainder1
quotient1 *= 2;
remainder1 *= 2;
if remainder1 >= abs_test_numer {
quotient1 += 1;
remainder1 -= abs_test_numer;
}

// Update quotient2 and remainder2
quotient2 *= 2;
remainder2 *= 2;
if remainder2 >= abs_d {
quotient2 += 1;
remainder2 -= abs_d;
}

// Keep going as long as (2**exponent) / abs_d <= delta
delta = abs_d - remainder2;
// !(quotient1 < delta || (quotient1 == delta && remainder1 == 0))
if quotient1 >= delta && (quotient1 != delta || remainder1 != 0) {
break;
}
}

let mult = (quotient2 + 1) as i32;
debug_assert!((mult > 0) && (mult < (1 << 23)));
let shift = exponent + 8;

return (mult << 8) | shift;
}

/// Precalculated arrays of reciprocals
//let recip_array: [i32; 1 << 16] = core::array::from_fn(|i| recip(i));

#[inline(always)]
pub fn div(dividend: i32, divisor_recip: i32) -> i32 {
let abs_d = dividend.unsigned_abs() as i64;
let shift = divisor_recip & 0xFF;
let mult = (divisor_recip & !0xFF) as i64;
let res = (abs_d * mult >> shift) as i32;
if dividend < 0 {
-res
} else {
res
}
}

// pub fn div(dividend: i32x8, divisor_recip: i32x8)
// {
// let shift = preliminary_shift + (divisor_recip & 0xFF);
// let res = (dividend as i64) * ((divisor_recip & !0xFF) as i64) >> shift;
// return (res + if res < 0 { 1 } else { 0 }) as i32;
// }
4 changes: 2 additions & 2 deletions src/structs/lepton_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,7 @@ pub fn read_coefficient_block<const ALL_PRESENT: bool, R: Read>(
)?;

// step 3, read the DC coefficient (0,0 of the block)
let q0 = qt.get_quantization_table()[0] as i32;
let predicted_dc = pt.adv_predict_dc_pix::<ALL_PRESENT>(&raster, q0, &neighbor_data, features);
let predicted_dc = pt.adv_predict_dc_pix::<ALL_PRESENT>(&raster, qt, &neighbor_data, features);

let coef = model.read_dc(
bool_reader,
Expand All @@ -351,6 +350,7 @@ pub fn read_coefficient_block<const ALL_PRESENT: bool, R: Read>(
) as i16);

// neighbor summary is used as a predictor for the next block
let q0 = qt.get_quantization_table()[0] as i32;
let neighbor_summary = NeighborSummary::new(
predicted_dc.next_edge_pixels_h,
predicted_dc.next_edge_pixels_v,
Expand Down
4 changes: 2 additions & 2 deletions src/structs/lepton_encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,9 +358,8 @@ pub fn write_coefficient_block<const ALL_PRESENT: bool, W: Write>(
.context()?;

// finally the DC coefficient (at 0,0)
let q0 = qt.get_quantization_table()[0] as i32;
let predicted_val =
pt.adv_predict_dc_pix::<ALL_PRESENT>(&raster, q0, &neighbors_data, features);
pt.adv_predict_dc_pix::<ALL_PRESENT>(&raster, qt, &neighbors_data, features);

let avg_predicted_dc = ProbabilityTables::adv_predict_or_unpredict_dc(
here_tr.get_dc(),
Expand Down Expand Up @@ -389,6 +388,7 @@ pub fn write_coefficient_block<const ALL_PRESENT: bool, W: Write>(
.context()?;

// neighbor summary is used as a predictor for the next block
let q0 = qt.get_quantization_table()[0] as i32;
let neighbor_summary = NeighborSummary::new(
predicted_val.next_edge_pixels_h,
predicted_val.next_edge_pixels_v,
Expand Down
1 change: 1 addition & 0 deletions src/structs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

mod block_context;
mod branch;
mod div;
mod idct;
mod lepton_decoder;
mod lepton_encoder;
Expand Down
25 changes: 20 additions & 5 deletions src/structs/probability_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::consts::*;
use crate::enabled_features;
use crate::jpeg::block_based_image::AlignedBlock;
use crate::structs::block_context::NeighborData;
use crate::structs::div::div;
use crate::structs::idct::*;
use crate::structs::model::*;
use crate::structs::quantization_tables::*;
Expand Down Expand Up @@ -219,20 +220,30 @@ impl ProbabilityTables {
return 0;
}

let mut best_prior: i32 = pred[if HORIZONTAL {
let best_prior: i32 = pred[if HORIZONTAL {
coefficient_tr >> 3
} else {
coefficient_tr
}];
best_prior /= (qt.get_quantization_table_transposed()[coefficient_tr] as i32) << 13;
//best_prior / ((qt.get_quantization_table_transposed()[coefficient_tr] as i32) << 13)
let recip = qt.get_recip(if HORIZONTAL {
coefficient_tr >> 3
} else {
coefficient_tr + 8
});
let prior = div((best_prior.unsigned_abs() >> 13) as i32, recip);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dont't know if optimizer takes care of this, but you could make div take unsigned parameters since you are doing this here, also maybe add the 13 shift to the recip


best_prior
if best_prior < 0 {
-prior
} else {
prior
}
}

pub fn adv_predict_dc_pix<const ALL_PRESENT: bool>(
&self,
raster_cols: &[i32x8; 8],
q0: i32,
qt: &QuantizationTables, //q0: i32,
neighbor_data: &NeighborData,
enabled_features: &enabled_features::EnabledFeatures,
) -> PredictDCResult {
Expand Down Expand Up @@ -326,8 +337,12 @@ impl ProbabilityTables {

let uncertainty2_val = (far_afield_value >> 3) as i16;

//let q0 = qt.get_quantization_table()[0] as i32;
let q0 = qt.get_recip(0);
let div = div(avgmed, q0);
return PredictDCResult {
predicted_dc: (avgmed / q0 + 4) >> 3,
//predicted_dc: (avgmed / q0 + 4) >> 3,
predicted_dc: (div + 4) >> 3,
uncertainty: uncertainty_val,
uncertainty2: uncertainty2_val,
next_edge_pixels_h,
Expand Down
12 changes: 12 additions & 0 deletions src/structs/quantization_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::consts::*;
use crate::helpers::*;
use crate::jpeg::jpeg_header::JpegHeader;
use crate::lepton_error::err_exit_code;
use crate::structs::div::recip;
use crate::{ExitCode, Result};

pub struct QuantizationTables {
Expand All @@ -18,6 +19,8 @@ pub struct QuantizationTables {
// Calculated using approximate maximal magnitudes
// of these coefficients `FREQ_MAX`
min_noise_threshold: [u8; 14],
// Reciprocals for division-by-multiplicatio scheme
recip: [i32; 16],
}

impl QuantizationTables {
Expand All @@ -32,6 +35,7 @@ impl QuantizationTables {
quantization_table: [0; 64],
quantization_table_transposed: [0; 64],
min_noise_threshold: [0; 14],
recip: [0; 16],
};

for pixel_row in 0..8 {
Expand Down Expand Up @@ -59,6 +63,10 @@ impl QuantizationTables {
}
}
}
for i in 0..16 {
let coord = if i < 8 { i } else { (i - 8) * 8 };
retval.recip[i] = recip(retval.quantization_table[coord] as i32);
}

retval
}
Expand Down Expand Up @@ -97,4 +105,8 @@ impl QuantizationTables {
pub fn get_min_noise_threshold(&self, coef: usize) -> u8 {
self.min_noise_threshold[coef]
}

pub fn get_recip(&self, coef: usize) -> i32 {
self.recip[coef]
}
}