Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
453 changes: 317 additions & 136 deletions src/row/arch/neon/gray.rs

Large diffs are not rendered by default.

313 changes: 204 additions & 109 deletions src/row/arch/wasm_simd128/gray.rs

Large diffs are not rendered by default.

353 changes: 235 additions & 118 deletions src/row/arch/x86_avx2/gray.rs

Large diffs are not rendered by default.

349 changes: 233 additions & 116 deletions src/row/arch/x86_avx512/gray.rs

Large diffs are not rendered by default.

377 changes: 261 additions & 116 deletions src/row/arch/x86_sse41/gray.rs

Large diffs are not rendered by default.

229 changes: 117 additions & 112 deletions src/row/dispatch/gray.rs

Large diffs are not rendered by default.

140 changes: 80 additions & 60 deletions src/row/dispatch/grayf32.rs

Large diffs are not rendered by default.

142 changes: 86 additions & 56 deletions src/row/dispatch/ya16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,259 +27,289 @@ use crate::row::{

/// Dispatch `ya16_to_rgb_row`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn ya16_to_rgb_row(packed: &[u16], out: &mut [u8], width: usize, use_simd: bool) {
pub(crate) fn ya16_to_rgb_row<const BE: bool>(
packed: &[u16],
out: &mut [u8],
width: usize,
use_simd: bool,
) {
assert!(packed.len() >= ya_row_elems(width), "packed too short");
assert!(out.len() >= rgb_row_bytes(width), "out too short");
if !use_simd {
return scalar::ya16_to_rgb_row(packed, out, width);
return scalar::ya16_to_rgb_row::<BE>(packed, out, width);
}
cfg_select! {
target_arch = "aarch64" => {
if neon_available() {
unsafe { arch::neon::ya16_to_rgb_row(packed, out, width); }
unsafe { arch::neon::ya16_to_rgb_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "x86_64" => {
if avx512_available() {
unsafe { arch::x86_avx512::ya16_to_rgb_row(packed, out, width); }
unsafe { arch::x86_avx512::ya16_to_rgb_row::<BE>(packed, out, width); }
return;
}
if avx2_available() {
unsafe { arch::x86_avx2::ya16_to_rgb_row(packed, out, width); }
unsafe { arch::x86_avx2::ya16_to_rgb_row::<BE>(packed, out, width); }
return;
}
if sse41_available() {
unsafe { arch::x86_sse41::ya16_to_rgb_row(packed, out, width); }
unsafe { arch::x86_sse41::ya16_to_rgb_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "wasm32" => {
if simd128_available() {
unsafe { arch::wasm_simd128::ya16_to_rgb_row(packed, out, width); }
unsafe { arch::wasm_simd128::ya16_to_rgb_row::<BE>(packed, out, width); }
return;
}
},
_ => {}
}
scalar::ya16_to_rgb_row(packed, out, width);
scalar::ya16_to_rgb_row::<BE>(packed, out, width);
}

// ---- ya16_to_rgba_row ---------------------------------------------------------

/// Dispatch `ya16_to_rgba_row`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn ya16_to_rgba_row(packed: &[u16], out: &mut [u8], width: usize, use_simd: bool) {
pub(crate) fn ya16_to_rgba_row<const BE: bool>(
packed: &[u16],
out: &mut [u8],
width: usize,
use_simd: bool,
) {
assert!(packed.len() >= ya_row_elems(width), "packed too short");
assert!(out.len() >= rgba_row_bytes(width), "out too short");
if !use_simd {
return scalar::ya16_to_rgba_row(packed, out, width);
return scalar::ya16_to_rgba_row::<BE>(packed, out, width);
}
cfg_select! {
target_arch = "aarch64" => {
if neon_available() {
unsafe { arch::neon::ya16_to_rgba_row(packed, out, width); }
unsafe { arch::neon::ya16_to_rgba_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "x86_64" => {
if avx512_available() {
unsafe { arch::x86_avx512::ya16_to_rgba_row(packed, out, width); }
unsafe { arch::x86_avx512::ya16_to_rgba_row::<BE>(packed, out, width); }
return;
}
if avx2_available() {
unsafe { arch::x86_avx2::ya16_to_rgba_row(packed, out, width); }
unsafe { arch::x86_avx2::ya16_to_rgba_row::<BE>(packed, out, width); }
return;
}
if sse41_available() {
unsafe { arch::x86_sse41::ya16_to_rgba_row(packed, out, width); }
unsafe { arch::x86_sse41::ya16_to_rgba_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "wasm32" => {
if simd128_available() {
unsafe { arch::wasm_simd128::ya16_to_rgba_row(packed, out, width); }
unsafe { arch::wasm_simd128::ya16_to_rgba_row::<BE>(packed, out, width); }
return;
}
},
_ => {}
}
scalar::ya16_to_rgba_row(packed, out, width);
scalar::ya16_to_rgba_row::<BE>(packed, out, width);
}

// ---- ya16_to_rgb_u16_row ------------------------------------------------------

/// Dispatch `ya16_to_rgb_u16_row`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn ya16_to_rgb_u16_row(packed: &[u16], out: &mut [u16], width: usize, use_simd: bool) {
pub(crate) fn ya16_to_rgb_u16_row<const BE: bool>(
packed: &[u16],
out: &mut [u16],
width: usize,
use_simd: bool,
) {
assert!(packed.len() >= ya_row_elems(width), "packed too short");
assert!(out.len() >= rgb_row_elems(width), "out too short");
if !use_simd {
return scalar::ya16_to_rgb_u16_row(packed, out, width);
return scalar::ya16_to_rgb_u16_row::<BE>(packed, out, width);
}
cfg_select! {
target_arch = "aarch64" => {
if neon_available() {
unsafe { arch::neon::ya16_to_rgb_u16_row(packed, out, width); }
unsafe { arch::neon::ya16_to_rgb_u16_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "x86_64" => {
if avx512_available() {
unsafe { arch::x86_avx512::ya16_to_rgb_u16_row(packed, out, width); }
unsafe { arch::x86_avx512::ya16_to_rgb_u16_row::<BE>(packed, out, width); }
return;
}
if avx2_available() {
unsafe { arch::x86_avx2::ya16_to_rgb_u16_row(packed, out, width); }
unsafe { arch::x86_avx2::ya16_to_rgb_u16_row::<BE>(packed, out, width); }
return;
}
if sse41_available() {
unsafe { arch::x86_sse41::ya16_to_rgb_u16_row(packed, out, width); }
unsafe { arch::x86_sse41::ya16_to_rgb_u16_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "wasm32" => {
if simd128_available() {
unsafe { arch::wasm_simd128::ya16_to_rgb_u16_row(packed, out, width); }
unsafe { arch::wasm_simd128::ya16_to_rgb_u16_row::<BE>(packed, out, width); }
return;
}
},
_ => {}
}
scalar::ya16_to_rgb_u16_row(packed, out, width);
scalar::ya16_to_rgb_u16_row::<BE>(packed, out, width);
}

// ---- ya16_to_rgba_u16_row -----------------------------------------------------

/// Dispatch `ya16_to_rgba_u16_row`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn ya16_to_rgba_u16_row(packed: &[u16], out: &mut [u16], width: usize, use_simd: bool) {
pub(crate) fn ya16_to_rgba_u16_row<const BE: bool>(
packed: &[u16],
out: &mut [u16],
width: usize,
use_simd: bool,
) {
assert!(packed.len() >= ya_row_elems(width), "packed too short");
assert!(out.len() >= rgba_row_elems(width), "out too short");
if !use_simd {
return scalar::ya16_to_rgba_u16_row(packed, out, width);
return scalar::ya16_to_rgba_u16_row::<BE>(packed, out, width);
}
cfg_select! {
target_arch = "aarch64" => {
if neon_available() {
unsafe { arch::neon::ya16_to_rgba_u16_row(packed, out, width); }
unsafe { arch::neon::ya16_to_rgba_u16_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "x86_64" => {
if avx512_available() {
unsafe { arch::x86_avx512::ya16_to_rgba_u16_row(packed, out, width); }
unsafe { arch::x86_avx512::ya16_to_rgba_u16_row::<BE>(packed, out, width); }
return;
}
if avx2_available() {
unsafe { arch::x86_avx2::ya16_to_rgba_u16_row(packed, out, width); }
unsafe { arch::x86_avx2::ya16_to_rgba_u16_row::<BE>(packed, out, width); }
return;
}
if sse41_available() {
unsafe { arch::x86_sse41::ya16_to_rgba_u16_row(packed, out, width); }
unsafe { arch::x86_sse41::ya16_to_rgba_u16_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "wasm32" => {
if simd128_available() {
unsafe { arch::wasm_simd128::ya16_to_rgba_u16_row(packed, out, width); }
unsafe { arch::wasm_simd128::ya16_to_rgba_u16_row::<BE>(packed, out, width); }
return;
}
},
_ => {}
}
scalar::ya16_to_rgba_u16_row(packed, out, width);
scalar::ya16_to_rgba_u16_row::<BE>(packed, out, width);
}

// ---- ya16_to_luma_row ---------------------------------------------------------

/// Dispatch `ya16_to_luma_row`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn ya16_to_luma_row(packed: &[u16], out: &mut [u8], width: usize, use_simd: bool) {
pub(crate) fn ya16_to_luma_row<const BE: bool>(
packed: &[u16],
out: &mut [u8],
width: usize,
use_simd: bool,
) {
assert!(packed.len() >= ya_row_elems(width), "packed too short");
assert!(out.len() >= width, "out too short");
if !use_simd {
return scalar::ya16_to_luma_row(packed, out, width);
return scalar::ya16_to_luma_row::<BE>(packed, out, width);
}
cfg_select! {
target_arch = "aarch64" => {
if neon_available() {
unsafe { arch::neon::ya16_to_luma_row(packed, out, width); }
unsafe { arch::neon::ya16_to_luma_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "x86_64" => {
if avx512_available() {
unsafe { arch::x86_avx512::ya16_to_luma_row(packed, out, width); }
unsafe { arch::x86_avx512::ya16_to_luma_row::<BE>(packed, out, width); }
return;
}
if avx2_available() {
unsafe { arch::x86_avx2::ya16_to_luma_row(packed, out, width); }
unsafe { arch::x86_avx2::ya16_to_luma_row::<BE>(packed, out, width); }
return;
}
if sse41_available() {
unsafe { arch::x86_sse41::ya16_to_luma_row(packed, out, width); }
unsafe { arch::x86_sse41::ya16_to_luma_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "wasm32" => {
if simd128_available() {
unsafe { arch::wasm_simd128::ya16_to_luma_row(packed, out, width); }
unsafe { arch::wasm_simd128::ya16_to_luma_row::<BE>(packed, out, width); }
return;
}
},
_ => {}
}
scalar::ya16_to_luma_row(packed, out, width);
scalar::ya16_to_luma_row::<BE>(packed, out, width);
}

// ---- ya16_to_luma_u16_row -----------------------------------------------------

/// Dispatch `ya16_to_luma_u16_row`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn ya16_to_luma_u16_row(packed: &[u16], out: &mut [u16], width: usize, use_simd: bool) {
pub(crate) fn ya16_to_luma_u16_row<const BE: bool>(
packed: &[u16],
out: &mut [u16],
width: usize,
use_simd: bool,
) {
assert!(packed.len() >= ya_row_elems(width), "packed too short");
assert!(out.len() >= width, "out too short");
if !use_simd {
return scalar::ya16_to_luma_u16_row(packed, out, width);
return scalar::ya16_to_luma_u16_row::<BE>(packed, out, width);
}
cfg_select! {
target_arch = "aarch64" => {
if neon_available() {
unsafe { arch::neon::ya16_to_luma_u16_row(packed, out, width); }
unsafe { arch::neon::ya16_to_luma_u16_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "x86_64" => {
if avx512_available() {
unsafe { arch::x86_avx512::ya16_to_luma_u16_row(packed, out, width); }
unsafe { arch::x86_avx512::ya16_to_luma_u16_row::<BE>(packed, out, width); }
return;
}
if avx2_available() {
unsafe { arch::x86_avx2::ya16_to_luma_u16_row(packed, out, width); }
unsafe { arch::x86_avx2::ya16_to_luma_u16_row::<BE>(packed, out, width); }
return;
}
if sse41_available() {
unsafe { arch::x86_sse41::ya16_to_luma_u16_row(packed, out, width); }
unsafe { arch::x86_sse41::ya16_to_luma_u16_row::<BE>(packed, out, width); }
return;
}
},
target_arch = "wasm32" => {
if simd128_available() {
unsafe { arch::wasm_simd128::ya16_to_luma_u16_row(packed, out, width); }
unsafe { arch::wasm_simd128::ya16_to_luma_u16_row::<BE>(packed, out, width); }
return;
}
},
_ => {}
}
scalar::ya16_to_luma_u16_row(packed, out, width);
scalar::ya16_to_luma_u16_row::<BE>(packed, out, width);
}

// ---- ya16_to_hsv_row ----------------------------------------------------------

/// Dispatch `ya16_to_hsv_row`.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn ya16_to_hsv_row(
pub(crate) fn ya16_to_hsv_row<const BE: bool>(
packed: &[u16],
h_out: &mut [u8],
s_out: &mut [u8],
Expand All @@ -292,36 +322,36 @@ pub(crate) fn ya16_to_hsv_row(
assert!(s_out.len() >= width, "S out too short");
assert!(v_out.len() >= width, "V out too short");
if !use_simd {
return scalar::ya16_to_hsv_row(packed, h_out, s_out, v_out, width);
return scalar::ya16_to_hsv_row::<BE>(packed, h_out, s_out, v_out, width);
}
cfg_select! {
target_arch = "aarch64" => {
if neon_available() {
unsafe { arch::neon::ya16_to_hsv_row(packed, h_out, s_out, v_out, width); }
unsafe { arch::neon::ya16_to_hsv_row::<BE>(packed, h_out, s_out, v_out, width); }
return;
}
},
target_arch = "x86_64" => {
if avx512_available() {
unsafe { arch::x86_avx512::ya16_to_hsv_row(packed, h_out, s_out, v_out, width); }
unsafe { arch::x86_avx512::ya16_to_hsv_row::<BE>(packed, h_out, s_out, v_out, width); }
return;
}
if avx2_available() {
unsafe { arch::x86_avx2::ya16_to_hsv_row(packed, h_out, s_out, v_out, width); }
unsafe { arch::x86_avx2::ya16_to_hsv_row::<BE>(packed, h_out, s_out, v_out, width); }
return;
}
if sse41_available() {
unsafe { arch::x86_sse41::ya16_to_hsv_row(packed, h_out, s_out, v_out, width); }
unsafe { arch::x86_sse41::ya16_to_hsv_row::<BE>(packed, h_out, s_out, v_out, width); }
return;
}
},
target_arch = "wasm32" => {
if simd128_available() {
unsafe { arch::wasm_simd128::ya16_to_hsv_row(packed, h_out, s_out, v_out, width); }
unsafe { arch::wasm_simd128::ya16_to_hsv_row::<BE>(packed, h_out, s_out, v_out, width); }
return;
}
},
_ => {}
}
scalar::ya16_to_hsv_row(packed, h_out, s_out, v_out, width);
scalar::ya16_to_hsv_row::<BE>(packed, h_out, s_out, v_out, width);
}
Loading
Loading