21 changes: 20 additions & 1 deletion src/frame/packed_rgb_f16.rs
@@ -50,7 +50,7 @@ pub enum Rgbf16FrameError {
},
}

/// A validated packed **RGBF16** frame (FFmpeg `AV_PIX_FMT_RGBF16`).
/// A validated packed **RGBF16** frame (FFmpeg `AV_PIX_FMT_RGBF16LE`).
/// One plane, 3 × `f16` per pixel, channel order `R, G, B`.
///
/// Values are **linear** RGB by convention — no gamma / OETF handling
@@ -65,6 +65,25 @@ pub enum Rgbf16FrameError {
/// `stride` is in **`f16` elements** (≥ `3 * width`), matching the
/// per-format convention that stride aligns with the underlying slice
/// element type. No width parity constraint.
///
/// # Endian contract — **LE-encoded bytes**
///
/// The `&[half::f16]` plane is the **LE-encoded byte layout** reinterpreted
/// as `f16`, matching the FFmpeg **`AV_PIX_FMT_RGBF16LE`** pixel-format
/// convention. (FFmpeg's unsuffixed `AV_PIX_FMT_RGBF16` is a *target-endian*
/// alias — `RGBF16LE` on a little-endian host, `RGBF16BE` on a big-endian
/// host — so this contract pins the canonical `*LE` byte order regardless
/// of host endianness.)
///
/// On a little-endian host (every CI runner today) LE bytes _are_
/// host-native, so `&[half::f16]` is also a host-native f16 slice; on a
/// big-endian host the bytes have to be byte-swapped back to host-native
/// before arithmetic. Downstream row kernels handle this byte-swap (or
/// no-op on LE) under the hood — callers do **not** pre-swap.
///
/// Stride is in **f16 elements** (not bytes). Callers holding a byte buffer
/// from FFmpeg should cast via `bytemuck::cast_slice` and divide
/// `linesize[0]` by 2 before constructing.
#[derive(Debug, Clone, Copy)]
pub struct Rgbf16Frame<'a> {
rgb: &'a [half::f16],
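
The caller-side cast the new docs prescribe is mechanical. A minimal sketch, assuming the `half` crate is built with its `bytemuck` feature (so `f16` is `Pod`); the helper name and the `data`/`linesize` parameters are illustrative, not crate API:

```rust
use half::f16;

/// Hypothetical glue: reinterpret an FFmpeg RGBF16LE byte plane as
/// `&[f16]` and convert the byte stride to f16 elements, per the
/// documented contract. `data` / `linesize` stand in for
/// `AVFrame.data[0]` / `AVFrame.linesize[0]`.
fn rgbf16_plane(data: &[u8], linesize: usize) -> (&[f16], usize) {
    // cast_slice checks length and alignment at runtime and panics on
    // a misaligned buffer; FFmpeg allocations comfortably exceed f16's
    // 2-byte alignment in practice.
    let plane: &[f16] = bytemuck::cast_slice(data);
    (plane, linesize / 2) // stride contract: f16 elements, not bytes
}
```
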
25 changes: 24 additions & 1 deletion src/frame/packed_rgb_float.rs
@@ -50,7 +50,7 @@ pub enum Rgbf32FrameError {
},
}

/// A validated packed **RGBF32** frame (FFmpeg `AV_PIX_FMT_RGBF32`).
/// A validated packed **RGBF32** frame.
/// One plane, 3 × `f32` per pixel, channel order `R, G, B`.
///
/// Values are **linear** RGB by convention — no gamma / OETF handling
@@ -64,6 +64,29 @@ pub enum Rgbf32FrameError {
/// `stride` is in **`f32` elements** (≥ `3 * width`), matching the
/// per-format convention that stride aligns with the underlying slice
/// element type. No width parity constraint.
///
/// # Endian contract — **LE-encoded bytes** (`AV_PIX_FMT_RGBF32LE`)
///
/// The `&[f32]` plane is the **LE-encoded byte layout** reinterpreted as
/// `f32`. This frame maps to FFmpeg `AV_PIX_FMT_RGBF32LE`. FFmpeg also
/// defines `AV_PIX_FMT_RGBF32BE` and an unsuffixed `AV_PIX_FMT_RGBF32`
/// alias that is **target-endian** (resolves to `RGBF32LE` on LE hosts and
/// `RGBF32BE` on BE hosts). **Callers on a BE host who hold target-endian
/// `AV_PIX_FMT_RGBF32` bytes must convert them to LE before constructing
/// this frame** — otherwise the LE-decode contract here would re-interpret
/// the BE bytes as LE and produce byte-swapped float data. The 4-channel
/// `AV_PIX_FMT_RGBAF32LE` / `AV_PIX_FMT_RGBAF32BE` pair follows the same
/// `*LE` convention; this frame uses the analogous LE binding.
///
/// On a little-endian host (every CI runner today) LE bytes _are_
/// host-native, so `&[f32]` is also a host-native float slice; on a
/// big-endian host the bytes have to be byte-swapped back to host-native
/// before arithmetic. Downstream row kernels handle this byte-swap (or
/// no-op on LE) under the hood — callers do **not** pre-swap.
///
/// Stride is in **f32 elements** (not bytes). Callers holding a byte buffer
/// from FFmpeg should cast via `bytemuck::cast_slice` and divide
/// `linesize[0]` by 4 before constructing.
#[derive(Debug, Clone, Copy)]
pub struct Rgbf32Frame<'a> {
rgb: &'a [f32],
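
The BE-host warning above is the one place a caller must act. A sketch of that pre-pass under the stated contract; `rgbf32_bytes_to_le` is a hypothetical helper, not part of this crate:

```rust
/// Hypothetical pre-pass for a big-endian host that decoded with the
/// unsuffixed, target-endian `AV_PIX_FMT_RGBF32` (which resolves to
/// RGBF32BE there): rewrite every 4-byte sample to the canonical LE
/// order this frame type expects. No-op on little-endian hosts, where
/// target-endian already *is* LE.
fn rgbf32_bytes_to_le(bytes: &mut [u8]) {
    if cfg!(target_endian = "big") {
        for sample in bytes.chunks_exact_mut(4) {
            sample.reverse(); // BE <-> LE byte order
        }
    }
}
```
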
56 changes: 56 additions & 0 deletions src/frame/planar_gbr_float.rs
@@ -147,6 +147,20 @@ const fn check_plane(
/// `f32` elements. Nominal range `[0.0, 1.0]`; HDR values > 1.0 are
/// preserved bit-exact on lossless pass-through outputs and clamped to
/// `[0.0, 1.0]` on integer-output paths.
///
/// # Endian contract — **LE-encoded bytes**
///
/// The three `&[f32]` planes are the **LE-encoded byte layout** reinterpreted
/// as `f32`, matching the FFmpeg `*LE` pixel-format suffix in the format
/// name. On a little-endian host (every CI runner today) LE bytes _are_
/// host-native, so the slices are also host-native float slices; on a
/// big-endian host the bytes have to be byte-swapped back to host-native
/// before arithmetic. Downstream row kernels handle this byte-swap (or
/// no-op on LE) under the hood — callers do **not** pre-swap.
///
/// Stride is in **f32 elements** (not bytes). Callers holding byte buffers
/// from FFmpeg should cast via `bytemuck::cast_slice` and divide each
/// `linesize[i]` by 4 before constructing.
#[derive(Debug, Clone, Copy)]
pub struct Gbrpf32Frame<'a> {
g: &'a [f32],
@@ -250,6 +264,20 @@ impl&lt;'a&gt; Gbrpf32Frame&lt;'a&gt; {
/// Four full-resolution `f32` planes in **G, B, R, A** order. Alpha is
/// real per-pixel; nominal range `[0.0, 1.0]` (opaque = 1.0). Stride is
/// in `f32` elements.
///
/// # Endian contract — **LE-encoded bytes**
///
/// The four `&[f32]` planes are the **LE-encoded byte layout** reinterpreted
/// as `f32`, matching the FFmpeg `*LE` pixel-format suffix in the format
/// name. On a little-endian host (every CI runner today) LE bytes _are_
/// host-native, so the slices are also host-native float slices; on a
/// big-endian host the bytes have to be byte-swapped back to host-native
/// before arithmetic. Downstream row kernels handle this byte-swap (or
/// no-op on LE) under the hood — callers do **not** pre-swap.
///
/// Stride is in **f32 elements** (not bytes). Callers holding byte buffers
/// from FFmpeg should cast via `bytemuck::cast_slice` and divide each
/// `linesize[i]` by 4 before constructing.
#[derive(Debug, Clone, Copy)]
pub struct Gbrapf32Frame<'a> {
g: &'a [f32],
@@ -372,6 +400,20 @@ impl&lt;'a&gt; Gbrapf32Frame&lt;'a&gt; {
/// Three full-resolution [`half::f16`] planes in **G, B, R** order. Stride
/// is in `f16` elements. Nominal range `[0.0, 1.0]`; HDR values > 1.0 are
/// permitted (saturation to `+Inf` occurs on f16→f32 narrowing paths).
///
/// # Endian contract — **LE-encoded bytes**
///
/// The three `&[half::f16]` planes are the **LE-encoded byte layout**
/// reinterpreted as `f16`, matching the FFmpeg `*LE` pixel-format suffix in
/// the format name. On a little-endian host (every CI runner today) LE
/// bytes _are_ host-native, so the slices are also host-native f16 slices;
/// on a big-endian host the bytes have to be byte-swapped back to
/// host-native before arithmetic. Downstream row kernels handle this
/// byte-swap (or no-op on LE) under the hood — callers do **not** pre-swap.
///
/// Stride is in **f16 elements** (not bytes). Callers holding byte buffers
/// from FFmpeg should cast via `bytemuck::cast_slice` and divide each
/// `linesize[i]` by 2 before constructing.
#[derive(Debug, Clone, Copy)]
pub struct Gbrpf16Frame<'a> {
g: &'a [half::f16],
@@ -475,6 +517,20 @@ impl&lt;'a&gt; Gbrpf16Frame&lt;'a&gt; {
/// Four full-resolution [`half::f16`] planes in **G, B, R, A** order.
/// Alpha is real per-pixel; nominal range `[0.0, 1.0]`. Stride is in
/// `f16` elements.
///
/// # Endian contract — **LE-encoded bytes**
///
/// The four `&[half::f16]` planes are the **LE-encoded byte layout**
/// reinterpreted as `f16`, matching the FFmpeg `*LE` pixel-format suffix in
/// the format name. On a little-endian host (every CI runner today) LE
/// bytes _are_ host-native, so the slices are also host-native f16 slices;
/// on a big-endian host the bytes have to be byte-swapped back to
/// host-native before arithmetic. Downstream row kernels handle this
/// byte-swap (or no-op on LE) under the hood — callers do **not** pre-swap.
///
/// Stride is in **f16 elements** (not bytes). Callers holding byte buffers
/// from FFmpeg should cast via `bytemuck::cast_slice` and divide each
/// `linesize[i]` by 2 before constructing.
#[derive(Debug, Clone, Copy)]
pub struct Gbrapf16Frame<'a> {
g: &'a [half::f16],
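
All four structs promise that downstream row kernels absorb the LE-to-host normalization. As a mental model only (the crate's actual kernels are not shown in this diff), the per-element decode looks like:

```rust
/// Sketch of the per-element decode a row kernel would perform on an
/// LE-encoded `f32` plane: take the stored bits, normalize LE -> host,
/// rebuild the float. `u32::from_le` is a no-op on little-endian hosts
/// and a byte swap on big-endian ones. (Round-tripping through `f32`
/// can quieten signaling-NaN payloads on some targets.)
#[inline]
fn load_le_f32(stored: f32) -> f32 {
    f32::from_bits(u32::from_le(stored.to_bits()))
}
```
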
8 changes: 4 additions & 4 deletions src/row/arch/neon/alpha_extract.rs
@@ -116,7 +116,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_to_u8_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_to_u8_at_0(
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -154,7 +154,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_at_0(
scalar::copy_alpha_packed_u16x4_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -357,7 +357,7 @@ mod tests {
pseudo_random_u8(&mut rgba_simd, 0xFEED);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
@@ -375,7 +375,7 @@
pseudo_random_u16(&mut rgba_simd, 0x1337);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
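
The `::<false>` turbofish repeated on every SIMD tail call here and in the four arch files below suggests the scalar kernels gained a const-generic byte-swap flag in this PR. A guessed shape: the parameter name `SWAP`, the channel positions, and the body are assumptions, not taken from the diff:

```rust
/// Assumed post-PR signature: the scalar fallback is parameterized over
/// a const swap flag, and the SIMD entry points pass `false` because on
/// the little-endian hosts they target, stored LE samples are already
/// host-native.
pub(crate) fn copy_alpha_packed_u16x4_at_0<const SWAP: bool>(
    packed: &[u16],
    rgba_out: &mut [u16],
    width: usize,
) {
    for x in 0..width {
        let a = packed[x * 4]; // assumed: alpha in lane 0 ("at_0")
        let a = if SWAP { a.swap_bytes() } else { a };
        rgba_out[x * 4 + 3] = a; // assumed: RGBA output, alpha last
    }
}
```
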
8 changes: 4 additions & 4 deletions src/row/arch/wasm_simd128/alpha_extract.rs
@@ -152,7 +152,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_to_u8_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_to_u8_at_0(
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -226,7 +226,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_at_0(
scalar::copy_alpha_packed_u16x4_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -518,7 +518,7 @@ mod tests {
unsafe {
super::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_simd, w);
}
scalar::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
@@ -538,7 +538,7 @@
unsafe {
super::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_simd, w);
}
scalar::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
8 changes: 4 additions & 4 deletions src/row/arch/x86_avx2/alpha_extract.rs
@@ -213,7 +213,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_to_u8_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_to_u8_at_0(
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -294,7 +294,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_at_0(
scalar::copy_alpha_packed_u16x4_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -636,7 +636,7 @@ mod tests {
pseudo_random_u8(&mut rgba_simd, 0xFEED);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
@@ -657,7 +657,7 @@
pseudo_random_u16(&mut rgba_simd, 0x1337);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
8 changes: 4 additions & 4 deletions src/row/arch/x86_avx512/alpha_extract.rs
@@ -206,7 +206,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_to_u8_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_to_u8_at_0(
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -294,7 +294,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_at_0(
scalar::copy_alpha_packed_u16x4_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -604,7 +604,7 @@ mod tests {
pseudo_random_u8(&mut rgba_simd, 0xFEED);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
@@ -627,7 +627,7 @@
pseudo_random_u16(&mut rgba_simd, 0x1337);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
8 changes: 4 additions & 4 deletions src/row/arch/x86_sse41/alpha_extract.rs
@@ -152,7 +152,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_to_u8_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_to_u8_at_0(
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -227,7 +227,7 @@ pub(crate) unsafe fn copy_alpha_packed_u16x4_at_0(
}

if x < width {
scalar::copy_alpha_packed_u16x4_at_0(
scalar::copy_alpha_packed_u16x4_at_0::<false>(
&packed[x * 4..width * 4],
&mut rgba_out[x * 4..width * 4],
width - x,
@@ -521,7 +521,7 @@ mod tests {
pseudo_random_u8(&mut rgba_simd, 0xFEED);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_to_u8_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_to_u8_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}
@@ -542,7 +542,7 @@
pseudo_random_u16(&mut rgba_simd, 0x1337);
let mut rgba_scalar = rgba_simd.clone();
unsafe { super::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_simd, w) };
scalar::copy_alpha_packed_u16x4_at_0(&packed, &mut rgba_scalar, w);
scalar::copy_alpha_packed_u16x4_at_0::<false>(&packed, &mut rgba_scalar, w);
assert_eq!(rgba_simd, rgba_scalar, "width={w}");
}
}