Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5568988
Add initial AVX-512 level; delegates to AVX2 for everything for now
Shnatsel Feb 21, 2026
45b2158
Implement native 512-bit vector support for AVX-512
claude Feb 21, 2026
e1d4668
Use native AVX-512 intrinsics for unary and binary ops
claude Feb 21, 2026
ff6b7b1
Use native AVX-512 intrinsics for select, shift, reinterpret, and wid…
claude Feb 21, 2026
d798461
Use native AVX-512 intrinsics for slide operations
claude Feb 21, 2026
ee2ba62
Use native AVX-512 intrinsics for comparison operations
claude Feb 21, 2026
11fc650
Remove force_generic_op override for x86
claude Feb 21, 2026
db4c255
Bump MSRV to 1.89 to get access to AVX-512 intrinsics
Shnatsel Feb 21, 2026
04b9272
Fix clippy cast_possible_wrap warning
claude Feb 21, 2026
3428f9f
Expand test coverage to include AVX-512
Shnatsel Feb 21, 2026
5846358
Fix AVX-512 bugs in shift, precise conversion, and zip operations
claude Feb 21, 2026
fd41e87
Use permutex2var for AVX-512 zip operations
claude Feb 22, 2026
318c085
Remove unused force_generic_op from Level trait
Shnatsel Feb 22, 2026
4fef749
Unify rounding mode representation between SSE and AVX-512, simplifyi…
Shnatsel Feb 22, 2026
52d5cab
Use VBMI permutex2var for AVX-512 cross-lane byte alignment
Shnatsel Feb 22, 2026
d36a716
Drop weird and unnecessary #[cfg(not(target_feature = avx512f))], pro…
Shnatsel Feb 22, 2026
873edc4
Drop erroneous avx512f cfg from avx2 level detection
Shnatsel Feb 22, 2026
35020b4
Use VBMI permutex2var for AVX-512 8-bit shift truncation
Shnatsel Feb 22, 2026
77dcb88
Remove unused cross_block_alignr_256x2 from AVX-512 generated code
Shnatsel Feb 22, 2026
5d6f416
Fix clippy lints in fearless_simd_gen
Shnatsel Feb 22, 2026
04c9168
Suppress an apparently buggy Clippy lint; surfaced only in `cargo cli…
Shnatsel Feb 22, 2026
aa0254f
appease toml format checker
Shnatsel Feb 22, 2026
daf5112
drop the erroneous 'not(target_feature = avx512f)' cfg's
Shnatsel Feb 22, 2026
487359b
dummy commit to re-run CI and get rid of transient failure
Shnatsel Feb 22, 2026
e1336f4
Fix typo
Shnatsel Feb 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ env:
# version like 1.70. Note that we only specify MAJOR.MINOR and not PATCH so that bugfixes still
# come automatically. If the version specified here is no longer the latest stable version,
# then please feel free to submit a PR that adjusts it along with the potential clippy fixes.
RUST_STABLE_VER: "1.88" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7
RUST_STABLE_VER: "1.89" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7
# The purpose of checking with the minimum supported Rust toolchain is to detect its staleness.
# If the compilation fails, then the version specified here needs to be bumped up to reality.
# Be sure to also update the rust-version property in the workspace Cargo.toml file,
# plus all the README.md files of the affected packages.
RUST_MIN_VER: "1.88"
RUST_MIN_VER: "1.89"
# List of packages that will be checked with the minimum supported Rust version.
# This should be limited to packages that are intended for publishing.
RUST_MIN_VER_PKGS: "-p fearless_simd"
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ license = "Apache-2.0 OR MIT"
repository = "https://github.com/linebender/fearless_simd"
# Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files
# and with the MSRV in the `Unreleased` section of CHANGELOG.md.
rust-version = "1.88"
rust-version = "1.89"

[workspace.lints]

Expand Down Expand Up @@ -47,7 +47,7 @@ clippy.debug_assert_with_mut_call = "warn"
clippy.doc_markdown = "warn"
clippy.fn_to_numeric_cast_any = "warn"
clippy.infinite_loop = "warn"
clippy.large_stack_arrays = "warn"
clippy.large_stack_arrays = "allow" # appears to be buggy as of 1.93, fixed in nightly. TODO: re-enable
clippy.mismatching_type_param_order = "warn"
clippy.missing_assert_message = "warn"
clippy.missing_fields_in_debug = "warn"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible

## Minimum supported Rust Version (MSRV)

This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later.
This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later.

Future versions of Fearless SIMD might increase the Rust version requirement.
It will not be treated as a breaking change and as such can even happen with small patch releases.
Expand Down
2 changes: 1 addition & 1 deletion fearless_simd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`.

## Minimum supported Rust Version (MSRV)

This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later.
This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later.

Future versions of Fearless SIMD might increase the Rust version requirement.
It will not be treated as a breaking change and as such can even happen with small patch releases.
Expand Down
21 changes: 21 additions & 0 deletions fearless_simd/src/core_arch/x86/avx512.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2025 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Access to AVX-512 intrinsics (Ice Lake feature set).

/// A token for AVX-512 intrinsics (Ice Lake feature set) on `x86` and `x86_64`.
///
/// The token carries no data. Its single field is private, so code outside this module
/// cannot write a struct literal for it and has to obtain it through
/// [`Avx512::new_unchecked`], the only constructor defined in this module.
#[derive(Debug, Clone, Copy)]
pub struct Avx512 {
    // Private unit marker: keeps the struct from being constructed by other modules.
    _private: (),
}

impl Avx512 {
    /// Create a SIMD token without checking that the CPU supports it.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that the CPU features this token stands for are
    /// available wherever the token ends up being used.
    // NOTE(review): the exact feature list behind "Ice Lake feature set" is not spelled
    // out in this module; confirm it against the runtime detection code.
    pub const unsafe fn new_unchecked() -> Avx512 {
        Avx512 { _private: () }
    }
}
2 changes: 2 additions & 0 deletions fearless_simd/src/core_arch/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

mod avx;
mod avx2;
mod avx512;
mod fma;
mod sse;
mod sse2;
Expand All @@ -15,6 +16,7 @@ mod ssse3;

pub use avx::Avx;
pub use avx2::Avx2;
pub use avx512::Avx512;
pub use fma::Fma;
pub use sse::Sse;
pub use sse2::Sse2;
Expand Down
4 changes: 4 additions & 0 deletions fearless_simd/src/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod avx2;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod avx512;
mod fallback;
#[cfg(target_arch = "aarch64")]
mod neon;
Expand All @@ -59,6 +61,8 @@ mod wasm;

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use avx2::*;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use avx512::*;
pub use fallback::*;
#[cfg(target_arch = "aarch64")]
pub use neon::*;
Expand Down
14 changes: 7 additions & 7 deletions fearless_simd/src/generated/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8645,6 +8645,13 @@ unsafe fn cross_block_alignr_one(
};
unsafe { dyn_alignr_256(hi_blocks, lo_blocks, intra_shift) }
}
#[doc = r" Single-register variant of the cross-block alignr: `a` and `b` are each 1 x __m256i (= 2 blocks)."]
#[doc = r" Reads from the concatenation [b : a] (b in low bytes, a in high bytes), matching alignr semantics,"]
#[doc = r" and returns the 2 blocks that start at byte offset `shift_bytes`."]
#[inline(always)]
unsafe fn cross_block_alignr_256x1(a: __m256i, b: __m256i, shift_bytes: usize) -> __m256i {
    // `b` goes first so that it occupies the low bytes of the concatenation.
    // SAFETY: forwarded to the caller of this `unsafe fn`; presumably the only requirement
    // is the one `cross_block_alignr_one` itself has — TODO confirm against its definition.
    unsafe { cross_block_alignr_one(&[b, a], 0, shift_bytes) }
}
#[doc = r" Concatenates `b` and `a` (each 2 x __m256i = 4 blocks) and extracts 4 blocks starting at byte offset"]
#[doc = r" `shift_bytes`. Extracts from [b : a] (b in low bytes, a in high bytes), matching alignr semantics."]
#[inline(always)]
Expand All @@ -8661,10 +8668,3 @@ unsafe fn cross_block_alignr_256x2(
]
}
}
#[doc = r" Single-register variant of the cross-block alignr: `a` and `b` are each 1 x __m256i (= 2 blocks)."]
#[doc = r" Reads from the concatenation [b : a] (b in low bytes, a in high bytes), matching alignr semantics,"]
#[doc = r" and returns the 2 blocks that start at byte offset `shift_bytes`."]
#[inline(always)]
unsafe fn cross_block_alignr_256x1(a: __m256i, b: __m256i, shift_bytes: usize) -> __m256i {
    // `b` goes first so that it occupies the low bytes of the concatenation.
    // SAFETY: forwarded to the caller of this `unsafe fn`; presumably the only requirement
    // is the one `cross_block_alignr_one` itself has — TODO confirm against its definition.
    unsafe { cross_block_alignr_one(&[b, a], 0, shift_bytes) }
}
Loading