From e45eaffce5d5e8abfd20ffc1cc0fea56c4034265 Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Thu, 22 Jan 2026 17:30:14 +0000 Subject: [PATCH 1/7] drop #[inline] on vectorize from the generator --- fearless_simd_gen/src/level.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/fearless_simd_gen/src/level.rs b/fearless_simd_gen/src/level.rs index 7043a5784..f091b9a9a 100644 --- a/fearless_simd_gen/src/level.rs +++ b/fearless_simd_gen/src/level.rs @@ -102,7 +102,6 @@ pub(crate) trait Level { let vectorize = format_ident!("vectorize_{}", self.name().to_ascii_lowercase()); quote! { #[target_feature(enable = #target_features)] - #[inline] unsafe fn #vectorize<F: FnOnce() -> R, R>(f: F) -> R { f() } @@ -149,7 +148,6 @@ pub(crate) trait Level { #level_body } - #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #vectorize_body } From 4e4c3679ac5891f74e260de2f549187749a5cd0c Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Thu, 22 Jan 2026 17:30:35 +0000 Subject: [PATCH 2/7] re-run the generator --- fearless_simd/src/generated/avx2.rs | 2 -- fearless_simd/src/generated/fallback.rs | 1 - fearless_simd/src/generated/neon.rs | 2 -- fearless_simd/src/generated/sse4_2.rs | 2 -- fearless_simd/src/generated/wasm.rs | 1 - 5 files changed, 8 deletions(-) diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs index f09e03d07..f263c0cad 100644 --- a/fearless_simd/src/generated/avx2.rs +++ b/fearless_simd/src/generated/avx2.rs @@ -88,10 +88,8 @@ impl Simd for Avx2 { fn level(self) -> Level { Level::Avx2(self) } - #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "avx2,fma")] - #[inline] unsafe fn vectorize_avx2<F: FnOnce() -> R, R>(f: F) -> R { f() } diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index c2f39a1bd..f093eae47 100644 --- a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -146,7 +146,6 @@ impl Simd for Fallback { 
#[cfg(not(feature = "force_support_fallback"))] Level::baseline() } - #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { f() } diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index def528dde..db245e975 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -80,10 +80,8 @@ impl Simd for Neon { fn level(self) -> Level { Level::Neon(self) } - #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "neon")] - #[inline] unsafe fn vectorize_neon<F: FnOnce() -> R, R>(f: F) -> R { f() } diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index a9f7c1ad4..b3fbb88ca 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -93,10 +93,8 @@ impl Simd for Sse4_2 { Level::baseline() } } - #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "sse4.2")] - #[inline] unsafe fn vectorize_sse4_2<F: FnOnce() -> R, R>(f: F) -> R { f() } diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index 59c94768f..121d81170 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -80,7 +80,6 @@ impl Simd for WasmSimd128 { fn level(self) -> Level { Level::WasmSimd128(self) } - #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { f() } From 5916f09607b27366a2585a5ef76b4106c0c7e4a8 Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Fri, 23 Jan 2026 17:04:14 +0000 Subject: [PATCH 3/7] Reinstate the #[inline] attribute on the outer vectorize() in the generator --- fearless_simd_gen/src/level.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/fearless_simd_gen/src/level.rs b/fearless_simd_gen/src/level.rs index f091b9a9a..070b99cb7 100644 --- a/fearless_simd_gen/src/level.rs +++ b/fearless_simd_gen/src/level.rs @@ -148,6 +148,7 @@ pub(crate) trait Level { #level_body } + #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #vectorize_body } From 
c8050d23c452a823e846644590f0ba5f9c82a5fd Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Fri, 23 Jan 2026 17:04:32 +0000 Subject: [PATCH 4/7] Re-run the generator --- fearless_simd/src/generated/avx2.rs | 1 + fearless_simd/src/generated/fallback.rs | 1 + fearless_simd/src/generated/neon.rs | 1 + fearless_simd/src/generated/sse4_2.rs | 1 + fearless_simd/src/generated/wasm.rs | 1 + 5 files changed, 5 insertions(+) diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs index f263c0cad..36884c7c2 100644 --- a/fearless_simd/src/generated/avx2.rs +++ b/fearless_simd/src/generated/avx2.rs @@ -88,6 +88,7 @@ impl Simd for Avx2 { fn level(self) -> Level { Level::Avx2(self) } + #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "avx2,fma")] unsafe fn vectorize_avx2<F: FnOnce() -> R, R>(f: F) -> R { diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index f093eae47..c2f39a1bd 100644 --- a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -146,6 +146,7 @@ impl Simd for Fallback { #[cfg(not(feature = "force_support_fallback"))] Level::baseline() } + #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { f() } diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index db245e975..cd795d72e 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -80,6 +80,7 @@ impl Simd for Neon { fn level(self) -> Level { Level::Neon(self) } + #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "neon")] unsafe fn vectorize_neon<F: FnOnce() -> R, R>(f: F) -> R { diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index b3fbb88ca..c18883073 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -93,6 +93,7 @@ impl Simd for Sse4_2 { Level::baseline() } } + #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { 
#[target_feature(enable = "sse4.2")] unsafe fn vectorize_sse4_2<F: FnOnce() -> R, R>(f: F) -> R { diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index 121d81170..59c94768f 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -80,6 +80,7 @@ impl Simd for WasmSimd128 { fn level(self) -> Level { Level::WasmSimd128(self) } + #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { f() } From 91e239676c599c6fff85ab5d7e6aa87cd6040f1b Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Mon, 26 Jan 2026 00:04:49 +0000 Subject: [PATCH 5/7] Add a comment documenting the rationale for the lack of #[inline] --- fearless_simd_gen/src/level.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fearless_simd_gen/src/level.rs b/fearless_simd_gen/src/level.rs index 070b99cb7..d4674a039 100644 --- a/fearless_simd_gen/src/level.rs +++ b/fearless_simd_gen/src/level.rs @@ -101,6 +101,9 @@ pub(crate) trait Level { let vectorize_body = if let Some(target_features) = self.enabled_target_features() { let vectorize = format_ident!("vectorize_{}", self.name().to_ascii_lowercase()); quote! { + // This function is deliberately not marked #[inline]: + // The closure passed to it is already required to be #[inline(always)], + // so this wrapper is the only opportunity for the compiler to make inlining decisions. 
#[target_feature(enable = #target_features)] unsafe fn #vectorize<F: FnOnce() -> R, R>(f: F) -> R { f() From ef5c9037fa5438749b48de06c4f129fadecc1d4a Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Mon, 26 Jan 2026 00:32:33 +0000 Subject: [PATCH 6/7] Add inlining barrier for the non-dynamically-dispatched codepath --- fearless_simd_gen/src/level.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fearless_simd_gen/src/level.rs b/fearless_simd_gen/src/level.rs index d4674a039..c26f1f4de 100644 --- a/fearless_simd_gen/src/level.rs +++ b/fearless_simd_gen/src/level.rs @@ -111,9 +111,16 @@ pub(crate) trait Level { unsafe { #vectorize(f) } } } else { - // If this SIMD level doesn't do runtime feature detection/enabling, just call the inner function as-is + // This SIMD level doesn't do runtime feature detection/enabling, so we could just call the passed closure as-is. + // + // But the inner function is required to be annotated `#[inline(always)]`, + // so we wrap it in a function that isn't `#[inline(always)]` + // to let the compiler make its own inlining decisions, as opposed to forcing it to inline everything. quote! 
{ - f() + fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R { + f() + } + vectorize_inner(f) } }; From 3f1c00f8104687f19aa6bef7f6f95c77a5a4df5a Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Mon, 26 Jan 2026 00:32:59 +0000 Subject: [PATCH 7/7] re-run the generator --- fearless_simd/src/generated/fallback.rs | 5 ++++- fearless_simd/src/generated/wasm.rs | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index c2f39a1bd..78c0f121d 100644 --- a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -148,7 +148,10 @@ impl Simd for Fallback { } #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { - f() + fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R { + f() + } + vectorize_inner(f) } #[inline(always)] fn splat_f32x4(self, val: f32) -> f32x4 { diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index 59c94768f..ef15b5c88 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -82,7 +82,10 @@ impl Simd for WasmSimd128 { } #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { - f() + fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R { + f() + } + vectorize_inner(f) } #[inline(always)] fn splat_f32x4(self, val: f32) -> f32x4 {