diff --git a/fearless_simd/src/generated/avx2.rs b/fearless_simd/src/generated/avx2.rs index 7bae56354..425d46a11 100644 --- a/fearless_simd/src/generated/avx2.rs +++ b/fearless_simd/src/generated/avx2.rs @@ -91,7 +91,6 @@ impl Simd for Avx2 { #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,lzcnt,movbe,popcnt,xsave")] - #[inline] unsafe fn vectorize_avx2<F: FnOnce() -> R, R>(f: F) -> R { f() } diff --git a/fearless_simd/src/generated/fallback.rs b/fearless_simd/src/generated/fallback.rs index d13424ddf..dcf030769 100644 --- a/fearless_simd/src/generated/fallback.rs +++ b/fearless_simd/src/generated/fallback.rs @@ -148,7 +148,10 @@ impl Simd for Fallback { } #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { - f() + fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R { + f() + } + vectorize_inner(f) } #[inline(always)] fn splat_f32x4(self, val: f32) -> f32x4 { diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index 625ad67c8..d571a637a 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -83,7 +83,6 @@ impl Simd for Neon { #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "neon")] - #[inline] unsafe fn vectorize_neon<F: FnOnce() -> R, R>(f: F) -> R { f() } diff --git a/fearless_simd/src/generated/sse4_2.rs b/fearless_simd/src/generated/sse4_2.rs index ca60398fe..cc4efd0df 100644 --- a/fearless_simd/src/generated/sse4_2.rs +++ b/fearless_simd/src/generated/sse4_2.rs @@ -96,7 +96,6 @@ impl Simd for Sse4_2 { #[inline] fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R { #[target_feature(enable = "sse4.2,cmpxchg16b,popcnt")] - #[inline] unsafe fn vectorize_sse4_2<F: FnOnce() -> R, R>(f: F) -> R { f() } diff --git a/fearless_simd/src/generated/wasm.rs b/fearless_simd/src/generated/wasm.rs index eb893a79c..7730650f6 100644 --- a/fearless_simd/src/generated/wasm.rs +++ b/fearless_simd/src/generated/wasm.rs @@ -82,7 +82,10 @@ impl Simd for WasmSimd128 { } #[inline] fn vectorize<F: FnOnce() -> R, R>(self, 
f: F) -> R { - f() + fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R { + f() + } + vectorize_inner(f) } #[inline(always)] fn splat_f32x4(self, val: f32) -> f32x4 { diff --git a/fearless_simd_gen/src/level.rs b/fearless_simd_gen/src/level.rs index 31e0b3fcb..0bfe06234 100644 --- a/fearless_simd_gen/src/level.rs +++ b/fearless_simd_gen/src/level.rs @@ -107,17 +107,26 @@ pub(crate) trait Level { let vectorize_body = if let Some(target_features) = self.enabled_target_features() { let vectorize = format_ident!("vectorize_{}", self.name().to_ascii_lowercase()); quote! { + // This function is deliberately not marked #[inline]: + // The closure passed to it is already required to be #[inline(always)], + // so this wrapper is the only opportunity for the compiler to make inlining decisions. #[target_feature(enable = #target_features)] - #[inline] unsafe fn #vectorize<F: FnOnce() -> R, R>(f: F) -> R { f() } unsafe { #vectorize(f) } } } else { - // If this SIMD level doesn't do runtime feature detection/enabling, just call the inner function as-is + // This SIMD level doesn't do runtime feature detection/enabling, so we could just call the passed closure as-is. + // + // But the inner function is required to be annotated `#[inline(always)]`, + // so we wrap it in a function that isn't `#[inline(always)]` + // to let the compiler make its own inlining decisions, as opposed to forcing it to inline everything. quote! { - f() + fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R { + f() + } + vectorize_inner(f) } };