Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/invoking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,21 @@ macro_rules! simd_compiletime_select {
}
};
($(#[$meta:meta])* $vis:vis fn $fn_name:ident ($($arg:ident:$typ:ty),* $(,)? ) $body:block ) => {
simd_runtime_generate!($(#[$meta])* $vis fn $fn_name ($($arg:$typ),*) -> () $body);
simd_compiletime_select!($(#[$meta])* $vis fn $fn_name ($($arg:$typ),*) -> () $body);
};
}

#[macro_export]
macro_rules! simd_unsafe_generate_all {
($(#[$meta:meta])* $vis:vis fn $fn_name:ident $(<$($lt:lifetime),+>)? ($($arg:ident:$typ:ty),* $(,)? ) -> $rt:ty $body:block ) => {
simdeez_paste_item! {
$(#[$meta])*
#[inline(always)]
$vis fn $fn_name $(<$($lt),+>)?($($arg:$typ,)*) -> $rt {
let args_tuple = ($($arg,)*);
__run_simd_runtime_decide::<[<__ $fn_name _dispatch_struct>], fix_tuple_type!(($($typ),*)), $rt>(args_tuple)
}

$(#[$meta])*
#[inline(always)]
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
Expand Down Expand Up @@ -159,7 +166,7 @@ macro_rules! simd_unsafe_generate_all {
}
};
($(#[$meta:meta])* $vis:vis fn $fn_name:ident ($($arg:ident:$typ:ty),* $(,)? ) $body:block ) => {
simd_runtime_generate!($(#[$meta])* $vis fn $fn_name ($($arg:$typ),*) -> () $body);
simd_unsafe_generate_all!($(#[$meta])* $vis fn $fn_name ($($arg:$typ),*) -> () $body);
};
}

Expand Down
22 changes: 5 additions & 17 deletions src/ops/f64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,35 +654,23 @@ impl_op! {
fn horizontal_add<f64> {
for Avx2(a: __m256d) -> f64 {
let a = _mm256_hadd_pd(a, a);
let b = _mm256_hadd_pd(a, a);

let first = _mm_cvtsd_f64(_mm256_extractf128_pd(b, 0));
let second = _mm_cvtsd_f64(_mm256_extractf128_pd(b, 1));

let first = _mm_cvtsd_f64(_mm256_extractf128_pd(a, 0));
let second = _mm_cvtsd_f64(_mm256_extractf128_pd(a, 1));
first + second
}
for Sse41(a: __m128d) -> f64 {
let a = _mm_hadd_pd(a, a);

let first = _mm_cvtsd_f64(a);
let second = _mm_cvtsd_f64(_mm_shuffle_pd(a, a, 1));

first + second
_mm_cvtsd_f64(_mm_hadd_pd(a, a))
}
for Sse2(a: __m128d) -> f64 {
let a = _mm_add_pd(a, _mm_shuffle_pd(a, a, 1));

let first = _mm_cvtsd_f64(a);
let second = _mm_cvtsd_f64(_mm_shuffle_pd(a, a, 1));

first + second
_mm_cvtsd_f64(a)
}
for Scalar(a: f64) -> f64 {
a
}
for Neon(a: float64x2_t) -> f64 {
let a = vpaddq_f64(a, a);
vgetq_lane_f64(a, 0) + vgetq_lane_f64(a, 1)
vgetq_lane_f64(a, 0)
}
for Wasm(a: v128) -> f64 {
let l0 = f64x2_extract_lane::<0>(a);
Expand Down
14 changes: 10 additions & 4 deletions src/tests/lib/numbers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,13 @@ impl ScalarNumber for f32 {
match precision {
EqPrecision::Exact => self == other,
EqPrecision::Almost { figs } => {
let epsilon = 10.0f32.powi(-(figs as i32));
if (self - other).abs() < epsilon {
return true;
}
let bigger = self.max(other);
let norm_diff = (self / bigger) - (other / bigger);
let epsilon = 10.0f32.powi(-(figs as i32));
norm_diff < epsilon
norm_diff.abs() < epsilon
}
}
}
Expand Down Expand Up @@ -189,10 +192,13 @@ impl ScalarNumber for f64 {
match precision {
EqPrecision::Exact => self == other,
EqPrecision::Almost { figs } => {
let epsilon = 10.0f64.powi(-(figs as i32));
if (self - other).abs() < epsilon {
return true;
}
let bigger = self.max(other);
let norm_diff = (self / bigger) - (other / bigger);
let epsilon = 10.0f64.powi(-(figs as i32));
norm_diff < epsilon
norm_diff.abs() < epsilon
}
}
}
Expand Down
Loading