From e9f5a5b4f290ec8e50f91c23156fe3fc3ee359ab Mon Sep 17 00:00:00 2001 From: jinjiwu <3426377882@qq.com> Date: Sat, 22 Jan 2022 13:59:16 +0800 Subject: [PATCH] tbb --- main.cpp | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 4 deletions(-) diff --git a/main.cpp b/main.cpp index a1d2625..f48f77d 100644 --- a/main.cpp +++ b/main.cpp @@ -4,6 +4,10 @@ #include #include #include +#include +#include +#include + #include "ticktock.h" // TODO: 并行化所有这些 for 循环 @@ -15,6 +19,20 @@ std::vector fill(std::vector &arr, Func const &func) { arr[i] = func(i); } TOCK(fill); + + return arr; +} + +template +std::vector fill_parallel(std::vector &arr, Func const &func) { + TICK(fill_parallel); + tbb::parallel_for(tbb::blocked_range(0, arr.size()), [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); i++) { + arr[i] = func(i); + } + }); + TOCK(fill_parallel); + return arr; } @@ -22,11 +40,22 @@ template void saxpy(T a, std::vector &x, std::vector const &y) { TICK(saxpy); for (size_t i = 0; i < x.size(); i++) { - x[i] = a * x[i] + y[i]; + x[i] = a * x[i] + y[i]; } TOCK(saxpy); } +template +void saxpy_parallel(T a, std::vector &x, std::vector const &y) { + TICK(saxpy_parallel); + tbb::parallel_for(tbb::blocked_range(0, x.size()), [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); i++) { + x[i] = a * x[i] + y[i]; + } + }); + TOCK(saxpy_parallel); +} + template T sqrtdot(std::vector const &x, std::vector const &y) { TICK(sqrtdot); @@ -39,6 +68,23 @@ T sqrtdot(std::vector const &x, std::vector const &y) { return ret; } +template +T sqrtdot_parallel(std::vector const &x, std::vector const &y) { + TICK(sqrtdot_parallel); + float ret = tbb::parallel_reduce( + tbb::blocked_range(0, x.size()), (T)0, + [&](tbb::blocked_range r, float local_res) { + for (size_t i = r.begin(); i < r.end(); i++) { + local_res += x[i] * y[i]; + } + return local_res; + }, + [](float x, float y) { return x + y; }); + ret = std::sqrt(ret); + TOCK(sqrtdot_parallel); + return ret; +} + template T minvalue(std::vector const &x) { TICK(minvalue); @@ -51,6 +97,31 @@ T minvalue(std::vector const &x) { return ret; } +template +T minvalue_parallel(std::vector const &x) { + TICK(minvalue_parallel); + float ret = tbb::parallel_reduce( + tbb::blocked_range(0, x.size()), (T)0, + [&](tbb::blocked_range r, float local_res) { + local_res = x[r.begin()]; + for (size_t i = r.begin(); i < r.end(); i++) { + if (x[i] < local_res) { + local_res = x[i]; + } + } + return local_res; + }, + [](float x, float y) { + if (x < y) { + return x; + } else { + return y; + }; + }); + TOCK(minvalue_parallel); + return ret; +} + template std::vector magicfilter(std::vector const &x, std::vector const &y) { TICK(magicfilter); @@ -67,6 +138,32 @@ std::vector magicfilter(std::vector const &x, std::vector const &y) { return res; } +template +std::vector magicfilter_parallel(std::vector const &x, std::vector const &y) { + TICK(magicfilter_parallel); + std::mutex mtx; + // tbb::spin_mutex mtx; + std::vector res; + size_t n = std::min(x.size(), y.size()); + res.reserve(n); + tbb::parallel_for(tbb::blocked_range(0, n), [&](tbb::blocked_range r) { + std::vector local_a; + local_a.reserve(r.size()); + for (size_t i = r.begin(); i < r.end(); i++) { + if (x[i] > y[i]) { + local_a.push_back(x[i]); + } else if (y[i] > x[i] && y[i] > 0.5f) { + local_a.push_back(y[i]); + local_a.push_back(x[i] * y[i]); + } + } + std::lock_guard grd{mtx}; + std::copy(local_a.begin(), local_a.end(), std::back_inserter(res)); + }); + TOCK(magicfilter_parallel); + return res; +} + template T scanner(std::vector &x) { TICK(scanner); @@ -79,24 +176,57 @@ T scanner(std::vector &x) { return ret; } +template +T scanner_patallel(std::vector &x) { + TICK(scanner_patallel); + size_t n = x.size(); + auto ret = tbb::parallel_scan( + tbb::blocked_range(0, n), (T)0, + [&](tbb::blocked_range r, size_t local_res, auto is_final) { + for (auto i = r.begin(); i < r.end(); i++) { + local_res += x[i]; + if (is_final) + x[i] = local_res; + } + return local_res; + }, + [](size_t x, size_t y) { return x + y; }); + TOCK(scanner_patallel); + return ret; +} + int main() { - size_t n = 1<<26; + size_t n = 1 << 26; std::vector x(n); std::vector y(n); + std::vector z(n); + std::vector w(n); - fill(x, [&] (size_t i) { return std::sin(i); }); - fill(y, [&] (size_t i) { return std::cos(i); }); + fill(x, [&](size_t i) { return std::sin(i); }); + fill(y, [&](size_t i) { return std::cos(i); }); + + fill_parallel(z, [&](size_t i) { return std::sin(i); }); + fill_parallel(w, [&](size_t i) { return std::cos(i); }); saxpy(0.5f, x, y); + saxpy_parallel(0.5f, z, w); std::cout << sqrtdot(x, y) << std::endl; + std::cout << sqrtdot_parallel(x, y) << std::endl; + std::cout << minvalue(x) << std::endl; + std::cout << minvalue_parallel(x) << std::endl; auto arr = magicfilter(x, y); std::cout << arr.size() << std::endl; + auto arr_p = magicfilter_parallel(x, y); + std::cout << arr_p.size() << std::endl; scanner(x); std::cout << std::reduce(x.begin(), x.end()) << std::endl; + auto k = x; + scanner_patallel(y); + std::cout << std::reduce(k.begin(), k.end()) << std::endl; return 0; }