From 44401c54224f76991c3ed627f1b091f43b5f3fda Mon Sep 17 00:00:00 2001 From: Leo-AO-99 <1835702163@qq.com> Date: Sat, 11 Feb 2023 15:21:31 +0800 Subject: [PATCH] hw06 done --- main.cpp | 132 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 105 insertions(+), 27 deletions(-) diff --git a/main.cpp b/main.cpp index a1d2625..451eca7 100644 --- a/main.cpp +++ b/main.cpp @@ -6,14 +6,49 @@ #include #include "ticktock.h" +#include +#include +#include +#include +#include "pod.h" + // TODO: 并行化所有这些 for 循环 +// origin: +// fill: 0.713545s +// fill: 0.726485s +// saxpy: 0.0369504s +// sqrtdot: 0.0733301s +// 5165.4 +// minvalue: 0.0715443s +// -1.11803 +// magicfilter: 0.190107s +// 55924034 +// scanner: 0.0710648s +// 6.18926e+07 + +// parallel: +// fill: 0.106531s +// fill: 0.109878s +// saxpy: 0.0345495s +// sqrtdot: 0.0193823s +// 5792.61 +// minvalue: 0.0108345s +// -1.11803 +// magicfilter: 0.126976s +// 55924034 +// scanner: 0.0371496s +// 6.19079e+07 + template std::vector fill(std::vector &arr, Func const &func) { TICK(fill); - for (size_t i = 0; i < arr.size(); i++) { - arr[i] = func(i); - } + tbb::parallel_for( + tbb::blocked_range(0, arr.size()), + [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); ++i) + arr[i] = func(i); + }); TOCK(fill); return arr; } @@ -21,9 +56,12 @@ std::vector fill(std::vector &arr, Func const &func) { template void saxpy(T a, std::vector &x, std::vector const &y) { TICK(saxpy); - for (size_t i = 0; i < x.size(); i++) { - x[i] = a * x[i] + y[i]; - } + tbb::parallel_for( + tbb::blocked_range(0, x.size()), + [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); ++i) + x[i] = a * x[i] + y[i]; + }); TOCK(saxpy); } @@ -31,9 +69,17 @@ template T sqrtdot(std::vector const &x, std::vector const &y) { TICK(sqrtdot); T ret = 0; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - ret += x[i] * y[i]; - } + ret = tbb::parallel_reduce( + tbb::blocked_range(0, std::min(x.size(), y.size())), + static_cast(0), + [&](tbb::blocked_range r, T temp_sum) { + for (size_t i = r.begin(); i < r.end(); ++i) + temp_sum += x[i] * y[i]; + return temp_sum; + }, + [](T a, T b) { + return a + b; + }); ret = std::sqrt(ret); TOCK(sqrtdot); return ret; @@ -43,26 +89,47 @@ template T minvalue(std::vector const &x) { TICK(minvalue); T ret = x[0]; - for (size_t i = 1; i < x.size(); i++) { - if (x[i] < ret) - ret = x[i]; - } + ret = tbb::parallel_reduce( + tbb::blocked_range(1, x.size()), + x[0], + [&](tbb::blocked_range r, T temp_min) { + for ( size_t i = r.begin(); i < r.end(); ++i) + if (temp_min > x[i]) + temp_min = x[i]; + return temp_min; + }, + [](T a, T b) { + return std::min(a, b); + } + ); TOCK(minvalue); - return ret; + return ret; } template -std::vector magicfilter(std::vector const &x, std::vector const &y) { +std::vector> magicfilter(std::vector const &x, std::vector const &y) { TICK(magicfilter); - std::vector res; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - if (x[i] > y[i]) { - res.push_back(x[i]); - } else if (y[i] > x[i] && y[i] > 0.5f) { - res.push_back(y[i]); - res.push_back(x[i] * y[i]); + std::vector> res; + res.reserve(2 * std::min(x.size(), y.size())); + std::mutex mtx; + tbb::parallel_for( + tbb::blocked_range(0, std::min(x.size(), y.size())), + [&](tbb::blocked_range r) { + std::vector> cache; + cache.reserve(2 * r.size()); + for (size_t i = r.begin(); i < r.end(); ++i) { + if (x[i] > y[i]) + cache.push_back(x[i]); + else if (y[i] > x[i] && y[i] > 0.5f) { + cache.push_back(y[i]); + cache.push_back(x[i] * y[i]); + } + } + std::lock_guard lck(mtx); + std::copy(cache.begin(), cache.end(), std::back_inserter(res)); } - } + ); + TOCK(magicfilter); return res; } @@ -71,10 +138,21 @@ template T scanner(std::vector &x) { TICK(scanner); T ret = 0; - for (size_t i = 0; i < x.size(); i++) { - ret += x[i]; - x[i] = ret; - } + ret = tbb::parallel_scan( + tbb::blocked_range(0, x.size()), + static_cast(0), + [&] (tbb::blocked_range r, T temp_sum, auto is_final) { + for (size_t i = r.begin(); i < r.end(); i++) { + temp_sum += x[i]; + if (is_final) + x[i] = temp_sum; + } + return temp_sum; + }, + [] (T a, T b) { + return a + b; + } + ); TOCK(scanner); return ret; }