diff --git a/after.txt b/after.txt
new file mode 100644
index 0000000..0df39b0
--- /dev/null
+++ b/after.txt
@@ -0,0 +1,11 @@
+fill: 0.130924s
+fill: 0.132913s
+saxpy: 0.0101733s
+sqrtdot: 0.0241576s
+5792.62
+minvalue: 0.0222686s
+-1.11803
+magicfilter: 0.115075s
+55924034
+scanner: 0.0292774s
+0
diff --git a/main.cpp b/main.cpp
index a1d2625..0faeca3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -5,15 +5,30 @@
 #include <numeric>
 #include <algorithm>
 #include "ticktock.h"
+#include <atomic>
+#include <tbb/blocked_range.h>
+#include <tbb/parallel_for.h>
+#include <tbb/parallel_reduce.h>
+#include <tbb/parallel_scan.h>
+#include <tbb/task_arena.h>
+#include "pod.h"
 
-// TODO: 并行化所有这些 for 循环
+// Every loop below runs in parallel through TBB inside an 8-thread task arena.
 
+// Stores func(i) into arr[i] for every index i of arr, in parallel, and
+// returns a copy of the filled vector.
 template <class T, class Func>
 std::vector<T> fill(std::vector<T> &arr, Func const &func) {
     TICK(fill);
-    for (size_t i = 0; i < arr.size(); i++) {
-        arr[i] = func(i);
-    }
+    tbb::task_arena ta(8);
+    ta.execute([&] {
+        tbb::parallel_for(tbb::blocked_range<size_t>(0, arr.size()),
+        [&] (tbb::blocked_range<size_t> const &r) {
+            for (size_t i = r.begin(); i != r.end(); i++) {
+                arr[i] = func(i);
+            }
+        });
+    });
     TOCK(fill);
     return arr;
 }
@@ -21,19 +36,40 @@ std::vector<T> fill(std::vector<T> &arr, Func const &func) {
+// Replaces every x[i] with a * x[i] + y[i], in parallel. y is indexed up to
+// x.size(), so it must be at least as long as x.
 template <class T>
 void saxpy(T a, std::vector<T> &x, std::vector<T> const &y) {
     TICK(saxpy);
-    for (size_t i = 0; i < x.size(); i++) {
-        x[i] = a * x[i] + y[i];
-    }
+    tbb::task_arena ta(8);
+    ta.execute([&] {
+        tbb::parallel_for(tbb::blocked_range<size_t>(0, x.size()),
+        [&] (tbb::blocked_range<size_t> const &r) {
+            for (size_t i = r.begin(); i != r.end(); i++) {
+                x[i] = a * x[i] + y[i];
+            }
+        });
+    });
     TOCK(saxpy);
 }
 
+// Returns the square root of the sum of x[i] * y[i] over the first
+// min(x.size(), y.size()) indices, accumulated with a parallel reduction.
 template <class T>
 T sqrtdot(std::vector<T> const &x, std::vector<T> const &y) {
     TICK(sqrtdot);
     T ret = 0;
-    for (size_t i = 0; i < std::min(x.size(), y.size()); i++) {
-        ret += x[i] * y[i];
-    }
+    tbb::task_arena ta(8);
+    ta.execute([&] {
+        ret = tbb::parallel_reduce(
+        tbb::blocked_range<size_t>(0, std::min(x.size(), y.size())), T(0),
+        [&] (tbb::blocked_range<size_t> const &r, T local_res) {
+            for (size_t i = r.begin(); i != r.end(); i++) {
+                local_res += x[i] * y[i];
+            }
+            return local_res;
+        },
+        [] (T lhs, T rhs) {
+            return lhs + rhs;
+        });
+    });
     ret = std::sqrt(ret);
     TOCK(sqrtdot);
     return ret;
@@ -42,27 +78,68 @@ T sqrtdot(std::vector<T> const &x, std::vector<T> const &y) {
+// Returns the smallest element of x, found with a parallel reduction.
+// x must not be empty because x[0] seeds the result.
 template <class T>
 T minvalue(std::vector<T> const &x) {
     TICK(minvalue);
     T ret = x[0];
-    for (size_t i = 1; i < x.size(); i++) {
-        if (x[i] < ret)
-            ret = x[i];
-    }
+    tbb::task_arena ta(8);
+    ta.execute([&] {
+        ret = tbb::parallel_reduce(
+        tbb::blocked_range<size_t>(0, x.size()), x[0],
+        [&] (tbb::blocked_range<size_t> const &r, T local_res) {
+            // Scan from r.begin(): starting at r.begin() + 1 would skip the
+            // first element of every sub-range.
+            for (size_t i = r.begin(); i != r.end(); i++) {
+                if (x[i] < local_res)
+                    local_res = x[i];
+            }
+            return local_res;
+        },
+        [] (T lhs, T rhs) {
+            return std::min(lhs, rhs);
+        });
+    });
     TOCK(minvalue);
     return ret;
 }
 
+// Collects, for each index below min(x.size(), y.size()): x[i] when
+// x[i] > y[i]; otherwise y[i] followed by x[i] * y[i] when y[i] > x[i] and
+// y[i] > 0.5f. Sub-ranges claim their output slots in whatever order they
+// reach the shared counter, so the order can differ from a serial pass.
 template <class T>
-std::vector<T> magicfilter(std::vector<T> const &x, std::vector<T> const &y) {
+auto magicfilter(std::vector<T> const &x, std::vector<T> const &y) {
     TICK(magicfilter);
-    std::vector<T> res;
-    for (size_t i = 0; i < std::min(x.size(), y.size()); i++) {
-        if (x[i] > y[i]) {
-            res.push_back(x[i]);
-        } else if (y[i] > x[i] && y[i] > 0.5f) {
-            res.push_back(y[i]);
-            res.push_back(x[i] * y[i]);
-        }
-    }
+    size_t n = std::min(x.size(), y.size());
+    // A single index can emit two values, so the buffer needs 2 * n slots.
+    std::vector<pod<T>> res(2 * n);
+    std::atomic<size_t> a_size{0};
+
+    tbb::task_arena ta(8);
+    ta.execute([&] {
+        tbb::parallel_for(tbb::blocked_range<size_t>(0, n),
+        [&] (tbb::blocked_range<size_t> const &r) {
+            // Gather this sub-range's output locally (at most two values per
+            // index) so the shared counter is updated once per sub-range.
+            std::vector<pod<T>> la(2 * r.size());
+            size_t la_idx = 0;
+            for (size_t i = r.begin(); i != r.end(); i++) {
+                if (x[i] > y[i]) {
+                    la[la_idx++] = x[i];
+                } else if (y[i] > x[i] && y[i] > 0.5f) {
+                    la[la_idx++] = y[i];
+                    la[la_idx++] = x[i] * y[i];
+                }
+            }
+
+            // Claim a contiguous slice of res, then copy the local values in.
+            size_t base = a_size.fetch_add(la_idx);
+            for (size_t j = 0; j != la_idx; j++) {
+                res[base + j] = la[j];
+            }
+        });
+    });
+
+    res.resize(a_size.load());
     TOCK(magicfilter);
     return res;
 }
@@ -70,11 +147,28 @@ std::vector<T> magicfilter(std::vector<T> const &x, std::vector<T> const &y) {
+// Replaces x with its inclusive prefix sums (x[i] becomes x[0] + ... + x[i])
+// using a parallel scan, and returns the sum of all elements.
 template <class T>
 T scanner(std::vector<T> &x) {
     TICK(scanner);
     T ret = 0;
-    for (size_t i = 0; i < x.size(); i++) {
-        ret += x[i];
-        x[i] = ret;
-    }
+    tbb::task_arena ta(8);
+    ta.execute([&] {
+        ret = tbb::parallel_scan(
+        tbb::blocked_range<size_t>(0, x.size()), T(0),
+        [&] (tbb::blocked_range<size_t> const &r, T local_res, auto is_final) {
+            for (size_t i = r.begin(); i != r.end(); i++) {
+                local_res += x[i];
+                // Only the final pass may write, and it must store the
+                // running sum of this pass, not the outer ret.
+                if (is_final) {
+                    x[i] = local_res;
+                }
+            }
+            return local_res;
+        },
+        [] (T lhs, T rhs) {
+            return lhs + rhs;
+        });
+    });
     TOCK(scanner);
     return ret;
 }