diff --git a/main.cpp b/main.cpp index a1d2625..af0013f 100644 --- a/main.cpp +++ b/main.cpp @@ -4,36 +4,67 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include #include "ticktock.h" +#include "pod.h" // TODO: 并行化所有这些 for 循环 template std::vector fill(std::vector &arr, Func const &func) { TICK(fill); - for (size_t i = 0; i < arr.size(); i++) { - arr[i] = func(i); - } + //tbb::task_arena ta(4); + //ta.execute([&] { + tbb::parallel_for(tbb::blocked_range(0, arr.size()), + [&](tbb::blocked_range r){ + for(size_t i=std::begin(r);i +// std::vector fill(std::vector &arr, Func const &func) { +// TICK(fill); +// for(size_t i=0;i void saxpy(T a, std::vector &x, std::vector const &y) { TICK(saxpy); - for (size_t i = 0; i < x.size(); i++) { - x[i] = a * x[i] + y[i]; - } + tbb::parallel_for(tbb::blocked_range(0, x.size()), + [&](tbb::blocked_range r) { + for (size_t i = std::begin(r); i < std::end(r); i++) + x[i] = a*x[i]+y[i]; + }); TOCK(saxpy); } template T sqrtdot(std::vector const &x, std::vector const &y) { TICK(sqrtdot); - T ret = 0; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - ret += x[i] * y[i]; - } + T ret = tbb::parallel_reduce(tbb::blocked_range(0, std::min(x.size(), y.size())), (T)0, [&](tbb::blocked_range r, T local_sum) { + for (size_t i = std::begin(r); i != std::end(r); i++) + { + local_sum += x[i]*y[i]; + } + return local_sum; + }, + [](T x, T y) { + return x+y; + }); ret = std::sqrt(ret); TOCK(sqrtdot); return ret; @@ -42,20 +73,54 @@ T sqrtdot(std::vector const &x, std::vector const &y) { template T minvalue(std::vector const &x) { TICK(minvalue); - T ret = x[0]; - for (size_t i = 1; i < x.size(); i++) { - if (x[i] < ret) - ret = x[i]; - } + T ret = tbb::parallel_reduce(tbb::blocked_range(0, x.size()), (T)x[0], [&](tbb::blocked_range r,T local_min) { + for (size_t i = std::begin(r); i != std::end(r); i++) + { + local_min = std::min(local_min, x[i]); + } + return local_min; + }, + [](T x,T y) { + return std::min(x, y); + }); TOCK(minvalue); return ret; } template -std::vector magicfilter(std::vector const &x, std::vector const &y) { +std::vector> magicfilter(std::vector const &x, std::vector const &y) { TICK(magicfilter); - std::vector res; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { + std::vector> res; + res.resize(2*std::min(x.size(),y.size())); + std::atomic res_size = 0; + + tbb::parallel_for(tbb::blocked_range(0, std::min(x.size(), y.size())), + [&](tbb::blocked_range r) { + std::vector> local_res; + local_res.resize(2 * r.size()); + size_t local_size = 0; + for (size_t i = std::begin(r); i != std::end(r); i++) + { + if (x[i] > y[i]) { + + local_res[local_size++] = x[i]; + } + else if (y[i] > x[i] && y[i] > 0.5f) { + + local_res[local_size++] = y[i]; + local_res[local_size++] = x[i] * y[i]; + } + } + size_t local_base = res_size.fetch_add(local_size); + for (size_t i = 0; i < local_size; i++) + { + res[local_base + i] = local_res[i]; + } + }); + res.resize(res_size); + + /* + for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { if (x[i] > y[i]) { res.push_back(x[i]); } else if (y[i] > x[i] && y[i] > 0.5f) { @@ -63,6 +128,8 @@ std::vector magicfilter(std::vector const &x, std::vector const &y) { res.push_back(x[i] * y[i]); } } + */ + TOCK(magicfilter); return res; } @@ -71,10 +138,20 @@ template T scanner(std::vector &x) { TICK(scanner); T ret = 0; - for (size_t i = 0; i < x.size(); i++) { - ret += x[i]; - x[i] = ret; - } + ret = tbb::parallel_scan(tbb::blocked_range(0, x.size()), T(0), + [&](tbb::blocked_range r,T local_res,auto is_final) { + for (size_t i = std::begin(r); i != std::end(r); i++) + { + local_res += x[i]; + if (is_final) + { + x[i] = local_res; + } + } + return local_res; + }, [](T x,T y) { + return x + y; + }); TOCK(scanner); return ret; }