diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1be854f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "cmath": "cpp" + } +} \ No newline at end of file diff --git a/main.cpp b/main.cpp index a1d2625..f59764e 100644 --- a/main.cpp +++ b/main.cpp @@ -4,16 +4,24 @@ #include #include #include +#include +#include +#include +#include +#include #include "ticktock.h" + // TODO: 并行化所有这些 for 循环 template std::vector fill(std::vector &arr, Func const &func) { TICK(fill); - for (size_t i = 0; i < arr.size(); i++) { - arr[i] = func(i); - } + tbb::parallel_for((size_t)0, arr.size(), + [&](size_t i){ + arr[i] = func(i); + } + ); TOCK(fill); return arr; } @@ -21,19 +29,31 @@ std::vector fill(std::vector &arr, Func const &func) { template void saxpy(T a, std::vector &x, std::vector const &y) { TICK(saxpy); - for (size_t i = 0; i < x.size(); i++) { - x[i] = a * x[i] + y[i]; - } + tbb::parallel_for(tbb::blocked_range(0,x.size()), + [&](tbb::blocked_ranger){ + for (size_t i = r.begin(); i < r.end(); ++i) { + x[i] = a * x[i] + y[i]; + } + } + ); TOCK(saxpy); } template T sqrtdot(std::vector const &x, std::vector const &y) { TICK(sqrtdot); - T ret = 0; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - ret += x[i] * y[i]; - } + size_t n = std::min(x.size(), y.size()); + T ret = tbb::parallel_reduce(tbb::blocked_range(0,n), T{}, + [&](tbb::blocked_range r, T local_ret) { + for(size_t i = r.begin(); i < r.end(); ++i) { + local_ret += x[i] * y[i]; + } + return local_ret; + }, + [](T x, T y){ + return x + y; + } + ); ret = std::sqrt(ret); TOCK(sqrtdot); return ret; @@ -42,11 +62,19 @@ T sqrtdot(std::vector const &x, std::vector const &y) { template T minvalue(std::vector const &x) { TICK(minvalue); - T ret = x[0]; - for (size_t i = 1; i < x.size(); i++) { - if (x[i] < ret) - ret = x[i]; - } + T ret = tbb::parallel_reduce(tbb::blocked_range(1,x.size()), x[0], + [&](tbb::blocked_range r, T local_ret ) { + for(size_t i = r.begin(); i < r.end(); ++i) { + if(x[i] < local_ret) { + local_ret = x[i]; + } + } + return local_ret; + }, + [](T x , T y){ + return std::min(x,y); + } + ); TOCK(minvalue); return ret; } @@ -55,14 +83,28 @@ template std::vector magicfilter(std::vector const &x, std::vector const &y) { TICK(magicfilter); std::vector res; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - if (x[i] > y[i]) { - res.push_back(x[i]); - } else if (y[i] > x[i] && y[i] > 0.5f) { - res.push_back(y[i]); - res.push_back(x[i] * y[i]); + size_t n = std::min(x.size(), y.size()); + res.reserve(n*3); + tbb::spin_mutex mtx; + tbb::task_arena ta(20); + ta.execute([&]{ + tbb::parallel_for(tbb::blocked_range(0,n), + [&](tbb::blocked_range r){ + std::vector local_res; + local_res.reserve(r.size()*2); + for(size_t i = r.begin(); i y[i]) { + local_res.push_back(x[i]); + } else if (y[i] > x[i] && y[i] > 0.5f) { + local_res.push_back(y[i]); + local_res.push_back(x[i] * y[i]); + } + } + std::lock_guard lock_guard(mtx); + std::copy(local_res.begin(), local_res.end(), std::back_inserter(res)); + }, tbb::auto_partitioner{}); } - } + ); TOCK(magicfilter); return res; } @@ -70,11 +112,20 @@ std::vector magicfilter(std::vector const &x, std::vector const &y) { template T scanner(std::vector &x) { TICK(scanner); - T ret = 0; - for (size_t i = 0; i < x.size(); i++) { - ret += x[i]; - x[i] = ret; - } + T ret = tbb::parallel_scan(tbb::blocked_range(0,x.size()), T{}, + [&](tbb::blocked_ranger, T locale_ret, auto is_final) { + for(size_t i = r.begin(); i < r.end(); ++i) { + locale_ret += x[i]; + if(is_final) { + x[i] = locale_ret; + } + } + return locale_ret; + }, + [](T x, T y) { + return x + y; + } + ); TOCK(scanner); return ret; }