diff --git a/main.cpp b/main.cpp index a1d2625..98a47f6 100644 --- a/main.cpp +++ b/main.cpp @@ -1,102 +1,152 @@ -#include -#include -#include +#include +#include #include +#include +#include #include -#include +#include +#include +#include +#include +#include + +#include "pod.h" #include "ticktock.h" // TODO: 并行化所有这些 for 循环 template -std::vector fill(std::vector &arr, Func const &func) { - TICK(fill); - for (size_t i = 0; i < arr.size(); i++) { - arr[i] = func(i); - } - TOCK(fill); - return arr; +std::vector fill(std::vector& arr, Func const& func) { + TICK(fill); + const int n = arr.size(); + tbb::parallel_for(tbb::blocked_range(0, n), + [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); ++i) { + arr[i] = func(i); + } + }); + TOCK(fill); + return arr; } -template -void saxpy(T a, std::vector &x, std::vector const &y) { - TICK(saxpy); - for (size_t i = 0; i < x.size(); i++) { - x[i] = a * x[i] + y[i]; - } - TOCK(saxpy); +template +void saxpy(t a, std::vector& x, std::vector const& y) { + TICK(saxpy); + const int n = x.size(); + tbb::task_arena ta(std::thread::hardware_concurrency()); + tbb::parallel_for(tbb::blocked_range(0, n), + [&](tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); ++i) { + x[i] = a * x[i] + y[i]; + } + }); + TOCK(saxpy); } template -T sqrtdot(std::vector const &x, std::vector const &y) { - TICK(sqrtdot); - T ret = 0; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - ret += x[i] * y[i]; - } - ret = std::sqrt(ret); - TOCK(sqrtdot); - return ret; +T sqrtdot(std::vector const& x, std::vector const& y) { + TICK(sqrtdot); + const int n = std::min(x.size(), y.size()); + // tbb::parallel_deterministic_reduce(tbb::blocked_range(0, n), + T ret = tbb::parallel_reduce( + tbb::blocked_range(0, n), static_cast(0), + [&](tbb::blocked_range r, T local_res) { + for (size_t i = r.begin(); i < r.end(); ++i) { + local_res += x[i] * y[i]; + } + return local_res; + }, + [](T x, T y) { return x + y; }); + ret = std::sqrt(ret); + TOCK(sqrtdot); + return ret; } template -T minvalue(std::vector const &x) { - TICK(minvalue); - T ret = x[0]; - for (size_t i = 1; i < x.size(); i++) { - if (x[i] < ret) - ret = x[i]; - } - TOCK(minvalue); - return ret; +T minvalue(std::vector const& x) { + TICK(minvalue); + const size_t n = x.size(); + T ret = tbb::parallel_reduce( + tbb::blocked_range(0, n), x[0], + [&](tbb::blocked_range r, T local_res) { + for (size_t i = r.begin(); i < r.end(); ++i) { + local_res = std::min(local_res, x[i]); + } + return local_res; + }, + [](T a, T b) { return std::min(a, b); }); + TOCK(minvalue); + return ret; } template -std::vector magicfilter(std::vector const &x, std::vector const &y) { - TICK(magicfilter); - std::vector res; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - if (x[i] > y[i]) { - res.push_back(x[i]); - } else if (y[i] > x[i] && y[i] > 0.5f) { - res.push_back(y[i]); - res.push_back(x[i] * y[i]); - } - } - TOCK(magicfilter); - return res; +std::vector> magicfilter(std::vector const& x, + std::vector const& y) { + TICK(magicfilter); + std::vector> res; + const int n = std::min(x.size(), y.size()); + res.resize(n * 2); + std::atomic idx = 0; + tbb::parallel_for(tbb::blocked_range(0, n), + [&](tbb::blocked_range r) { + std::vector> local_res(r.size() * 2); + size_t local_idx = 0; + for (size_t i = r.begin(); i < r.end(); ++i) { + if (x[i] > y[i]) { + local_res[local_idx++] = x[i]; + } else if (y[i] > x[i] && y[i] > 0.5f) { + local_res[local_idx++] = y[i]; + local_res[local_idx++] = x[i] * y[i]; + } + } + size_t base = idx.fetch_add(local_idx); + for (size_t i = 0; i < local_idx; ++i) { + res[base + i] = local_res[i]; + } + }); + res.resize(idx); + TOCK(magicfilter); + return res; } template -T scanner(std::vector &x) { - TICK(scanner); - T ret = 0; - for (size_t i = 0; i < x.size(); i++) { - ret += x[i]; - x[i] = ret; - } - TOCK(scanner); - return ret; +T scanner(std::vector& x) { + TICK(scanner); + const int n = x.size(); + T ret = tbb::parallel_scan( + tbb::blocked_range(0, n), static_cast(0), + [&](tbb::blocked_range r, T local_res, auto is_final) { + for (size_t i = r.begin(); i < r.end(); ++i) { + local_res += x[i]; + if (is_final) { + x[i] = local_res; + } + } + return local_res; + }, + [](T x, T y) { return x + y; }); + TOCK(scanner); + return ret; } int main() { - size_t n = 1<<26; - std::vector x(n); - std::vector y(n); + size_t n = 1 << 26; + std::vector x(n); + std::vector y(n); - fill(x, [&] (size_t i) { return std::sin(i); }); - fill(y, [&] (size_t i) { return std::cos(i); }); + fill(x, [&](size_t i) { return std::sin(i); }); + fill(y, [&](size_t i) { return std::cos(i); }); - saxpy(0.5f, x, y); + saxpy(0.5f, x, y); - std::cout << sqrtdot(x, y) << std::endl; - std::cout << minvalue(x) << std::endl; + std::cout << sqrtdot(x, y) << std::endl; + std::cout << minvalue(x) << std::endl; - auto arr = magicfilter(x, y); - std::cout << arr.size() << std::endl; + auto arr = magicfilter(x, y); + std::cout << arr.size() << std::endl; - scanner(x); - std::cout << std::reduce(x.begin(), x.end()) << std::endl; + scanner(x); + std::cout << std::reduce(x.begin(), x.end()) << std::endl; - return 0; + return 0; }