diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d6a8f8..3260e3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,11 +7,11 @@ project(main LANGUAGES CXX) add_executable(main main.cpp) -#find_package(OpenMP REQUIRED) -#target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX) +find_package(OpenMP REQUIRED) +target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX) find_package(TBB REQUIRED) target_link_libraries(main PUBLIC TBB::tbb) -#find_package(benchmark REQUIRED) -#target_link_libraries(main PUBLIC benchmark::benchmark) +find_package(benchmark REQUIRED) +target_link_libraries(main PUBLIC benchmark::benchmark) diff --git a/main.cpp b/main.cpp index a1d2625..21b10a3 100644 --- a/main.cpp +++ b/main.cpp @@ -4,16 +4,25 @@ #include #include #include -#include "ticktock.h" +#include +#include +#include +#include + +#include "ticktock.h" +#include "pod.h" // TODO: 并行化所有这些 for 循环 template std::vector fill(std::vector &arr, Func const &func) { TICK(fill); - for (size_t i = 0; i < arr.size(); i++) { - arr[i] = func(i); - } + //直接并行 ,平均时间:0.0018s + tbb::parallel_for(tbb::blocked_range(0, arr.size()), [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); i++) { + arr[i] = func(i); + } + }); TOCK(fill); return arr; } @@ -21,19 +30,27 @@ std::vector fill(std::vector &arr, Func const &func) { template void saxpy(T a, std::vector &x, std::vector const &y) { TICK(saxpy); - for (size_t i = 0; i < x.size(); i++) { - x[i] = a * x[i] + y[i]; - } + //直接并行 ,平均时间:0.05s + tbb::parallel_for(tbb::blocked_range(0, x.size()), [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); i++) { + x[i] = a * x[i] + y[i]; + } + }); TOCK(saxpy); } template T sqrtdot(std::vector const &x, std::vector const &y) { TICK(sqrtdot); - T ret = 0; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - ret += x[i] * y[i]; - } + //并行缩并 ,平均时间:0.032s + T ret = tbb::parallel_reduce(tbb::blocked_range(0, x.size()), (T)0,[&](tbb::blocked_range &r, T local_res) { + for (size_t i = r.begin(); i != r.end(); i++) { + local_res += x[i] * y[i]; + } + return local_res; + }, [](T a, T b) { + return a + b; + }); ret = std::sqrt(ret); TOCK(sqrtdot); return ret; @@ -42,27 +59,45 @@ T sqrtdot(std::vector const &x, std::vector const &y) { template T minvalue(std::vector const &x) { TICK(minvalue); - T ret = x[0]; - for (size_t i = 1; i < x.size(); i++) { - if (x[i] < ret) - ret = x[i]; - } + //并行缩并求最小值 ,平均时间:0.015s + T ret = tbb::parallel_reduce(tbb::blocked_range(0, x.size()), (T)0,[&](tbb::blocked_range &r, T local_res) { + for (size_t i = r.begin(); i != r.end(); i++) { + if (x[i] < local_res) + local_res = x[i]; + } + return local_res; + }, [](T a, T b) { + return std::min(a, b); + }); TOCK(minvalue); return ret; } template -std::vector magicfilter(std::vector const &x, std::vector const &y) { +auto magicfilter(std::vector const &x, std::vector const &y) { + std::vector> res; + std::atomic res_size = 0; + //使用彭老师的头文件,平均时间:0.06s TICK(magicfilter); - std::vector res; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - if (x[i] > y[i]) { - res.push_back(x[i]); - } else if (y[i] > x[i] && y[i] > 0.5f) { - res.push_back(y[i]); - res.push_back(x[i] * y[i]); + res.resize(x.size()); + tbb::parallel_for(tbb::blocked_range(0, x.size()), + [&](const tbb::blocked_range &r) { + std::vector> local_a(r.size()); + size_t lasize = 0; + for (size_t i = r.begin(); i != r.end(); i++) { + if(x[i]>y[i]){ + local_a[lasize++] = x[i]; + } else if(y[i] > x[i] && y[i] >0.5f){ + local_a[lasize++] = y[i]; + local_a[lasize++] = x[i] * y[i]; + } + } + size_t base = res_size.fetch_add(lasize); + for(size_t i=0;i magicfilter(std::vector const &x, std::vector const &y) { template T scanner(std::vector &x) { TICK(scanner); + //平均时间:0.06s T ret = 0; - for (size_t i = 0; i < x.size(); i++) { - ret += x[i]; - x[i] = ret; - } + tbb::task_arena ta(4); + ta.execute([&] { + ret = tbb::parallel_scan(tbb::blocked_range(0, x.size()),T(0), [&](const tbb::blocked_range &r, T local_res,auto is_final) { + for (size_t i = r.begin(); i != r.end(); i++) { + local_res += x[i]; + if(is_final)x[i] = local_res; + } + return local_res; + },[] (T x, T y) { + return x + y; + },tbb::auto_partitioner()); + }); TOCK(scanner); return ret; } diff --git a/pod.h b/pod.h index 4f5cda8..d647c1f 100644 --- a/pod.h +++ b/pod.h @@ -63,4 +63,4 @@ struct pod { void destroy() { m_t.~T(); } -}; +}; \ No newline at end of file