From 9a8a39f535b653efb3f8eedb1484004607ce8949 Mon Sep 17 00:00:00 2001 From: Xianglin_Wang <1830638446@qq.com> Date: Thu, 4 Jan 2024 11:20:12 +0800 Subject: [PATCH 1/2] done --- after.txt | 11 +++++ main.cpp | 128 +++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 113 insertions(+), 26 deletions(-) create mode 100644 after.txt diff --git a/after.txt b/after.txt new file mode 100644 index 0000000..0df39b0 --- /dev/null +++ b/after.txt @@ -0,0 +1,11 @@ +fill: 0.130924s +fill: 0.132913s +saxpy: 0.0101733s +sqrtdot: 0.0241576s +5792.62 +minvalue: 0.0222686s +-1.11803 +magicfilter: 0.115075s +55924034 +scanner: 0.0292774s +0 diff --git a/main.cpp b/main.cpp index a1d2625..a86cb71 100644 --- a/main.cpp +++ b/main.cpp @@ -5,15 +5,24 @@ #include #include #include "ticktock.h" +#include +#include +#include // TODO: 并行化所有这些 for 循环 template std::vector fill(std::vector &arr, Func const &func) { TICK(fill); - for (size_t i = 0; i < arr.size(); i++) { - arr[i] = func(i); - } + tbb::task_arena ta(8); + ta.execute([&]{ + tbb::parallel_for(tbb::blocked_range(0,arr.size()),[&](tbb::blocked_range r){ + for (size_t i = r.begin(); i !=r.end(); i++) { + arr[i] = func(i); + } + }); + }); + TOCK(fill); return arr; } @@ -21,9 +30,15 @@ std::vector fill(std::vector &arr, Func const &func) { template void saxpy(T a, std::vector &x, std::vector const &y) { TICK(saxpy); - for (size_t i = 0; i < x.size(); i++) { - x[i] = a * x[i] + y[i]; - } + tbb::task_arena ta(8); + ta.execute([&]{ + tbb::parallel_for(tbb::blocked_range(0,x.size()),[&](tbb::blocked_range r){ + for (size_t i = r.begin(); i !=r.end(); i++) { + x[i] = a * x[i] + y[i]; + } + }); + }); + TOCK(saxpy); } @@ -31,9 +46,19 @@ template T sqrtdot(std::vector const &x, std::vector const &y) { TICK(sqrtdot); T ret = 0; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - ret += x[i] * y[i]; - } + tbb::task_arena ta(8); + ta.execute([&]{ + ret=tbb::parallel_reduce(tbb::blocked_range(0,std::min(x.size(), y.size())),(T )0, + [&](tbb::blocked_range r, T local_res){ + for (size_t i = r.begin(); i != r.end(); i++) { + local_res += x[i] * y[i]; + } + return local_res; + }, + [](T x,T y){ + return x+y; + }); + }); ret = std::sqrt(ret); TOCK(sqrtdot); return ret; @@ -43,10 +68,21 @@ template T minvalue(std::vector const &x) { TICK(minvalue); T ret = x[0]; - for (size_t i = 1; i < x.size(); i++) { - if (x[i] < ret) - ret = x[i]; - } + tbb::task_arena ta(8); + ta.execute([&]{ + ret=tbb::parallel_reduce(tbb::blocked_range(0, x.size()),x[0], + [&](tbb::blocked_range r, T local_res){ + for (size_t i = r.begin()+1; i != r.end(); i++) { + if (x[i] < local_res) + local_res = x[i]; + } + return local_res; + }, + [](T x,T y){ + return std::min(x,y); + }); + }); + TOCK(minvalue); return ret; } @@ -55,14 +91,34 @@ template std::vector magicfilter(std::vector const &x, std::vector const &y) { TICK(magicfilter); std::vector res; - for (size_t i = 0; i < std::min(x.size(), y.size()); i++) { - if (x[i] > y[i]) { - res.push_back(x[i]); - } else if (y[i] > x[i] && y[i] > 0.5f) { - res.push_back(y[i]); - res.push_back(x[i] * y[i]); - } - } + + std::atomic a_size=0; + size_t n = std::min(x.size(), y.size()); + res.resize(n); + + tbb::task_arena ta(8); + ta.execute([&]{ + tbb::parallel_for(tbb::blocked_range(0,n), + [&](tbb::blocked_range r){ + std::vector la(r.size()); + size_t la_idx=0; + for (size_t i = r.begin(); i < r.end(); i++) { + if (x[i] > y[i]) { + la[la_idx++]=x[i]; + } else if (y[i] > x[i] && y[i] > 0.5f) { + la[la_idx++]=y[i]; + la[la_idx++]=x[i] * y[i]; + } + } + + size_t base=a_size.fetch_add(la_idx); + for (size_t i = 0; i < la_idx; ++i) { + res[base+i]=la[i]; + } + }); + }); + + res.resize(a_size); TOCK(magicfilter); return res; } @@ -70,11 +126,31 @@ std::vector magicfilter(std::vector const &x, std::vector const &y) { template T scanner(std::vector &x) { TICK(scanner); - T ret = 0; - for (size_t i = 0; i < x.size(); i++) { - ret += x[i]; - x[i] = ret; - } + T ret=0; + tbb::task_arena ta(8); + ta.execute([&]{ + ret = tbb::parallel_scan(tbb::blocked_range(0,x.size()),(T)0, + [&](tbb::blocked_range r,T local_res,auto is_final){ + for (size_t i = r.begin(); i < r.end(); i++) { + local_res += x[i]; + if(is_final){ + x[i] = ret; + } + } + + return local_res; + }, + [](T x,T y){ + return x+y; + }); + }); + + +// T ret=0; +// for (size_t i = 0; i < x.size(); i++) { +// ret += x[i]; +// x[i] = ret; +// } TOCK(scanner); return ret; } From 7e86ca43cce55d0c9ac59397607f4af6eec424ff Mon Sep 17 00:00:00 2001 From: Xianglin_Wang <1830638446@qq.com> Date: Thu, 4 Jan 2024 12:03:57 +0800 Subject: [PATCH 2/2] use pod.h --- main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/main.cpp b/main.cpp index a86cb71..0faeca3 100644 --- a/main.cpp +++ b/main.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "pod.h" // TODO: 并行化所有这些 for 循环 @@ -88,9 +89,9 @@ T minvalue(std::vector const &x) { } template -std::vector magicfilter(std::vector const &x, std::vector const &y) { +auto magicfilter(std::vector const &x, std::vector const &y) { TICK(magicfilter); - std::vector res; + std::vector> res; std::atomic a_size=0; size_t n = std::min(x.size(), y.size()); @@ -100,7 +101,7 @@ std::vector magicfilter(std::vector const &x, std::vector const &y) { ta.execute([&]{ tbb::parallel_for(tbb::blocked_range(0,n), [&](tbb::blocked_range r){ - std::vector la(r.size()); + std::vector> la(r.size()); size_t la_idx=0; for (size_t i = r.begin(); i < r.end(); i++) { if (x[i] > y[i]) {