Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_BUILD_TYPE Release)

project(main LANGUAGES CXX)
Expand Down
152 changes: 124 additions & 28 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,77 +4,173 @@
#include <cmath>
#include <numeric>
#include <algorithm>
#include <thread>
#include "ticktock.h"
#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>
#include <tbb/parallel_scan.h>
#include <mutex>
#include <execution>

// TODO: parallelize all of these for loops

template <class T, class Func>
std::vector<T> fill(std::vector<T> &arr, Func const &func) {
TICK(fill);
for (size_t i = 0; i < arr.size(); i++) {
arr[i] = func(i);
}
// ----------------------parallel_for---------------------------
tbb::parallel_for(tbb::blocked_range<size_t>(0, arr.size()),
[&](auto r){
for (size_t i = r.begin(); i < r.end(); i++) {
arr[i] = func(i);
}
}, tbb::auto_partitioner{});

// --------------------------old---------------------------
// for (size_t i = 0; i < arr.size(); i++) {
// arr[i] = func(i);
// }
TOCK(fill);
return arr;
}

template <class T>
void saxpy(T a, std::vector<T> &x, std::vector<T> const &y) {
TICK(saxpy);
for (size_t i = 0; i < x.size(); i++) {
x[i] = a * x[i] + y[i];
}
// --------------------------parallel_for---------------------------
tbb::parallel_for(tbb::blocked_range<size_t>(0, x.size()),
[&](auto r){
for (size_t i = r.begin(); i < r.end(); ++i){
x[i] = a * x[i] + y[i];
}
});

// --------------------------old---------------------------
// for (size_t i = 0; i < x.size(); i++) {
// x[i] = a * x[i] + y[i];
// }
TOCK(saxpy);
}

template <class T>
T sqrtdot(std::vector<T> const &x, std::vector<T> const &y) {
TICK(sqrtdot);
T ret = 0;
for (size_t i = 0; i < std::min(x.size(), y.size()); i++) {
ret += x[i] * y[i];
}
ret = std::sqrt(ret);
// --------------------------parallel_reduce---------------------------
T ret = std::sqrt(tbb::parallel_reduce(tbb::blocked_range<size_t>(0, std::min(x.size(), y.size())), (T)0,
[&](tbb::blocked_range<size_t> r, T local_res){
for(size_t i=r.begin(); i<r.end(); ++i){
local_res += x[i] * y[i];
}
return local_res;
}, [](T x, T y){
return x + y;
}));

// --------------------------old---------------------------
// T ret = 0;
// for (size_t i = 0; i < std::min(x.size(), y.size()); i++) {
// ret += x[i] * y[i];
// }
// ret = std::sqrt(ret);
TOCK(sqrtdot);
return ret;
}

template <class T>
T minvalue(std::vector<T> const &x) {
TICK(minvalue);
// --------------------------parallel_for with mutex---------------------------
T ret = x[0];
for (size_t i = 1; i < x.size(); i++) {
if (x[i] < ret)
ret = x[i];
}
std::mutex mtx;
tbb::parallel_for(tbb::blocked_range<size_t>(1, x.size()),
[&](auto r){
T tmp = x[r.begin()];
for(size_t i=r.begin()+1; i<r.end(); ++i)
if(x[i] < tmp)
tmp = x[i];
std::lock_guard lck(mtx);
ret = std::min(ret, tmp);
});

// --------------------------parallel_reduce version---------------------------
// T ret = tbb::parallel_reduce(tbb::blocked_range<size_t>(0, x.size()), (T)0,
// [&](tbb::blocked_range<size_t> r, T local_min){
// local_min = x[r.begin()];
// for(size_t i=r.begin()+1; i<r.end(); ++i){
// if(local_min > x[i])
// local_min = x[i];
// }
// return local_min;
// }, [](T x, T y){
// return (x < y ? x : y);
// });

// --------------------------old version---------------------------
// for (size_t i = 1; i < x.size(); i++) {
// if (x[i] < ret)
// ret = x[i];
// }
TOCK(minvalue);
return ret;
}

template <class T>
std::vector<T> magicfilter(std::vector<T> const &x, std::vector<T> const &y) {
TICK(magicfilter);
// --------------------------parallel_for with mutex---------------------------
std::vector<T> res;
for (size_t i = 0; i < std::min(x.size(), y.size()); i++) {
if (x[i] > y[i]) {
res.push_back(x[i]);
} else if (y[i] > x[i] && y[i] > 0.5f) {
res.push_back(y[i]);
res.push_back(x[i] * y[i]);
size_t n = std::min(x.size(), y.size());
res.reserve(n);
std::mutex mtx;
tbb::parallel_for(tbb::blocked_range<size_t>(0, n), [&](auto r){
std::vector<T> tmp_arr;
tmp_arr.reserve(r.size());
for(size_t i=r.begin(); i<r.end(); ++i){
if (x[i] > y[i]) {
tmp_arr.push_back(x[i]);
} else if (y[i] > x[i] && y[i] > 0.5f) {
tmp_arr.push_back(y[i]);
tmp_arr.push_back(x[i] * y[i]);
}
}
}
std::lock_guard lck(mtx);
std::copy(tmp_arr.begin(), tmp_arr.end(), std::back_inserter(res));
});

// --------------------------old---------------------------
// for (size_t i = 0; i < std::min(x.size(), y.size()); i++) {
// if (x[i] > y[i]) {
// res.push_back(x[i]);
// } else if (y[i] > x[i] && y[i] > 0.5f) {
// res.push_back(y[i]);
// res.push_back(x[i] * y[i]);
// }
// }
TOCK(magicfilter);
return res;
}

template <class T>
T scanner(std::vector<T> &x) {
TICK(scanner);
T ret = 0;
for (size_t i = 0; i < x.size(); i++) {
ret += x[i];
x[i] = ret;
}
// --------------------------parallel_scan---------------------------
T ret = tbb::parallel_scan(tbb::blocked_range<size_t>(0, x.size()), (T)0,
[&](auto r, T local_res, auto is_final){
for(size_t i=r.begin(); i<r.end(); ++i){
local_res += x[i];
if(is_final)
x[i] = local_res;
}
return local_res;
}, [](T x, T y){
return x + y;
});

// --------------------------old---------------------------
// T ret = 0;
// for (size_t i = 0; i < x.size(); i++) {
// ret += x[i];
// x[i] = ret;
// }
TOCK(scanner);
return ret;
}
Expand All @@ -96,7 +192,7 @@ int main() {
std::cout << arr.size() << std::endl;

scanner(x);
std::cout << std::reduce(x.begin(), x.end()) << std::endl;
std::cout << std::reduce(std::execution::seq, x.begin(), x.end()) << std::endl;

return 0;
}
28 changes: 28 additions & 0 deletions score.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,31 @@ magicfilter: 0.0378731s
55924034
scanner: 0.0257618s
6.19406e+07


----------------------------------Old result---------------------
fill: 2.09027s
fill: 2.13145s
saxpy: 0.0397575s
sqrtdot: 0.0888022s
5165.4
minvalue: 0.0843207s
-1.11803
magicfilter: 0.456461s
55924034
scanner: 0.105253s
5.28566e+07

----------------------------------Parallel result---------------------
fill: 0.517877s
fill: 0.520669s
saxpy: 0.0425261s
sqrtdot: 0.0366869s
5792.62
minvalue: 0.0220482s
-1.11803
magicfilter: 0.164679s
55924034
scanner: 0.0727342s
5.28613e+07