// Patch overview. This preamble sits before the first "diff --git" header and changes no hunk content.
//
// Purpose: adds a TBB variant of the baldin_a_radix_sort task and wires it into the existing tests.
//
// Files touched, in the order they appear below:
//   tbb/include/ops_tbb.hpp (new file)
//     Declares BaldinARadixSortTBB, derived from BaseTask. GetStaticTypeOfTask() returns
//     ppc::task::TypeOfTask::kTBB. The four *Impl methods are private overrides.
//   tbb/src/ops_tbb.cpp (new file)
//     - Constructor: sets the task type, copies `in` into GetInput(), resets GetOutput() to empty.
//     - ValidationImpl / PostProcessingImpl: return true unconditionally.
//     - PreProcessingImpl: copies GetInput() into GetOutput(); the sort then works on the output.
//     - CountingSortStep: one counting-sort pass keyed on byte `byte_index` of every element
//       (shift = byte_index * 8, mask 0xFF). It builds a 256-bucket histogram, turns it into
//       exclusive prefix offsets, then scatters the elements to `out_begin` in input order.
//       For the last byte (byte_index == sizeof(int) - 1) the key is XORed with 128, i.e. its top
//       bit is flipped; presumably so negative values order before non-negative ones - confirm the
//       element type is a signed int.
//     - RadixSortLocal: returns early for fewer than 2 elements. Otherwise runs sizeof(int) passes:
//       even passes write range -> temp, odd passes write temp -> range, so the sorted data ends up
//       back in the caller's range only when sizeof(int) is even.
//     - RunImpl: splits the output into num_chunks = ppc::util::GetNumThreads() contiguous chunks
//       (the first n % num_chunks chunks get one extra element), sorts every chunk with
//       RadixSortLocal inside tbb::parallel_for, then merges neighbouring sorted runs with
//       std::inplace_merge while `step` doubles until it reaches num_chunks. A merge is skipped
//       when a run has no right-hand neighbour (i + step >= num_chunks).
//   tests/functional/main.cpp
//     Includes ops_tbb.hpp and adds a third ppc::util::AddFuncTask(...) entry to kTestTasksList.
//   tests/performance/main.cpp
//     Includes ops_tbb.hpp and rewraps the ppc::util::MakeAllPerfTasks(...) call; its template
//     arguments are not visible in this text.
//
// NOTE(review): the contents of angle brackets look missing throughout (bare `#include`,
//   `std::vector::iterator`, `static_cast(*it)`, `std::array count`, `tbb::blocked_range`) -
//   presumably stripped during extraction; verify against the real commit before applying.
// NOTE(review): RunImpl evaluates n / num_chunks and n % num_chunks - confirm that
//   ppc::util::GetNumThreads() can never return 0.
// NOTE(review): RunImpl narrows out.size() to int - confirm inputs cannot exceed the int range.
diff --git a/tasks/baldin_a_radix_sort/tbb/include/ops_tbb.hpp b/tasks/baldin_a_radix_sort/tbb/include/ops_tbb.hpp new file mode 100644 index 000000000..f15eada7b --- /dev/null +++ b/tasks/baldin_a_radix_sort/tbb/include/ops_tbb.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include "baldin_a_radix_sort/common/include/common.hpp" +#include "task/include/task.hpp" + +namespace baldin_a_radix_sort { + +class BaldinARadixSortTBB : public BaseTask { + public: + static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { + return ppc::task::TypeOfTask::kTBB; + } + explicit BaldinARadixSortTBB(const InType &in); + + private: + bool ValidationImpl() override; + bool PreProcessingImpl() override; + bool RunImpl() override; + bool PostProcessingImpl() override; +}; + +} // namespace baldin_a_radix_sort diff --git a/tasks/baldin_a_radix_sort/tbb/src/ops_tbb.cpp b/tasks/baldin_a_radix_sort/tbb/src/ops_tbb.cpp new file mode 100644 index 000000000..8bc8385e4 --- /dev/null +++ b/tasks/baldin_a_radix_sort/tbb/src/ops_tbb.cpp @@ -0,0 +1,139 @@ +#include "baldin_a_radix_sort/tbb/include/ops_tbb.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include "baldin_a_radix_sort/common/include/common.hpp" +#include "oneapi/tbb/parallel_for.h" +#include "util/include/util.hpp" + +namespace baldin_a_radix_sort { + +BaldinARadixSortTBB::BaldinARadixSortTBB(const InType &in) { + SetTypeOfTask(GetStaticTypeOfTask()); + GetInput() = in; + GetOutput() = {}; +} + +bool BaldinARadixSortTBB::ValidationImpl() { + return true; +} + +bool BaldinARadixSortTBB::PreProcessingImpl() { + GetOutput() = GetInput(); + return true; +} + +namespace { + +void CountingSortStep(std::vector::iterator in_begin, std::vector::iterator in_end, + std::vector::iterator out_begin, size_t byte_index) { + size_t shift = byte_index * 8; + std::array count = {0}; + + for (auto it = in_begin; it != in_end; it++) { + auto raw_val = static_cast(*it); + unsigned int byte_val = (raw_val >> shift) & 
0xFF; + + if (byte_index == sizeof(int) - 1) { + byte_val ^= 128; + } + count.at(byte_val)++; + } + + std::array prefix{}; + prefix[0] = 0; + for (int i = 1; i < 256; i++) { + prefix.at(i) = prefix.at(i - 1) + count.at(i - 1); + } + + for (auto it = in_begin; it != in_end; it++) { + auto raw_val = static_cast(*it); + unsigned int byte_val = (raw_val >> shift) & 0xFF; + + if (byte_index == sizeof(int) - 1) { + byte_val ^= 128; + } + + *(out_begin + static_cast(prefix.at(byte_val))) = *it; + prefix.at(byte_val)++; + } +} + +void RadixSortLocal(std::vector::iterator begin, std::vector::iterator end) { + size_t n = std::distance(begin, end); + if (n < 2) { + return; + } + + std::vector temp(n); + + for (size_t i = 0; i < sizeof(int); i++) { + size_t shift = i; + + if (i % 2 == 0) { + CountingSortStep(begin, end, temp.begin(), shift); + } else { + CountingSortStep(temp.begin(), temp.end(), begin, shift); + } + } +} + +} // namespace + +bool BaldinARadixSortTBB::RunImpl() { + auto &out = GetOutput(); + int n = static_cast(out.size()); + + int num_chunks = ppc::util::GetNumThreads(); + + std::vector offsets(num_chunks + 1); + int chunk_size = n / num_chunks; + int rem = n % num_chunks; + int curr = 0; + for (int i = 0; i < num_chunks; i++) { + offsets[i] = curr; + curr += chunk_size + (i < rem ? 
1 : 0); + } + offsets[num_chunks] = n; + + tbb::parallel_for(tbb::blocked_range(0, num_chunks), [&](const tbb::blocked_range &r) { + for (int tid = r.begin(); tid != r.end(); tid++) { + auto begin = out.begin() + offsets[tid]; + auto end = out.begin() + offsets[tid + 1]; + RadixSortLocal(begin, end); + } + }); + + for (int step = 1; step < num_chunks; step *= 2) { + int num_merges = (num_chunks + (2 * step) - 1) / (2 * step); + tbb::parallel_for(tbb::blocked_range(0, num_merges), [&](const tbb::blocked_range &r) { + for (int m_idx = r.begin(); m_idx != r.end(); m_idx++) { + int i = m_idx * (2 * step); + + if (i + step < num_chunks) { + auto begin = out.begin() + offsets[i]; + auto middle = out.begin() + offsets[i + step]; + int end_idx = std::min(i + (2 * step), num_chunks); + auto end = out.begin() + offsets[end_idx]; + + std::inplace_merge(begin, middle, end); + } + } + }); + } + + return true; +} + +bool BaldinARadixSortTBB::PostProcessingImpl() { + return true; +} + +} // namespace baldin_a_radix_sort diff --git a/tasks/baldin_a_radix_sort/tests/functional/main.cpp b/tasks/baldin_a_radix_sort/tests/functional/main.cpp index eba817ee1..9e4ce63f3 100644 --- a/tasks/baldin_a_radix_sort/tests/functional/main.cpp +++ b/tasks/baldin_a_radix_sort/tests/functional/main.cpp @@ -11,6 +11,7 @@ #include "baldin_a_radix_sort/common/include/common.hpp" #include "baldin_a_radix_sort/omp/include/ops_omp.hpp" #include "baldin_a_radix_sort/seq/include/ops_seq.hpp" +#include "baldin_a_radix_sort/tbb/include/ops_tbb.hpp" #include "util/include/func_test_util.hpp" #include "util/include/util.hpp" @@ -83,7 +84,8 @@ const std::array kTestParam = { const auto kTestTasksList = std::tuple_cat(ppc::util::AddFuncTask(kTestParam, PPC_SETTINGS_baldin_a_radix_sort), - ppc::util::AddFuncTask(kTestParam, PPC_SETTINGS_baldin_a_radix_sort)); + ppc::util::AddFuncTask(kTestParam, PPC_SETTINGS_baldin_a_radix_sort), + ppc::util::AddFuncTask(kTestParam, PPC_SETTINGS_baldin_a_radix_sort)); const auto 
kGtestValues = ppc::util::ExpandToValues(kTestTasksList); diff --git a/tasks/baldin_a_radix_sort/tests/performance/main.cpp b/tasks/baldin_a_radix_sort/tests/performance/main.cpp index f2c1e4ae1..0253c5756 100644 --- a/tasks/baldin_a_radix_sort/tests/performance/main.cpp +++ b/tasks/baldin_a_radix_sort/tests/performance/main.cpp @@ -6,6 +6,7 @@ #include "baldin_a_radix_sort/common/include/common.hpp" #include "baldin_a_radix_sort/omp/include/ops_omp.hpp" #include "baldin_a_radix_sort/seq/include/ops_seq.hpp" +#include "baldin_a_radix_sort/tbb/include/ops_tbb.hpp" #include "util/include/perf_test_util.hpp" namespace baldin_a_radix_sort { @@ -44,7 +45,8 @@ TEST_P(BaldinARadixSortPerfTests, RunPerfModes) { namespace { const auto kAllPerfTasks = - ppc::util::MakeAllPerfTasks(PPC_SETTINGS_baldin_a_radix_sort); + ppc::util::MakeAllPerfTasks( + PPC_SETTINGS_baldin_a_radix_sort); const auto kGtestValues = ppc::util::TupleToGTestValues(kAllPerfTasks);