diff --git a/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/include/ops_tbb.hpp b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/include/ops_tbb.hpp new file mode 100644 index 000000000..6fb3cd456 --- /dev/null +++ b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/include/ops_tbb.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include + +#include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp" +#include "task/include/task.hpp" + +namespace remizov_k_dense_matrix_multiplication_cannon_algorithm { + +class RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb : public BaseTask { + public: + static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { + return ppc::task::TypeOfTask::kTBB; + } + + explicit RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb(const InType &in); + + private: + bool ValidationImpl() override; + bool PreProcessingImpl() override; + bool RunImpl() override; + bool PostProcessingImpl() override; + + static void MultiplyBlock(const std::vector> &a, const std::vector> &b, + std::vector> &c, int block_size); + + static void ShiftBlocksLeft(std::vector>>> &matrix_blocks, + int block_count); + + static void ShiftBlocksUp(std::vector>>> &matrix_blocks, int block_count); + + static void RunCannonCycle(std::vector>>> &a_blocks, + std::vector>>> &b_blocks, + std::vector>>> &c_blocks, int block_size, + int block_count); + + static void AssembleOutput(std::vector>>> &c_blocks, + std::vector> &output, int block_size, int block_count); + + static void InitializeBlocks(const std::vector> &matrix_a, + const std::vector> &matrix_b, + std::vector>>> &a_blocks, + std::vector>>> &b_blocks, int block_size, + int block_count); +}; + +} // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm diff --git a/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/src/ops_tbb.cpp b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/src/ops_tbb.cpp new file mode 100644 index 000000000..babecb868 --- /dev/null +++ b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/src/ops_tbb.cpp @@ -0,0 +1,185 @@ +#include "remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/include/ops_tbb.hpp" + +#include +#include + +#include +#include +#include + +#include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp" + +namespace remizov_k_dense_matrix_multiplication_cannon_algorithm { + +RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb( + const InType &in) { + SetTypeOfTask(GetStaticTypeOfTask()); + GetInput() = in; +} + +bool RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::ValidationImpl() { + const auto &input_data = GetInput(); + + int block_dim = std::get<0>(input_data); + const auto &mat_a = std::get<1>(input_data); + const auto &mat_b = std::get<2>(input_data); + + if (block_dim <= 0) { + return false; + } + if (mat_a.empty() || mat_b.empty()) { + return false; + } + + size_t n = mat_a.size(); + if (n != mat_a[0].size()) { + return false; + } + if (n != mat_b.size() || n != mat_b[0].size()) { + return false; + } + + return (n % static_cast(block_dim) == 0); +} + +bool RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::PreProcessingImpl() { + GetOutput().clear(); + return true; +} + +void RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::MultiplyBlock(const std::vector> &a, + const std::vector> &b, + std::vector> &c, + int block_size) { + for (int i = 0; i < block_size; ++i) { + for (int j = 0; j < block_size; ++j) { + double accumulator = 0.0; + for (int k = 0; k < block_size; ++k) { + accumulator += a[i][k] * b[k][j]; + } + c[i][j] += accumulator; + } + } +} + +void RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::ShiftBlocksLeft( + std::vector>>> &matrix_blocks, int block_count) { + // Parallelize over rows; each row shift is independent + tbb::parallel_for(0, block_count, [&](int i) { + auto first_element = std::move(matrix_blocks[i][0]); + for (int j = 1; j < block_count; ++j) { + matrix_blocks[i][j - 1] = std::move(matrix_blocks[i][j]); + } + matrix_blocks[i][block_count - 1] = std::move(first_element); + }); +} + +void RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::ShiftBlocksUp( + std::vector>>> &matrix_blocks, int block_count) { + // Parallelize over columns; each column shift is independent + tbb::parallel_for(0, block_count, [&](int j) { + auto first_element = std::move(matrix_blocks[0][j]); + for (int i = 1; i < block_count; ++i) { + matrix_blocks[i - 1][j] = std::move(matrix_blocks[i][j]); + } + matrix_blocks[block_count - 1][j] = std::move(first_element); + }); +} + +void RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::RunCannonCycle( + std::vector>>> &a_blocks, + std::vector>>> &b_blocks, + std::vector>>> &c_blocks, int block_size, int block_count) { + for (int step = 0; step < block_count; ++step) { + // Parallel multiplication of all block pairs + tbb::parallel_for(tbb::blocked_range2d(0, block_count, 0, block_count), + [&](const tbb::blocked_range2d &r) { + for (int i = r.rows().begin(); i != r.rows().end(); ++i) { + for (int j = r.cols().begin(); j != r.cols().end(); ++j) { + MultiplyBlock(a_blocks[i][j], b_blocks[i][j], c_blocks[i][j], block_size); + } + } + }); + + if (step < block_count - 1) { + ShiftBlocksLeft(a_blocks, block_count); + ShiftBlocksUp(b_blocks, block_count); + } + } +} + +void RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::InitializeBlocks( + const std::vector> &matrix_a, const std::vector> &matrix_b, + std::vector>>> &a_blocks, + std::vector>>> &b_blocks, int block_size, int block_count) { + tbb::parallel_for(tbb::blocked_range2d(0, block_count, 0, block_count), [&](const tbb::blocked_range2d &r) { + for (int i = r.rows().begin(); i != r.rows().end(); ++i) { + for (int j = r.cols().begin(); j != r.cols().end(); ++j) { + int shift_value = (i + j) % block_count; + for (int bi = 0; bi < block_size; ++bi) { + for (int bj = 0; bj < block_size; ++bj) { + a_blocks[i][j][bi][bj] = matrix_a[(i * block_size) + bi][(shift_value * block_size) + bj]; + b_blocks[i][j][bi][bj] = matrix_b[(shift_value * block_size) + bi][(j * block_size) + bj]; + } + } + } + } + }); +} + +void RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::AssembleOutput( + std::vector>>> &c_blocks, std::vector> &output, + int block_size, int block_count) { + tbb::parallel_for(tbb::blocked_range2d(0, block_count, 0, block_count), [&](const tbb::blocked_range2d &r) { + for (int i = r.rows().begin(); i != r.rows().end(); ++i) { + for (int j = r.cols().begin(); j != r.cols().end(); ++j) { + for (int bi = 0; bi < block_size; ++bi) { + for (int bj = 0; bj < block_size; ++bj) { + output[(i * block_size) + bi][(j * block_size) + bj] = c_blocks[i][j][bi][bj]; + } + } + } + } + }); +} + +bool RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::RunImpl() { + const auto ¶ms = GetInput(); + + int block_dim = std::get<0>(params); + const auto &source_a = std::get<1>(params); + const auto &source_b = std::get<2>(params); + + int matrix_size = static_cast(source_a.size()); + int blocks_per_dim = matrix_size / block_dim; + + std::vector>>> blocks_a( + blocks_per_dim, + std::vector>>( + blocks_per_dim, std::vector>(block_dim, std::vector(block_dim, 0.0)))); + + std::vector>>> blocks_b( + blocks_per_dim, + std::vector>>( + blocks_per_dim, std::vector>(block_dim, std::vector(block_dim, 0.0)))); + + std::vector>>> blocks_c( + blocks_per_dim, + std::vector>>( + blocks_per_dim, std::vector>(block_dim, std::vector(block_dim, 0.0)))); + + InitializeBlocks(source_a, source_b, blocks_a, blocks_b, block_dim, blocks_per_dim); + RunCannonCycle(blocks_a, blocks_b, blocks_c, block_dim, blocks_per_dim); + + std::vector> result(matrix_size, std::vector(matrix_size, 0.0)); + AssembleOutput(blocks_c, result, block_dim, blocks_per_dim); + + GetOutput() = std::move(result); + return true; +} + +bool RemizovKDenseMatrixMultiplicationCannonAlgorithmTbb::PostProcessingImpl() { + return true; +} + +} // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm diff --git a/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/functional/main.cpp b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/functional/main.cpp index c151fbc76..90d623baa 100644 --- a/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/functional/main.cpp +++ b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/functional/main.cpp @@ -10,6 +10,7 @@ #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp" #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/omp/include/ops_omp.hpp" #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/seq/include/ops_seq.hpp" +#include "remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/include/ops_tbb.hpp" #include "util/include/func_test_util.hpp" #include "util/include/util.hpp" @@ -102,22 +103,22 @@ const std::array kTestCases = { std::vector>(4, std::vector(4, 1.0)), std::vector>(4, std::vector(4, 4.0)))}; +const auto kTestNameFunc = RemizovKDenseMatrixMultiplicationCannonAlgorithmFuncTests::PrintFuncTestName< + RemizovKDenseMatrixMultiplicationCannonAlgorithmFuncTests>; + +} // namespace + +namespace { + const auto kTestTasksList = std::tuple_cat(ppc::util::AddFuncTask( kTestCases, PPC_SETTINGS_remizov_k_dense_matrix_multiplication_cannon_algorithm)); const auto kGtestValues = ppc::util::ExpandToValues(kTestTasksList); -const auto kTestNameFunc = RemizovKDenseMatrixMultiplicationCannonAlgorithmFuncTests::PrintFuncTestName< - RemizovKDenseMatrixMultiplicationCannonAlgorithmFuncTests>; - INSTANTIATE_TEST_SUITE_P(CannonTests, RemizovKDenseMatrixMultiplicationCannonAlgorithmFuncTests, kGtestValues, kTestNameFunc); -} // namespace - -namespace { - const auto kTestTasksListOmp = std::tuple_cat(ppc::util::AddFuncTask( kTestCases, PPC_SETTINGS_remizov_k_dense_matrix_multiplication_cannon_algorithm)); @@ -127,6 +128,15 @@ const auto kGtestValuesOmp = ppc::util::ExpandToValues(kTestTasksListOmp); INSTANTIATE_TEST_SUITE_P(CannonTestsOmp, RemizovKDenseMatrixMultiplicationCannonAlgorithmFuncTests, kGtestValuesOmp, kTestNameFunc); +const auto kTestTasksListTbb = + std::tuple_cat(ppc::util::AddFuncTask( + kTestCases, PPC_SETTINGS_remizov_k_dense_matrix_multiplication_cannon_algorithm)); + +const auto kGtestValuesTbb = ppc::util::ExpandToValues(kTestTasksListTbb); + +INSTANTIATE_TEST_SUITE_P(CannonTestsTbb, RemizovKDenseMatrixMultiplicationCannonAlgorithmFuncTests, kGtestValuesTbb, + kTestNameFunc); + } // namespace } // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm diff --git a/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/performance/main.cpp b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/performance/main.cpp index 54577fb89..129e71fe0 100644 --- a/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/performance/main.cpp +++ b/tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/tests/performance/main.cpp @@ -8,6 +8,7 @@ #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp" #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/omp/include/ops_omp.hpp" #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/seq/include/ops_seq.hpp" +#include "remizov_k_dense_matrix_multiplication_cannon_algorithm/tbb/include/ops_tbb.hpp" #include "util/include/perf_test_util.hpp" namespace remizov_k_dense_matrix_multiplication_cannon_algorithm { @@ -88,4 +89,16 @@ INSTANTIATE_TEST_SUITE_P(PerfTestsOmp, RemizovKDenseMatrixMultiplicationCannonAl } // namespace +namespace { + +const auto kAllPerfTasksTbb = ppc::util::MakeAllPerfTasks( + PPC_SETTINGS_remizov_k_dense_matrix_multiplication_cannon_algorithm); + +const auto kGtestValuesTbb = ppc::util::TupleToGTestValues(kAllPerfTasksTbb); + +INSTANTIATE_TEST_SUITE_P(PerfTestsTbb, RemizovKDenseMatrixMultiplicationCannonAlgorithmPerfTests, kGtestValuesTbb, + kPerfTestName); + +} // namespace + } // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm