Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
25365e0
seq version init commit
AlKurpiakov Feb 26, 2026
5aaa9f3
seq version init commit
AlKurpiakov Feb 26, 2026
6d67784
fix clang-format
AlKurpiakov Feb 26, 2026
cf2e469
fix clang-tidy
AlKurpiakov Feb 26, 2026
71a3627
fix clang-tidy
AlKurpiakov Feb 26, 2026
7b4e26f
fix clang-tidy
AlKurpiakov Feb 26, 2026
a114548
fix clang tidy
AlKurpiakov Feb 26, 2026
1fff801
fix
AlKurpiakov Feb 26, 2026
b337aca
fix
AlKurpiakov Feb 26, 2026
827a0a4
fix
AlKurpiakov Feb 26, 2026
40c0bcf
fix
AlKurpiakov Feb 26, 2026
15cf335
Changes to be committed:
AlKurpiakov Apr 13, 2026
45bdf7a
Changes to be committed:
AlKurpiakov Apr 13, 2026
b0d58cf
Merge branch 'master' into kurpiakov_a_sp_comp_mat_mul_tbb
AlKurpiakov Apr 13, 2026
8d82484
fix clang-format
AlKurpiakov Apr 13, 2026
1445926
Update main.cpp
AlKurpiakov Apr 13, 2026
a7cdb51
Update main.cpp
AlKurpiakov Apr 14, 2026
ac77f0c
Update main.cpp
AlKurpiakov Apr 14, 2026
f581635
Update common.hpp
AlKurpiakov Apr 14, 2026
c13076d
implement stl
AlKurpiakov Apr 14, 2026
7c9325d
Update ops_tbb.cpp
AlKurpiakov Apr 14, 2026
27e83f8
Changes to be committed:
AlKurpiakov Apr 14, 2026
4c46cab
Changes to be committed:
AlKurpiakov Apr 14, 2026
a4ebd62
Merge branch 'kurpiakov_a_sp_comp_mat_mul_tbb' into kurpiakov_a_sp_co…
AlKurpiakov Apr 14, 2026
775cf26
Update main.cpp
AlKurpiakov Apr 14, 2026
10c9346
Update main.cpp
AlKurpiakov Apr 14, 2026
67232e4
implement stl+mpi version
AlKurpiakov Apr 14, 2026
74d5fc7
Update ops_stl.cpp
AlKurpiakov Apr 14, 2026
87fc416
Update main.cpp
AlKurpiakov Apr 14, 2026
455e2fc
Changes to be committed:
AlKurpiakov Apr 14, 2026
182896a
Merge branch 'kurpiakov_a_sp_comp_mat_mul_stl' into kurpiakov_a_sp_co…
AlKurpiakov Apr 14, 2026
66a7637
Update ops_all.cpp
AlKurpiakov Apr 16, 2026
23c66d5
Update main.cpp
AlKurpiakov Apr 16, 2026
cdc38bf
Update main.cpp
AlKurpiakov Apr 16, 2026
3466024
Changes to be committed:
AlKurpiakov Apr 16, 2026
9b09252
Changes to be committed:
AlKurpiakov Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
22 changes: 22 additions & 0 deletions tasks/kurpiakov_a_sp_comp_mat_mul/all/include/ops_all.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#pragma once

#include "kurpiakov_a_sp_comp_mat_mul/common/include/common.hpp"
#include "task/include/task.hpp"

namespace kurpiakov_a_sp_comp_mat_mul {

/// Sparse complex matrix multiplication task registered under the kALL task type.
/// The implementation in ops_all.cpp splits the rows of the left operand across MPI ranks
/// and computes each rank's rows with std::thread workers.
class KurpiakovACRSMatMulALL : public BaseTask {
public:
/// Task-type tag of this implementation; the constructor passes it to SetTypeOfTask().
static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() {
return ppc::task::TypeOfTask::kALL;
}
/// Stores a copy of `in` as the task input and resets the output to an empty SparseMatrix.
explicit KurpiakovACRSMatMulALL(const InType &in);

private:
// Pipeline stages overridden from BaseTask.
// Checks both operands with ValidateCSR and requires a.cols == b.rows.
bool ValidationImpl() override;
// No-op; always returns true.
bool PreProcessingImpl() override;
// Computes the product and stores the full result in the task output on every rank.
bool RunImpl() override;
// No-op; always returns true.
bool PostProcessingImpl() override;
};

} // namespace kurpiakov_a_sp_comp_mat_mul
248 changes: 248 additions & 0 deletions tasks/kurpiakov_a_sp_comp_mat_mul/all/src/ops_all.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
#include "kurpiakov_a_sp_comp_mat_mul/all/include/ops_all.hpp"

#include <mpi.h>

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <thread>
#include <utility>
#include <vector>

#include "kurpiakov_a_sp_comp_mat_mul/common/include/common.hpp"
#include "util/include/util.hpp"

namespace kurpiakov_a_sp_comp_mat_mul {

namespace {

/// Checks that `m` is a structurally sound CSR matrix.
///
/// Requirements verified, in order:
///  - positive row and column counts;
///  - `row_ptr` has exactly `rows + 1` entries, starts at 0 and ends at the number of stored values;
///  - `col_indices` and `values` have the same length;
///  - `row_ptr` is non-decreasing;
///  - every stored column index lies in `[0, cols)`.
///
/// @param m Matrix to inspect.
/// @return true when all checks pass, false otherwise.
bool ValidateCSR(const SparseMatrix &m) {
  if (m.rows <= 0 || m.cols <= 0) {
    return false;
  }
  // Sign-safe comparison: avoids truncating a large size_t into int.
  if (std::cmp_not_equal(m.row_ptr.size(), m.rows + 1)) {
    return false;
  }
  if (m.row_ptr[0] != 0) {
    return false;
  }
  if (std::cmp_not_equal(m.values.size(), m.row_ptr[m.rows])) {
    return false;
  }
  if (m.col_indices.size() != m.values.size()) {
    return false;
  }
  for (int i = 0; i < m.rows; ++i) {
    // Row extents must not go backwards. Combined with row_ptr[0] == 0 and
    // row_ptr[rows] == nnz this bounds every row_ptr entry by [0, nnz], so the
    // col_indices reads below (and the later multiplication) stay in range.
    if (m.row_ptr[i] > m.row_ptr[i + 1]) {
      return false;
    }
    for (int j = m.row_ptr[i]; j < m.row_ptr[i + 1]; ++j) {
      if (m.col_indices[j] < 0 || m.col_indices[j] >= m.cols) {
        return false;
      }
    }
  }
  return true;
}

/// Returns the half-open row range `[begin, end)` assigned to `rank` when `total_rows` rows are
/// split into `size` contiguous, near-equal parts.
///
/// Consecutive ranks get adjacent ranges (the end of rank r is the begin of rank r + 1), and the
/// ranges of ranks `0 .. size - 1` together cover `[0, total_rows)`.
///
/// @param total_rows Number of rows to distribute.
/// @param rank       Index of the part, expected in `[0, size)`.
/// @param size       Number of parts; must be non-zero.
/// @return Pair `{begin, end}`.
std::pair<int, int> GetRowRange(int total_rows, int rank, int size) {
  // Multiply in 64 bits: total_rows * (rank + 1) can exceed INT_MAX for large
  // matrices on several ranks, which would be signed-overflow UB in int.
  const auto rows = static_cast<std::int64_t>(total_rows);
  const auto begin = static_cast<int>((rows * rank) / size);
  const auto end = static_cast<int>((rows * (static_cast<std::int64_t>(rank) + 1)) / size);
  return {begin, end};
}

void MultiplySingleRow(const SparseMatrix &a, const SparseMatrix &b, int row_idx, std::vector<ComplexD> &row_acc,
std::vector<char> &row_used, std::vector<int> &used_cols, std::vector<ComplexD> &out_values,
std::vector<int> &out_cols) {
used_cols.clear();

for (int ja = a.row_ptr[row_idx]; ja < a.row_ptr[row_idx + 1]; ++ja) {
const int ka = a.col_indices[ja];
const ComplexD &a_val = a.values[ja];

for (int jb = b.row_ptr[ka]; jb < b.row_ptr[ka + 1]; ++jb) {
const int cb = b.col_indices[jb];
const ComplexD &b_val = b.values[jb];

if (row_used[cb] == 0) {
row_used[cb] = 1;
row_acc[cb] = ComplexD();
used_cols.push_back(cb);
}

row_acc[cb] += a_val * b_val;
}
}

std::ranges::sort(used_cols);

out_values.clear();
out_cols.clear();
out_values.reserve(used_cols.size());
out_cols.reserve(used_cols.size());

for (int col : used_cols) {
out_values.push_back(row_acc[col]);
out_cols.push_back(col);
row_used[col] = 0;
}
}

/// Computes rows `[row_begin, row_end)` of `a * b` with a pool of std::thread workers.
///
/// Workers pull row indices from a shared atomic counter, so rows are balanced dynamically.
/// Each worker owns its scratch buffers and writes only to the output slot of the row it pulled,
/// so no further synchronization is used.
///
/// @param a            Left operand (CSR).
/// @param b            Right operand (CSR).
/// @param row_begin    First row to compute.
/// @param row_end      One past the last row to compute.
/// @param local_values Output: values of row `row_begin + k` are stored at index `k`.
///                     Must already hold at least `row_end - row_begin` entries.
/// @param local_cols   Output: column indices, same layout as `local_values`.
void ComputeLocalRowsThreads(const SparseMatrix &a, const SparseMatrix &b, int row_begin, int row_end,
                             std::vector<std::vector<ComplexD>> &local_values,
                             std::vector<std::vector<int>> &local_cols) {
  const int local_rows = row_end - row_begin;
  if (local_rows <= 0) {
    // Nothing to compute here: skip thread creation and the b.cols-sized scratch allocations.
    return;
  }

  // At least one worker, at most one per row.
  const int num_threads = std::clamp(ppc::util::GetNumThreads(), 1, local_rows);

  std::atomic<int> next_row(row_begin);
  std::vector<std::thread> workers;
  workers.reserve(num_threads);

  for (int tid = 0; tid < num_threads; ++tid) {
    // Reference captures are safe: every worker is joined before this function returns.
    workers.emplace_back([&a, &b, &next_row, &local_values, &local_cols, row_begin, row_end]() {
      // Per-thread scratch, reused across all rows this worker processes.
      std::vector<ComplexD> row_acc(b.cols);
      std::vector<char> row_used(b.cols, 0);
      std::vector<int> used_cols;

      while (true) {
        // Relaxed ordering: the counter only hands out unique row indices.
        const int row = next_row.fetch_add(1, std::memory_order_relaxed);
        if (row >= row_end) {
          break;
        }

        const int local_idx = row - row_begin;
        MultiplySingleRow(a, b, row, row_acc, row_used, used_cols, local_values[local_idx], local_cols[local_idx]);
      }
    });
  }

  for (auto &worker : workers) {
    worker.join();
  }
}

} // namespace

/// Constructs the task: registers its task type, stores a copy of `in` as the input
/// and resets the output to a default-constructed SparseMatrix.
KurpiakovACRSMatMulALL::KurpiakovACRSMatMulALL(const InType &in) {
SetTypeOfTask(GetStaticTypeOfTask());
GetInput() = in;
GetOutput() = SparseMatrix();
}

/// Accepts the input only when both operands pass ValidateCSR and their shapes are
/// compatible for multiplication (columns of the left equal rows of the right).
bool KurpiakovACRSMatMulALL::ValidationImpl() {
  const auto &[lhs, rhs] = GetInput();

  // Short-circuit keeps the original order: left operand, right operand, then shapes.
  return ValidateCSR(lhs) && ValidateCSR(rhs) && lhs.cols == rhs.rows;
}

/// No preprocessing is performed for this task; always returns true.
bool KurpiakovACRSMatMulALL::PreProcessingImpl() {
return true;
}

bool KurpiakovACRSMatMulALL::RunImpl() {
const auto &[a, b] = GetInput();
const int rows = a.rows;
const int cols = b.cols;

int rank = 0;
int world_size = 1;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);

const auto [row_begin, row_end] = GetRowRange(rows, rank, world_size);
const int local_rows = row_end - row_begin;

std::vector<std::vector<ComplexD>> local_values(local_rows);
std::vector<std::vector<int>> local_cols(local_rows);

ComputeLocalRowsThreads(a, b, row_begin, row_end, local_values, local_cols);

std::vector<int> local_row_nnz(rows, 0);
for (int local_i = 0; local_i < local_rows; ++local_i) {
local_row_nnz[row_begin + local_i] = static_cast<int>(local_values[local_i].size());
}

std::vector<int> global_row_nnz(rows, 0);
MPI_Allreduce(local_row_nnz.data(), global_row_nnz.data(), rows, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

std::vector<int> global_row_ptr(rows + 1, 0);
for (int i = 0; i < rows; ++i) {
global_row_ptr[i + 1] = global_row_ptr[i] + global_row_nnz[i];
}

const int total_nnz = global_row_ptr[rows];
const int local_nnz = global_row_ptr[row_end] - global_row_ptr[row_begin];

std::vector<double> local_re(local_nnz);
std::vector<double> local_im(local_nnz);
std::vector<int> local_col_indices(local_nnz);

int pos = 0;
for (int local_i = 0; local_i < local_rows; ++local_i) {
const auto &vals = local_values[local_i];
const auto &cols_row = local_cols[local_i];

for (size_t j = 0; j < vals.size(); ++j) {
local_re[pos] = vals[j].re;
local_im[pos] = vals[j].im;
local_col_indices[pos] = cols_row[j];
++pos;
}
}

std::vector<int> recv_counts(world_size, 0);
MPI_Allgather(&local_nnz, 1, MPI_INT, recv_counts.data(), 1, MPI_INT, MPI_COMM_WORLD);

std::vector<int> recv_displs(world_size, 0);
for (int rec = 1; rec < world_size; ++rec) {
recv_displs[rec] = recv_displs[rec - 1] + recv_counts[rec - 1];
}

std::vector<double> global_re;
std::vector<double> global_im;
std::vector<int> global_col_indices;

if (rank == 0) {
global_re.resize(total_nnz);
global_im.resize(total_nnz);
global_col_indices.resize(total_nnz);
}

MPI_Gatherv(local_re.data(), local_nnz, MPI_DOUBLE, global_re.data(), recv_counts.data(), recv_displs.data(),
MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Gatherv(local_im.data(), local_nnz, MPI_DOUBLE, global_im.data(), recv_counts.data(), recv_displs.data(),
MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Gatherv(local_col_indices.data(), local_nnz, MPI_INT, global_col_indices.data(), recv_counts.data(),
recv_displs.data(), MPI_INT, 0, MPI_COMM_WORLD);

if (rank != 0) {
global_re.resize(total_nnz);
global_im.resize(total_nnz);
global_col_indices.resize(total_nnz);
}

MPI_Bcast(global_re.data(), total_nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(global_im.data(), total_nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Bcast(global_col_indices.data(), total_nnz, MPI_INT, 0, MPI_COMM_WORLD);

SparseMatrix result(rows, cols);
result.row_ptr = std::move(global_row_ptr);
result.col_indices = std::move(global_col_indices);
result.values.resize(static_cast<size_t>(total_nnz));

for (int i = 0; i < total_nnz; ++i) {
result.values[static_cast<size_t>(i)] = ComplexD(global_re[i], global_im[i]);
}

GetOutput() = std::move(result);
return true;
}

/// No postprocessing is performed for this task; always returns true.
bool KurpiakovACRSMatMulALL::PostProcessingImpl() {
return true;
}

} // namespace kurpiakov_a_sp_comp_mat_mul
82 changes: 82 additions & 0 deletions tasks/kurpiakov_a_sp_comp_mat_mul/common/include/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,88 @@ class CSRMatrix {
return result;
}

private:
static void ProcessRow(int i, const CSRMatrix &self, const CSRMatrix &other, std::vector<T> &acc_re,
std::vector<T> &acc_im, std::vector<bool> &local_used,
std::vector<std::vector<Complex<T>>> &row_values,
std::vector<std::vector<int>> &row_col_indices) {
std::vector<int> used_cols;
used_cols.reserve(other.cols);

for (int ja = self.row_ptr[i]; ja < self.row_ptr[i + 1]; ++ja) {
const int ka = self.col_indices[ja];
const T a_re = self.values[ja].re;
const T a_im = self.values[ja].im;
const int jb_start = other.row_ptr[ka];
const int jb_end = other.row_ptr[ka + 1];

for (int jb = jb_start; jb < jb_end; ++jb) {
const int cb = other.col_indices[jb];
if (!local_used[cb]) {
local_used[cb] = true;
acc_re[cb] = T(0);
acc_im[cb] = T(0);
used_cols.push_back(cb);
}
acc_re[cb] += (a_re * other.values[jb].re) - (a_im * other.values[jb].im);
acc_im[cb] += (a_re * other.values[jb].im) + (a_im * other.values[jb].re);
}
}

std::ranges::sort(used_cols);
row_values[i].reserve(used_cols.size());
row_col_indices[i].reserve(used_cols.size());

for (const int c : used_cols) {
row_values[i].emplace_back(acc_re[c], acc_im[c]);
row_col_indices[i].push_back(c);
local_used[c] = false;
}
}

public:
/// Multiplies this matrix by `other` with an OpenMP parallel loop over rows.
///
/// Each thread keeps its own dense accumulators and calls ProcessRow() for dynamically
/// scheduled rows; the per-row results are then concatenated into CSR form.
///
/// @param other Right operand; its row count must equal this matrix's column count.
/// @return The product, or a default-constructed matrix when the shapes do not match.
[[nodiscard]] CSRMatrix OMPMultiply(const CSRMatrix &other) const {
  if (other.rows != cols) {
    return {};
  }

  CSRMatrix product(rows, other.cols);
  std::vector<std::vector<Complex<T>>> values_by_row(rows);
  std::vector<std::vector<int>> cols_by_row(rows);

  // Plain locals so they can be listed in the shared() clause under default(none).
  const CSRMatrix &lhs = *this;
  const int row_count = rows;
  const int col_count = other.cols;

#pragma omp parallel default(none) shared(lhs, other, values_by_row, cols_by_row, row_count, col_count)
  {
    // Declared inside the parallel region: one set of scratch buffers per thread.
    std::vector<T> acc_re(static_cast<std::size_t>(col_count));
    std::vector<T> acc_im(static_cast<std::size_t>(col_count));
    std::vector<bool> local_used(static_cast<std::size_t>(col_count), false);

#pragma omp for schedule(dynamic)
    for (int row = 0; row < row_count; ++row) {
      ProcessRow(row, lhs, other, acc_re, acc_im, local_used, values_by_row, cols_by_row);
    }
  }

  // Count the stored entries first so the flat arrays are allocated once.
  int nnz_total = 0;
  for (const auto &row_vals : values_by_row) {
    nnz_total += static_cast<int>(row_vals.size());
  }

  product.values.reserve(static_cast<std::size_t>(nnz_total));
  product.col_indices.reserve(static_cast<std::size_t>(nnz_total));

  for (int row = 0; row < rows; ++row) {
    const auto &row_vals = values_by_row[row];
    const auto &row_cols = cols_by_row[row];
    product.values.insert(product.values.end(), row_vals.begin(), row_vals.end());
    product.col_indices.insert(product.col_indices.end(), row_cols.begin(), row_cols.end());
    // Running size of the value array is the end offset of this row.
    product.row_ptr[row + 1] = static_cast<int>(product.values.size());
  }

  return product;
}

[[nodiscard]] std::vector<Complex<T>> ToDense() const {
std::vector<Complex<T>> dense(rows * cols);
for (int i = 0; i < rows; ++i) {
Expand Down
22 changes: 22 additions & 0 deletions tasks/kurpiakov_a_sp_comp_mat_mul/omp/include/ops_omp.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#pragma once

#include "kurpiakov_a_sp_comp_mat_mul/common/include/common.hpp"
#include "task/include/task.hpp"

namespace kurpiakov_a_sp_comp_mat_mul {

/// Sparse complex matrix multiplication task registered under the kOMP task type.
/// NOTE(review): presumably implemented on top of CSRMatrix::OMPMultiply from common.hpp;
/// the matching .cpp is not visible here, so confirm against ops_omp.cpp.
class KurpiakovACRSMatMulOMP : public BaseTask {
public:
/// Task-type tag of this implementation.
static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() {
return ppc::task::TypeOfTask::kOMP;
}
/// Constructs the task from its input.
explicit KurpiakovACRSMatMulOMP(const InType &in);

private:
// Pipeline stages overridden from BaseTask.
bool ValidationImpl() override;
bool PreProcessingImpl() override;
bool RunImpl() override;
bool PostProcessingImpl() override;
};

} // namespace kurpiakov_a_sp_comp_mat_mul
Loading
Loading