Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/spmv/common.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#ifndef SPMV_COMMON_H
#define SPMV_COMMON_H

#include "cuda_compat.h"

#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <string>

#include "cuda_compat.h"

namespace spmv {

/**
Expand Down
3 changes: 1 addition & 2 deletions include/spmv/cuda_buffer.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
#ifndef SPMV_CUDA_BUFFER_H
#define SPMV_CUDA_BUFFER_H

#include "cuda_compat.h"

#include <cstddef>
#include <utility>
#include <vector>

#include "common.h"
#include "cuda_compat.h"

namespace spmv {

Expand Down
6 changes: 2 additions & 4 deletions include/spmv/cuda_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,8 @@ inline cudaError_t cudaMemset(void* dst, int value, size_t count) {
return cudaSuccess;
}

inline cudaError_t cudaCreateTextureObject(cudaTextureObject_t* tex,
const cudaResourceDesc*,
const cudaTextureDesc*,
const void*) {
inline cudaError_t cudaCreateTextureObject(cudaTextureObject_t* tex, const cudaResourceDesc*,
const cudaTextureDesc*, const void*) {
static cudaTextureObject_t next_texture = 1;
if (!tex) {
return cudaErrorInvalidValue;
Expand Down
4 changes: 2 additions & 2 deletions include/spmv/spmv.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ class SpMVExecutionContext {
bool is_texture_bound() const { return tex_x_ != 0; }

private:
friend int spmv_prepare_texture(SpMVExecutionContext* context, const float* d_x, size_t x_length,
bool requested, cudaTextureObject_t* tex_out,
friend int spmv_prepare_texture(SpMVExecutionContext* context, const float* d_x,
size_t x_length, bool requested, cudaTextureObject_t* tex_out,
bool* use_texture_out);

cudaTextureObject_t tex_x_ = 0;
Expand Down
1 change: 1 addition & 0 deletions src/bandwidth.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "spmv/bandwidth.h"

#include "spmv/cuda_compat.h"

#include <algorithm>
Expand Down
11 changes: 8 additions & 3 deletions src/csr_matrix.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include "internal/csr_device.h"
#include "spmv/csr_matrix.h"

#include <algorithm>
Expand All @@ -8,6 +7,8 @@
#include <fstream>
#include <new>

#include "internal/csr_device.h"

namespace spmv {

// Public API -----------------------------------------------------------------
Expand Down Expand Up @@ -132,9 +133,13 @@ float csr_get_element(const CSRMatrix* mat, int row, int col) {
return 0.0f;
}

int csr_to_gpu(CSRMatrix* mat) { return csr_upload_device_data(mat); }
int csr_to_gpu(CSRMatrix* mat) {
return csr_upload_device_data(mat);
}

int csr_from_gpu(CSRMatrix* mat) { return csr_download_device_data(mat); }
int csr_from_gpu(CSRMatrix* mat) {
return csr_download_device_data(mat);
}

int csr_serialize(const CSRMatrix* mat, const char* filename) {
if (!mat || !filename) {
Expand Down
11 changes: 8 additions & 3 deletions src/ell_matrix.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include "internal/ell_device.h"
#include "spmv/ell_matrix.h"

#include <algorithm>
Expand All @@ -7,6 +6,8 @@
#include <fstream>
#include <new>

#include "internal/ell_device.h"

namespace spmv {

// Public API -----------------------------------------------------------------
Expand Down Expand Up @@ -200,9 +201,13 @@ float ell_get_element(const ELLMatrix* mat, int row, int col) {
return 0.0f;
}

int ell_to_gpu(ELLMatrix* mat) { return ell_upload_device_data(mat); }
int ell_to_gpu(ELLMatrix* mat) {
return ell_upload_device_data(mat);
}

int ell_from_gpu(ELLMatrix* mat) { return ell_download_device_data(mat); }
int ell_from_gpu(ELLMatrix* mat) {
return ell_download_device_data(mat);
}

int ell_serialize(const ELLMatrix* mat, const char* filename) {
if (!mat || !filename) {
Expand Down
10 changes: 6 additions & 4 deletions src/internal/csr_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ int csr_upload_device_data(CSRMatrix* mat) {
};

if (mat->nnz > 0) {
cudaError_t err = cudaMalloc(reinterpret_cast<void**>(&new_d_values), mat->nnz * sizeof(float));
cudaError_t err =
cudaMalloc(reinterpret_cast<void**>(&new_d_values), mat->nnz * sizeof(float));
if (err != cudaSuccess) {
cleanup();
return static_cast<int>(SpMVError::CUDA_MALLOC);
Expand All @@ -137,15 +138,16 @@ int csr_upload_device_data(CSRMatrix* mat) {
}
}

cudaError_t err = cudaMalloc(reinterpret_cast<void**>(&new_d_row_ptrs),
(mat->num_rows + 1) * sizeof(int));
cudaError_t err =
cudaMalloc(reinterpret_cast<void**>(&new_d_row_ptrs), (mat->num_rows + 1) * sizeof(int));
if (err != cudaSuccess) {
cleanup();
return static_cast<int>(SpMVError::CUDA_MALLOC);
}

if (mat->nnz > 0) {
err = cudaMemcpy(new_d_values, mat->values, mat->nnz * sizeof(float), cudaMemcpyHostToDevice);
err =
cudaMemcpy(new_d_values, mat->values, mat->nnz * sizeof(float), cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
cleanup();
return static_cast<int>(SpMVError::CUDA_MEMCPY);
Expand Down
4 changes: 2 additions & 2 deletions src/internal/ell_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ int ell_download_device_data(ELLMatrix* mat) {
if (!mat->values || !mat->col_indices) {
return static_cast<int>(SpMVError::INVALID_ARGUMENT);
}
CUDA_CHECK_MEMCPY(
cudaMemcpy(mat->values, internal->d_values, size * sizeof(float), cudaMemcpyDeviceToHost));
CUDA_CHECK_MEMCPY(cudaMemcpy(mat->values, internal->d_values, size * sizeof(float),
cudaMemcpyDeviceToHost));
CUDA_CHECK_MEMCPY(cudaMemcpy(mat->col_indices, internal->d_col_indices, size * sizeof(int),
cudaMemcpyDeviceToHost));
}
Expand Down
3 changes: 1 addition & 2 deletions src/internal/kernel_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

namespace spmv {

SpMVConfig select_kernel(const CSRStats& stats, int num_cols,
const SpMVThresholds& thresholds) {
SpMVConfig select_kernel(const CSRStats& stats, int num_cols, const SpMVThresholds& thresholds) {
SpMVConfig config(SpMVConfig::SCALAR_CSR, DEFAULT_BLOCK_SIZE, false);

config.use_texture = (num_cols > thresholds.texture_cols_threshold);
Expand Down
3 changes: 1 addition & 2 deletions src/internal/kernel_selector.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ namespace spmv {
* immutable inputs (stats, dimensions, thresholds) and returns a deterministic
* SpMVConfig. No global state, no matrix internals.
*/
SpMVConfig select_kernel(const CSRStats& stats, int num_cols,
const SpMVThresholds& thresholds);
SpMVConfig select_kernel(const CSRStats& stats, int num_cols, const SpMVThresholds& thresholds);

} // namespace spmv

Expand Down
4 changes: 2 additions & 2 deletions src/spmv_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ int spmv_prepare_texture(SpMVExecutionContext* context, const float* d_x, size_t
return static_cast<int>(SpMVError::INVALID_ARGUMENT);
}

bool needs_rebuild = !context->texture_enabled_ || context->tex_x_ == 0 || context->cached_x_ != d_x ||
context->cached_x_length_ != x_length;
bool needs_rebuild = !context->texture_enabled_ || context->tex_x_ == 0 ||
context->cached_x_ != d_x || context->cached_x_length_ != x_length;
if (needs_rebuild) {
context->reset();

Expand Down
3 changes: 2 additions & 1 deletion src/spmv_cpu.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include "internal/kernel_selector.h"
#include "spmv/spmv.h"

#include <cstring>

#include "internal/kernel_selector.h"

namespace spmv {

// Global thresholds for kernel selection (can be tuned per-GPU architecture)
Expand Down
17 changes: 9 additions & 8 deletions src/spmv_kernels.cu
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#include "internal/csr_device.h"
#include "internal/ell_device.h"
#include "internal/texture_cache.h"
#include "spmv/bandwidth.h"
#include "spmv/spmv.h"

#include <cuda_runtime.h>

#include <chrono>

#include "internal/csr_device.h"
#include "internal/ell_device.h"
#include "internal/texture_cache.h"

namespace spmv {

// ---------- RAII helpers ----------
Expand Down Expand Up @@ -400,16 +401,16 @@ SpMVResult spmv_csr(const CSRMatrix* A, const float* d_x, float* d_y, const SpMV
return result;
}
spmv_csr_merge_path_kernel<<<num_blocks, block_size>>>(
A->num_rows, A->nnz, csr_d_row_ptrs(A), csr_d_col_indices(A), csr_d_values(A),
d_x, tex_x, use_texture, d_y);
A->num_rows, A->nnz, csr_d_row_ptrs(A), csr_d_col_indices(A), csr_d_values(A), d_x,
tex_x, use_texture, d_y);
break;
}
case SpMVConfig::VECTOR_CSR: {
int warps_per_block = block_size / 32;
int num_warps = (A->num_rows + warps_per_block - 1) / warps_per_block;
spmv_csr_vector_kernel<<<num_warps, block_size>>>(
A->num_rows, csr_d_row_ptrs(A), csr_d_col_indices(A), csr_d_values(A), d_x, tex_x,
use_texture, d_y);
spmv_csr_vector_kernel<<<num_warps, block_size>>>(A->num_rows, csr_d_row_ptrs(A),
csr_d_col_indices(A), csr_d_values(A),
d_x, tex_x, use_texture, d_y);
break;
}
case SpMVConfig::SCALAR_CSR:
Expand Down
Loading