Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ cmake --preset release && cmake --build --preset release

# CPU-only(无 GPU 环境,CI 使用此配置)
cmake -S . -B build-no-cuda -DSPMV_REQUIRE_CUDA=OFF && cmake --build build-no-cuda
ctest --test-dir build-no-cuda --output-on-failure

# 运行测试
ctest --preset default
Expand All @@ -110,7 +111,7 @@ ctest --preset default
find src include tests benchmarks -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.cu" \) | xargs clang-format -i
```

> **CI 无 GPU**:需要 CUDA 设备的测试在 CI 中会跳过。`benchmarks/main.cu` 和 `pagerank.cu` 在无 GPU 时自动退出
> **CI 无 GPU**:CPU-only 配置会构建 core library + CPU 测试;需要 CUDA 设备的测试、基准程序和 PageRank CUDA 实现不会参与该配置

---

Expand Down
5 changes: 3 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
cmake -S . -B build-no-cuda -DSPMV_REQUIRE_CUDA=OFF && cmake --build build-no-cuda
cmake -S . -B build-no-cuda -DSPMV_REQUIRE_CUDA=OFF && cmake --build build-no-cuda && ctest --test-dir build-no-cuda --output-on-failure

# 测试
ctest --preset default
Expand All @@ -62,7 +62,8 @@

## CI 特殊说明

- CI 无 GPU:`benchmarks/main.cu`、`src/pagerank.cu` 无 GPU 时自动退出
- CI 无 GPU:CPU-only 配置会构建 core library + CPU 测试;CUDA 测试 / benchmark /
PageRank CUDA 路径不参与该配置
- CI 使用 clang-format-18 检查格式
- CPU-only 构建:`cmake -S . -B build-no-cuda -DSPMV_REQUIRE_CUDA=OFF`

Expand Down
102 changes: 68 additions & 34 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,60 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if(NOT SPMV_REQUIRE_CUDA)
message(STATUS "SPMV_REQUIRE_CUDA=OFF; configuring in no-CUDA mode. No build targets will be generated.")
enable_testing()
return()
endif()

check_language(CUDA)
if(NOT CMAKE_CUDA_COMPILER)
message(FATAL_ERROR "CUDA toolkit with nvcc is required to build gpu-spmv. Set CUDAToolkit_ROOT or ensure nvcc is available on PATH, or configure with -DSPMV_REQUIRE_CUDA=OFF for a configure-only fallback.")
endif()

enable_language(CUDA)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# CUDA 架构设置
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 70 75 80 86 89 90)
set(SPMV_WITH_CUDA OFF)

if(SPMV_REQUIRE_CUDA)
check_language(CUDA)
if(NOT CMAKE_CUDA_COMPILER)
message(FATAL_ERROR "CUDA toolkit with nvcc is required to build gpu-spmv. Set CUDAToolkit_ROOT or ensure nvcc is available on PATH, or configure with -DSPMV_REQUIRE_CUDA=OFF for a CPU-only fallback.")
endif()

enable_language(CUDA)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(SPMV_WITH_CUDA ON)

# CUDA 架构设置
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 70 75 80 86 89 90)
endif()
else()
message(STATUS "SPMV_REQUIRE_CUDA=OFF; configuring in no-CUDA mode. Building core library and CPU tests only.")
endif()

# ---------- 主库 ----------
set(SPMV_SOURCES
src/csr_matrix.cpp
src/ell_matrix.cpp
src/internal/csr_device.cpp
src/internal/ell_device.cpp
src/spmv_cpu.cpp
src/spmv_context.cpp
src/internal/kernel_selector.cpp
src/spmv_kernels.cu
src/bandwidth.cpp
src/benchmark.cu
src/pagerank.cu
src/pagerank_common.cpp
)

if(SPMV_WITH_CUDA)
list(APPEND SPMV_SOURCES
src/spmv_kernels.cu
src/benchmark.cu
src/pagerank.cu
)
else()
list(APPEND SPMV_SOURCES
src/no_cuda_stubs.cpp
)
endif()

add_library(spmv STATIC ${SPMV_SOURCES})
target_include_directories(spmv PUBLIC ${CMAKE_SOURCE_DIR}/include)
set_target_properties(spmv PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
if(SPMV_WITH_CUDA)
target_compile_definitions(spmv PUBLIC SPMV_WITH_CUDA=1)
set_target_properties(spmv PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
else()
target_compile_definitions(spmv PUBLIC SPMV_WITH_CUDA=0)
endif()

# CUDA 编译选项
target_compile_options(spmv PRIVATE
Expand All @@ -64,33 +83,48 @@ enable_testing()

# ---------- 测试 ----------
set(TEST_SOURCES
tests/test_architecture.cpp
tests/test_common.cpp
tests/test_csr.cpp
tests/test_ell.cpp
tests/test_kernel_selector.cpp
tests/test_spmv.cu
tests/test_bandwidth.cu
tests/test_benchmark.cu
tests/test_pagerank.cu
tests/test_no_cuda.cpp
tests/test_pagerank_core.cpp
)

if(SPMV_WITH_CUDA)
list(APPEND TEST_SOURCES
tests/test_spmv.cu
tests/test_bandwidth.cu
tests/test_benchmark.cu
tests/test_pagerank.cu
)
endif()

add_executable(spmv_tests ${TEST_SOURCES})
target_link_libraries(spmv_tests PRIVATE spmv GTest::gtest_main)
set_target_properties(spmv_tests PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
if(SPMV_WITH_CUDA)
target_compile_definitions(spmv_tests PRIVATE SPMV_WITH_CUDA=1)
set_target_properties(spmv_tests PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
else()
target_compile_definitions(spmv_tests PRIVATE SPMV_WITH_CUDA=0)
endif()
target_compile_options(spmv_tests PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>
)

include(GoogleTest)
gtest_discover_tests(spmv_tests)

# ---------- 基准测试 ----------
add_executable(spmv_benchmark benchmarks/main.cu)
target_link_libraries(spmv_benchmark PRIVATE spmv)
set_target_properties(spmv_benchmark PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_compile_options(spmv_benchmark PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>
)
if(SPMV_WITH_CUDA)
# ---------- 基准测试 ----------
add_executable(spmv_benchmark benchmarks/main.cu)
target_link_libraries(spmv_benchmark PRIVATE spmv)
set_target_properties(spmv_benchmark PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_compile_options(spmv_benchmark PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>
)
endif()

# ---------- Installation ----------
include(GNUInstallDirs)
Expand Down
2 changes: 1 addition & 1 deletion include/spmv/common.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef SPMV_COMMON_H
#define SPMV_COMMON_H

#include <cuda_runtime.h>
#include "cuda_compat.h"

#include <cstdint>
#include <cstdio>
Expand Down
2 changes: 1 addition & 1 deletion include/spmv/cuda_buffer.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef SPMV_CUDA_BUFFER_H
#define SPMV_CUDA_BUFFER_H

#include <cuda_runtime.h>
#include "cuda_compat.h"

#include <cstddef>
#include <utility>
Expand Down
150 changes: 150 additions & 0 deletions include/spmv/cuda_compat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#ifndef SPMV_CUDA_COMPAT_H
#define SPMV_CUDA_COMPAT_H

#if defined(SPMV_WITH_CUDA) && SPMV_WITH_CUDA

#include <cuda_runtime.h>

#else

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>

/** @brief Event handle placeholder used when CUDA is disabled. */
using cudaEvent_t = void*;
/** @brief Texture handle placeholder: an unsigned integer wide enough to hold a pointer. */
using cudaTextureObject_t = std::uintptr_t;
/** @brief Status code type returned by the CPU-only runtime stand-ins in this header. */
using cudaError_t = int;

// Status values produced by the CPU-only stand-ins.
constexpr cudaError_t cudaSuccess{0};
constexpr cudaError_t cudaErrorMemoryAllocation{2};
constexpr cudaError_t cudaErrorInvalidValue{11};

/**
 * @brief Copy-direction tag accepted by the CPU-only cudaMemcpy stand-in.
 *
 * The enumerator values follow the CUDA runtime's cudaMemcpyKind. cudaMemcpyDefault
 * is provided as well so that host code naming it still compiles when the project is
 * configured without CUDA; the existing enumerators keep their values.
 */
enum cudaMemcpyKind {
    cudaMemcpyHostToHost = 0,
    cudaMemcpyHostToDevice = 1,
    cudaMemcpyDeviceToHost = 2,
    cudaMemcpyDeviceToDevice = 3,
    cudaMemcpyDefault = 4
};

/**
 * @brief Placeholder constants for the descriptor fields declared in this header.
 *
 * Every enumerator is zero; they exist so code that fills in resource/texture
 * descriptors compiles without the CUDA toolkit.
 */
enum {
    cudaResourceTypeLinear = 0,   // default for cudaResourceDesc::resType
    cudaAddressModeClamp = 0,     // default for cudaTextureDesc::addressMode[]
    cudaFilterModePoint = 0,      // default for cudaTextureDesc::filterMode
    cudaReadModeElementType = 0   // default for cudaTextureDesc::readMode
};

/**
 * @brief CPU-only placeholder for the CUDA channel format descriptor.
 *
 * All five fields default to zero. Within this header no stand-in reads them.
 */
struct cudaChannelFormatDesc {
    int x{0};
    int y{0};
    int z{0};
    int w{0};
    int f{0};
};

/**
 * @brief CPU-only stand-in for cudaCreateChannelDesc<T>().
 * @tparam T Element type; ignored by this stub.
 * @return A default-constructed cudaChannelFormatDesc (all fields zero).
 */
template <typename T>
inline cudaChannelFormatDesc cudaCreateChannelDesc() {
    return cudaChannelFormatDesc{};
}

struct cudaResourceDesc {
int resType = cudaResourceTypeLinear;
struct {
struct {
void* devPtr = nullptr;
cudaChannelFormatDesc desc{};
size_t sizeInBytes = 0;
} linear;
} res;
};

struct cudaTextureDesc {
int addressMode[3] = {cudaAddressModeClamp, cudaAddressModeClamp, cudaAddressModeClamp};
int filterMode = cudaFilterModePoint;
int readMode = cudaReadModeElementType;
int normalizedCoords = 0;
};

/**
 * @brief CPU-only placeholder for cudaDeviceProp.
 *
 * Declares only the two memory-related fields; both default to zero.
 */
struct cudaDeviceProp {
    int memoryClockRate{0};
    int memoryBusWidth{0};
};

/**
 * @brief CPU-only stand-in for cudaGetErrorString.
 * @param err Status code to describe.
 * @return A static string for the three known codes; "cuda unavailable" for any other value.
 */
inline const char* cudaGetErrorString(cudaError_t err) {
    if (err == cudaSuccess) {
        return "success";
    }
    if (err == cudaErrorMemoryAllocation) {
        return "memory allocation failed";
    }
    if (err == cudaErrorInvalidValue) {
        return "invalid value";
    }
    return "cuda unavailable";
}

/**
 * @brief CPU-only stand-in for cudaMalloc, backed by std::malloc.
 * @param ptr  Receives the allocation; must not be null.
 * @param size Number of bytes requested. A zero-byte request stores nullptr and succeeds.
 * @return cudaErrorInvalidValue if @p ptr is null, cudaErrorMemoryAllocation if
 *         std::malloc fails, otherwise cudaSuccess.
 */
inline cudaError_t cudaMalloc(void** ptr, size_t size) {
    if (ptr == nullptr) {
        return cudaErrorInvalidValue;
    }
    if (size == 0) {
        *ptr = nullptr;
        return cudaSuccess;
    }
    void* const block = std::malloc(size);
    *ptr = block;
    return block != nullptr ? cudaSuccess : cudaErrorMemoryAllocation;
}

/**
 * @brief Typed convenience overload of the CPU-only cudaMalloc.
 * @tparam T   Pointee type of the destination pointer.
 * @param ptr  Receives the allocation.
 * @param size Number of bytes requested.
 * @return Whatever the `void**` overload of cudaMalloc returns.
 */
template <typename T>
inline cudaError_t cudaMalloc(T** ptr, size_t size) {
    void** const untyped = reinterpret_cast<void**>(ptr);
    return cudaMalloc(untyped, size);
}

/**
 * @brief CPU-only stand-in for cudaFree; releases memory with std::free.
 * @param ptr Pointer to release; a null pointer is a no-op.
 * @return Always cudaSuccess.
 */
inline cudaError_t cudaFree(void* ptr) {
    if (ptr != nullptr) {
        std::free(ptr);
    }
    return cudaSuccess;
}

/**
 * @brief CPU-only stand-in for cudaMemcpy, implemented with std::memcpy.
 * @param dst   Destination buffer.
 * @param src   Source buffer.
 * @param count Number of bytes to copy; zero is a successful no-op.
 * @return cudaErrorInvalidValue when @p count is non-zero and either pointer is
 *         null, otherwise cudaSuccess.
 *
 * The copy-direction argument is ignored.
 */
inline cudaError_t cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind) {
    if (count == 0) {
        return cudaSuccess;
    }
    if (dst == nullptr || src == nullptr) {
        return cudaErrorInvalidValue;
    }
    std::memcpy(dst, src, count);
    return cudaSuccess;
}

/**
 * @brief CPU-only stand-in for cudaMemset, implemented with std::memset.
 * @param dst   Buffer to fill.
 * @param value Byte value passed through to std::memset.
 * @param count Number of bytes to fill; zero is a successful no-op.
 * @return cudaErrorInvalidValue when @p count is non-zero and @p dst is null,
 *         otherwise cudaSuccess.
 */
inline cudaError_t cudaMemset(void* dst, int value, size_t count) {
    if (count == 0) {
        return cudaSuccess;
    }
    if (dst == nullptr) {
        return cudaErrorInvalidValue;
    }
    std::memset(dst, value, count);
    return cudaSuccess;
}

/**
 * @brief CPU-only stand-in for cudaCreateTextureObject.
 * @param tex Receives the new handle; must not be null.
 * @return cudaErrorInvalidValue if @p tex is null, otherwise cudaSuccess.
 *
 * The three descriptor arguments are ignored. Handles come from a function-local
 * counter that starts at 1 and is only advanced on a successful call.
 */
inline cudaError_t cudaCreateTextureObject(cudaTextureObject_t* tex,
                                           const cudaResourceDesc*,
                                           const cudaTextureDesc*,
                                           const void*) {
    static cudaTextureObject_t next_texture = 1;
    if (tex == nullptr) {
        return cudaErrorInvalidValue;
    }
    const cudaTextureObject_t handle = next_texture;
    ++next_texture;
    *tex = handle;
    return cudaSuccess;
}

/**
 * @brief CPU-only stand-in for cudaDestroyTextureObject.
 * @param tex Handle to destroy; ignored, since the stub handles own no resources.
 * @return Always cudaSuccess.
 */
inline cudaError_t cudaDestroyTextureObject(cudaTextureObject_t tex) {
    static_cast<void>(tex);
    return cudaSuccess;
}

/**
 * @brief CPU-only stand-in for cudaGetDeviceProperties.
 * @param prop Receives a default-initialised cudaDeviceProp; must not be null.
 * @return cudaErrorInvalidValue if @p prop is null, otherwise cudaSuccess.
 *
 * The device index argument is ignored.
 */
inline cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int) {
    if (prop == nullptr) {
        return cudaErrorInvalidValue;
    }
    *prop = cudaDeviceProp{};
    return cudaSuccess;
}

#endif

#endif // SPMV_CUDA_COMPAT_H
13 changes: 4 additions & 9 deletions include/spmv/spmv.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,11 @@ class SpMVExecutionContext {
/** @brief Query whether a texture object is currently bound. */
bool is_texture_bound() const { return tex_x_ != 0; }

/**
* @brief Prepare texture object for input vector x.
* @return 0 on success, negative error code on failure.
*
* Internal API; defined in spmv_kernels.cu.
*/
int prepare_texture(const float* d_x, size_t x_length, bool requested,
cudaTextureObject_t* tex_out, bool* use_texture_out);

private:
friend int spmv_prepare_texture(SpMVExecutionContext* context, const float* d_x, size_t x_length,
bool requested, cudaTextureObject_t* tex_out,
bool* use_texture_out);

cudaTextureObject_t tex_x_ = 0;
const float* cached_x_ = nullptr;
size_t cached_x_length_ = 0;
Expand Down
3 changes: 1 addition & 2 deletions src/bandwidth.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "spmv/bandwidth.h"

#include <cuda_runtime.h>
#include "spmv/cuda_compat.h"

#include <algorithm>
#include <mutex>
Expand Down
Loading
Loading