From b1fc176d3bdf64cce0c519ebd901b1c55f706b30 Mon Sep 17 00:00:00 2001 From: termi-official Date: Thu, 19 Feb 2026 13:29:43 +0100 Subject: [PATCH 1/4] Add polyester for SpMV kernels --- src/SparseMatricesCSR.jl | 3 ++ src/SparseMatrixCSR.jl | 75 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/src/SparseMatricesCSR.jl b/src/SparseMatricesCSR.jl index 44044b4..4ea4ef3 100644 --- a/src/SparseMatricesCSR.jl +++ b/src/SparseMatricesCSR.jl @@ -14,6 +14,9 @@ export SymSparseMatrixCSR export sparsecsr, symsparsecsr export colvals, getBi, getoffset +import Polyester: @batch +import Atomix: @atomic + include("SparseMatrixCSR.jl") include("SymSparseMatrixCSR.jl") diff --git a/src/SparseMatrixCSR.jl b/src/SparseMatrixCSR.jl index 20dcc15..0b2f205 100644 --- a/src/SparseMatrixCSR.jl +++ b/src/SparseMatrixCSR.jl @@ -319,14 +319,22 @@ If `pred` not given, it counts the number of `true` values. count(pred, S::SparseMatrixCSR) = count(pred, nzvalview(S)) count(S::SparseMatrixCSR) = count(i->true, nzvalview(S)) -function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector, α::Number, β::Number) +function mul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T + if Threads.nthreads() > 1 + tmul!(y, A, v) + else + smul!(y, A, v) + end +end + +function smul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector, α::Number, β::Number) A.n == size(v, 1) || throw(DimensionMismatch()) A.m == size(y, 1) || throw(DimensionMismatch()) if β != 1 β != 0 ? rmul!(y, β) : fill!(y, zero(eltype(y))) end o = getoffset(A) - for row = 1:size(y, 1) + @batch for row = 1:size(y, 1) @inbounds for nz in nzrange(A,row) col = A.colval[nz]+o y[row] += A.nzval[nz]*v[col]*α @@ -335,7 +343,31 @@ function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector, α::Number return y end -function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector) +function tmul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector, α::Number, β::Number) + A.n == size(v, 1) || throw(DimensionMismatch()) + A.m == size(y, 1) || throw(DimensionMismatch()) + if β != 1 + β != 0 ? rmul!(y, β) : fill!(y, zero(eltype(y))) + end + o = getoffset(A) + @batch for row = 1:size(y, 1) + @inbounds for nz in nzrange(A,row) + col = A.colval[nz]+o + y[row] += A.nzval[nz]*v[col]*α + end + end + return y +end + +function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector) where T + if Threads.nthreads() > 1 + tmul!(y, A, v) + else + smul!(y, A, v) + end +end + +function smul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector) A.n == size(v, 1) || throw(DimensionMismatch()) A.m == size(y, 1) || throw(DimensionMismatch()) fill!(y, zero(eltype(y))) @@ -349,9 +381,31 @@ function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector) return y end +function tmul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector) + A.n == size(v, 1) || throw(DimensionMismatch()) + A.m == size(y, 1) || throw(DimensionMismatch()) + fill!(y, zero(eltype(y))) + o = getoffset(A) + @batch for row = 1:size(y, 1) + @inbounds for nz in nzrange(A,row) + col = A.colval[nz]+o + y[row] += A.nzval[nz]*v[col] + end + end + return y +end + *(A::SparseMatrixCSR, v::Vector) = (y = similar(v,size(A,1));mul!(y,A,v)) function mul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T + if Threads.nthreads() > 1 + tmul!(y, A, v) + else + smul!(y, A, v) + end +end + +function smul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T P = A.parent P.n == size(y, 1) || throw(DimensionMismatch()) P.m == size(v, 1) || throw(DimensionMismatch()) @@ -366,6 +420,21 @@ function mul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVect return y end +function tmul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T + P = A.parent + P.n == size(y, 1) || throw(DimensionMismatch()) + P.m == size(v, 1) || throw(DimensionMismatch()) + fill!(y,zero(eltype(y))) + o = getoffset(P) + @batch for row = 1:size(P, 1) + for nz in nzrange(P,row) + col = P.colval[nz]+o + y[col] += P.nzval[nz]*v[row] + end + end + return y +end + *(A::Adjoint{T, <:SparseMatrixCSR}, v::AbstractVector) where T = (y = similar(v, promote_type(eltype(v),T), size(A,1)); mul!(y, A, v)) function show(io::IO, ::MIME"text/plain", S::SparseMatrixCSR) From c9d8a1025e1fb9c4a558c284c32e3815f4a26a28 Mon Sep 17 00:00:00 2001 From: termi-official Date: Thu, 19 Feb 2026 13:31:45 +0100 Subject: [PATCH 2/4] Add deps to Project --- Project.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Project.toml b/Project.toml index 6270345..8dd29b1 100644 --- a/Project.toml +++ b/Project.toml @@ -4,11 +4,15 @@ authors = ["Víctor Sande ", "Francesc Verdugo Date: Thu, 19 Feb 2026 13:37:43 +0100 Subject: [PATCH 3/4] Fix warnings --- src/SparseMatrixCSR.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/SparseMatrixCSR.jl b/src/SparseMatrixCSR.jl index 0b2f205..f01deac 100644 --- a/src/SparseMatrixCSR.jl +++ b/src/SparseMatrixCSR.jl @@ -319,11 +319,11 @@ If `pred` not given, it counts the number of `true` values. count(pred, S::SparseMatrixCSR) = count(pred, nzvalview(S)) count(S::SparseMatrixCSR) = count(i->true, nzvalview(S)) -function mul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T +function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector, α::Number, β::Number) if Threads.nthreads() > 1 - tmul!(y, A, v) + tmul!(y, A, v, α, β) else - smul!(y, A, v) + smul!(y, A, v, α, β) end end @@ -359,7 +359,7 @@ function tmul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector, α::Numbe return y end -function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector) where T +function mul!(y::AbstractVector,A::SparseMatrixCSR,v::AbstractVector) if Threads.nthreads() > 1 tmul!(y, A, v) else @@ -397,7 +397,7 @@ end *(A::SparseMatrixCSR, v::Vector) = (y = similar(v,size(A,1));mul!(y,A,v)) -function mul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T +function mul!(y::AbstractVector,A::Adjoint{<:Any, <:SparseMatrixCSR},v::AbstractVector) if Threads.nthreads() > 1 tmul!(y, A, v) else @@ -405,7 +405,7 @@ function mul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVect end end -function smul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T +function smul!(y::AbstractVector,A::Adjoint{<:Any, <:SparseMatrixCSR},v::AbstractVector) P = A.parent P.n == size(y, 1) || throw(DimensionMismatch()) P.m == size(v, 1) || throw(DimensionMismatch()) @@ -420,7 +420,7 @@ function smul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVec return y end -function tmul!(y::AbstractVector,A::Adjoint{T, <:SparseMatrixCSR},v::AbstractVector) where T +function tmul!(y::AbstractVector,A::Adjoint{<:Any, <:SparseMatrixCSR},v::AbstractVector) P = A.parent P.n == size(y, 1) || throw(DimensionMismatch()) P.m == size(v, 1) || throw(DimensionMismatch()) From c364a1225e9e7ec9332139e0c0db3151c90b06fa Mon Sep 17 00:00:00 2001 From: termi-official Date: Fri, 20 Feb 2026 18:49:52 +0100 Subject: [PATCH 4/4] Add threaded CI --- .github/workflows/ci.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index acbd16c..9102a31 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,35 @@ jobs: file: lcov.info verbose: true + threaded-test: + name: Test Threaded ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + env: + JULIA_NUM_THREADS: 2 + strategy: + fail-fast: false + matrix: + version: + - '1.10' + os: + - ubuntu-latest + arch: + - x64 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v2 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 + with: + file: lcov.info + verbose: true + docs: name: Documentation permissions: