From a5d0a335476be1a4f8c33fd9982cef32f2c53661 Mon Sep 17 00:00:00 2001 From: expandingman Date: Sun, 1 Mar 2026 13:26:13 -0500 Subject: [PATCH] preliminary support for multi-arg map and mapreduce with breaking changes --- Project.toml | 2 +- src/arithmetics.jl | 24 +++++++++++------------ src/map.jl | 24 +++++++++++++---------- src/reduce/mapreduce_1d_cpu.jl | 3 ++- src/reduce/mapreduce_1d_gpu.jl | 3 ++- src/reduce/mapreduce_nd.jl | 3 ++- src/reduce/reduce.jl | 35 +++++++++++++++++++--------------- src/utils.jl | 10 ++++++++++ test/map.jl | 22 +++++++++++++++++++++ test/reduce.jl | 4 ++-- 10 files changed, 87 insertions(+), 43 deletions(-) diff --git a/Project.toml b/Project.toml index 2fccea8..b58dc4c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "AcceleratedKernels" uuid = "6a4ca0a5-0e36-4168-a932-d9be78d558f1" authors = ["Andrei-Leonard Nicusan and contributors"] -version = "0.4.3" +version = "0.5.0" [deps] ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197" diff --git a/src/arithmetics.jl b/src/arithmetics.jl index a14a9e2..e5cf278 100644 --- a/src/arithmetics.jl +++ b/src/arithmetics.jl @@ -47,8 +47,8 @@ function sum( kwargs... ) reduce( - +, src, backend; - init, + +, src; + backend, init, kwargs... ) end @@ -103,8 +103,8 @@ function prod( kwargs... ) reduce( - *, src, backend; - init, + *, src; + backend, init, kwargs... ) end @@ -159,8 +159,8 @@ function maximum( kwargs... ) reduce( - max, src, backend; - init, + max, src; + backend, init, kwargs... ) end @@ -215,8 +215,8 @@ function minimum( kwargs... ) reduce( - min, src, backend; - init, + min, src; + backend, init, kwargs... ) end @@ -277,8 +277,8 @@ function count( kwargs... ) mapreduce( - x -> x ? one(typeof(init)) : zero(typeof(init)), +, src, backend; - init, + x -> x ? one(typeof(init)) : zero(typeof(init)), +, src; + backend, init, neutral=zero(typeof(init)), kwargs... ) @@ -291,8 +291,8 @@ function count( kwargs... ) mapreduce( - x -> f(x) ? 
one(typeof(init)) : zero(typeof(init)), +, src, backend; - init, + x -> f(x) ? one(typeof(init)) : zero(typeof(init)), +, src; + backend, init, neutral=zero(typeof(init)), kwargs... ) diff --git a/src/map.jl b/src/map.jl index 10d690d..dc54e46 100644 --- a/src/map.jl +++ b/src/map.jl @@ -1,6 +1,7 @@ """ map!( - f, dst::AbstractArray, src::AbstractArray, backend::Backend=get_backend(src); + f, dst::AbstractArray, src::AbstractArray...; + backend::Backend=get_backend(src[1]), # CPU settings max_tasks=Threads.nthreads(), @@ -32,15 +33,16 @@ end ``` """ function map!( - f, dst::AbstractArray, src::AbstractArray, backend::Backend=get_backend(src); + f, dst::AbstractArray, src::AbstractArray...; + backend::Backend=get_backend(src[1]), kwargs... ) - @argcheck length(dst) == length(src) + @argcheck lengthcheck(dst, src...) foreachindex( - src, backend; + dst, backend; kwargs... ) do idx - dst[idx] = f(src[idx]) + dst[idx] = f(indextuple(src, idx)...) end dst end @@ -48,7 +50,8 @@ end """ map( - f, src::AbstractArray, backend::Backend=get_backend(src); + f, src::AbstractArray...; + backend::Backend=get_backend(src[1]), # CPU settings max_tasks=Threads.nthreads(), @@ -63,12 +66,13 @@ changes the `eltype`, allocate `dst` separately and call [`map!`](@ref)). The CP settings are the same as for [`foreachindex`](@ref). """ function map( - f, src::AbstractArray, backend::Backend=get_backend(src); + f, src::AbstractArray...; + backend::Backend=get_backend(src[1]), kwargs... ) - dst = similar(src) + dst = similar(src[1]) map!( - f, dst, src, backend; - kwargs... + f, dst, src...; + backend, kwargs... 
) end diff --git a/src/reduce/mapreduce_1d_cpu.jl b/src/reduce/mapreduce_1d_cpu.jl index 95a93f2..ed87409 100644 --- a/src/reduce/mapreduce_1d_cpu.jl +++ b/src/reduce/mapreduce_1d_cpu.jl @@ -1,5 +1,6 @@ function mapreduce_1d_cpu( - f, op, src::AbstractArray, backend::Backend; + f, op, src::AbstractArray; + backend::Backend, init, neutral, diff --git a/src/reduce/mapreduce_1d_gpu.jl b/src/reduce/mapreduce_1d_gpu.jl index 39e7c41..0b3994d 100644 --- a/src/reduce/mapreduce_1d_gpu.jl +++ b/src/reduce/mapreduce_1d_gpu.jl @@ -47,7 +47,8 @@ end function mapreduce_1d_gpu( - f, op, src::AbstractArray, backend::Backend; + f, op, src::AbstractArray; + backend::Backend, init, neutral, diff --git a/src/reduce/mapreduce_nd.jl b/src/reduce/mapreduce_nd.jl index 231d0dc..c69c468 100644 --- a/src/reduce/mapreduce_nd.jl +++ b/src/reduce/mapreduce_nd.jl @@ -1,5 +1,6 @@ function mapreduce_nd( - f, op, src::AbstractArray, backend::Backend; + f, op, src::AbstractArray; + backend::Backend, init, neutral=neutral_element(op, eltype(src)), dims::Int, diff --git a/src/reduce/reduce.jl b/src/reduce/reduce.jl index 230ded1..0a72bbf 100644 --- a/src/reduce/reduce.jl +++ b/src/reduce/reduce.jl @@ -7,7 +7,8 @@ include("mapreduce_nd.jl") """ reduce( - op, src::AbstractArray, backend::Backend=get_backend(src); + op, src::AbstractArray; + backend::Backend=get_backend(src), init, neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, @@ -74,13 +75,14 @@ mcolsum = AK.reduce(+, m; init=zero(eltype(m)), dims=2) ``` """ function reduce( - op, src::AbstractArray, backend::Backend=get_backend(src); + op, src::AbstractArray; + backend::Backend=get_backend(src), init, kwargs... ) _mapreduce_impl( - identity, op, src, backend; - init, + identity, op, src; + backend, init, kwargs... 
) end @@ -90,7 +92,8 @@ end """ mapreduce( - f, op, src::AbstractArray, backend::Backend=get_backend(src); + f, op, src::AbstractArray; + backend::Backend=get_backend(src), init, neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, @@ -154,20 +157,22 @@ mcolsumsq = AK.mapreduce(f, +, m; init=zero(eltype(m)), dims=2) ``` """ function mapreduce( - f, op, src::AbstractArray, backend::Backend=get_backend(src); + f, op, src::AbstractArray; + backend::Backend=get_backend(src), init, kwargs... ) _mapreduce_impl( - f, op, src, backend; - init, + f, op, src; + backend, init, kwargs... ) end function _mapreduce_impl( - f, op, src::AbstractArray, backend::Backend; + f, op, src::AbstractArray; + backend::Backend, init, neutral=neutral_element(op, eltype(src)), dims::Union{Nothing, Int}=nothing, @@ -185,16 +190,16 @@ function _mapreduce_impl( if isnothing(dims) if use_gpu_algorithm(backend, prefer_threads) mapreduce_1d_gpu( - f, op, src, backend; - init, neutral, + f, op, src; + backend, init, neutral, max_tasks, min_elems, block_size, temp, switch_below ) else mapreduce_1d_cpu( - f, op, src, backend; - init, neutral, + f, op, src; + backend, init, neutral, max_tasks, min_elems, block_size, temp, switch_below @@ -202,8 +207,8 @@ function _mapreduce_impl( end else return mapreduce_nd( - f, op, src, backend; - init, neutral, dims, + f, op, src; + backend, init, neutral, dims, max_tasks, prefer_threads, min_elems, block_size, temp, diff --git a/src/utils.jl b/src/utils.jl index d644120..ca22b7d 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -126,3 +126,13 @@ module DocHelpers return Markdown.parse(join(lines[captured_istart:captured_iend], '\n')) end end + +@inline indextuple(arrays::Tuple, idx)::Tuple = Base.map(a -> a[idx], arrays) + +@inline function lengthcheck(dst::AbstractArray, src::AbstractArray...) 
+ n = length(dst) + for a ∈ src + length(a) == n || return false + end + return true +end diff --git a/test/map.jl b/test/map.jl index 24fa8bd..30c004e 100644 --- a/test/map.jl +++ b/test/map.jl @@ -16,12 +16,23 @@ end @test y == map(i -> i^2, x) + z = ones(Int, 1000) + AK.map!(y, x, z; prefer_threads) do a, b + a^2 + b + end + @test y == map((a, b) -> a^2 + b, x, z) + x = rand(Float32, 1000) y = AK.map(x; prefer_threads, max_tasks=2, min_elems=100) do i i > 0.5 ? i : 0 end @test y == map(i -> i > 0.5 ? i : 0, x) + y = AK.map(x, z; prefer_threads, max_tasks=2, min_elems=100) do a, b + a > 0.5 ? a+b : -b + end + @test y == map((a, b) -> a > 0.5 ? a+b : -b, x, z) + x = rand(Float32, 1000) y = AK.map(x; prefer_threads, max_tasks=4, min_elems=500) do i i > 0.5 ? i : 0 @@ -45,12 +56,23 @@ end @test Array(y) == map(i -> i^2, 1:1000) + z = array_from_host(ones(Int, 1000)) + AK.map!(y, x, z; prefer_threads) do a, b + a^2 + b + end + @test Array(y) == map((a, b) -> a^2 + b, x, z) + x = array_from_host(rand(Float32, 1000)) y = AK.map(x; prefer_threads, block_size=64) do i i > 0.5 ? i : 0 end @test Array(y) == map(i -> i > 0.5 ? i : 0, Array(x)) + y = AK.map(x, z; prefer_threads, block_size=64) do a, b + a > 0.5 ? a+b : -b + end + @test Array(y) == map((a, b) -> a > 0.5 ? 
a+b : -b, x, z) + # Test that undefined kwargs are not accepted @test_throws MethodError AK.map(x -> x^2, x; prefer_threads, bad=:kwarg) end diff --git a/test/reduce.jl b/test/reduce.jl index 9fe8b5c..c684bbc 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -115,7 +115,7 @@ Base.zero(::Type{Point}) = Point(0.0f0, 0.0f0) for _ in 1:100 num_elems = rand(1:1000) v = 1:num_elems - s = AK.reduce(+, v, BACKEND; prefer_threads, init=Int32(0)) + s = AK.reduce(+, v; backend=BACKEND, prefer_threads, init=Int32(0)) vh = Array(v) @test s == reduce(+, vh) end @@ -337,7 +337,7 @@ end for _ in 1:100 num_elems = rand(1:1000) v = 1:num_elems - s = AK.mapreduce(abs, +, v, BACKEND; prefer_threads, init=Int32(0)) + s = AK.mapreduce(abs, +, v; backend=BACKEND, prefer_threads, init=Int32(0)) vh = Array(v) @test s == mapreduce(abs, +, vh) end