From 7e4fec1931e35461c4635797a6279f6e5b64ad08 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Wed, 15 Oct 2025 21:24:28 +0200
Subject: [PATCH 01/23] Use ParallelTestRunner with a custom TestRecord

---
 test/Project.toml          |   5 +
 test/kernelabstractions.jl |  14 +
 test/runtests.jl           | 546 ++++++++++++-------------------------
 test/setup.jl              | 137 +---------
 4 files changed, 204 insertions(+), 498 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index 455dc2dd..75779a72 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -9,6 +9,7 @@ JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 OpenCL = "08131aa3-fb12-5dee-8b74-c09406e224a2"
+ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
@@ -26,3 +27,7 @@ pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
 
 [compat]
 pocl_jll = "7.0"
+ParallelTestRunner = "1.0.1"
+
+[sources]
+ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="vc/custom_testrecord"}
diff --git a/test/kernelabstractions.jl b/test/kernelabstractions.jl
index debf5dec..97e54154 100644
--- a/test/kernelabstractions.jl
+++ b/test/kernelabstractions.jl
@@ -1,3 +1,17 @@
+# KernelAbstractions has a testsuite that isn't part of the main package.
+# Include it directly.
+
+const KATestSuite = let
+    mod = @eval module $(gensym())
+        using ..Test
+        import KernelAbstractions
+        kernelabstractions = pathof(KernelAbstractions)
+        kernelabstractions_root = dirname(dirname(kernelabstractions))
+        include(joinpath(kernelabstractions_root, "test", "testsuite.jl"))
+    end
+    mod.Testsuite
+end
+
 skip_tests=Set([
     "sparse",
     "Convert", # Need to opt out of i128
diff --git a/test/runtests.jl b/test/runtests.jl
index 6fd437f2..a57dd9cf 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,413 +1,211 @@
-using Distributed
-using Dates
-import REPL
-using Printf: @sprintf
-using Base.Filesystem: path_separator
+using ParallelTestRunner
 using Preferences
+import OpenCL, pocl_jll
+import Test
 
-# parse some command-line arguments
-function extract_flag!(args, flag, default=nothing)
-    for f in args
-        if startswith(f, flag)
-            # Check if it's just `--flag` or if it's `--flag=foo`
-            if f != flag
-                val = split(f, '=')[2]
-                if default !== nothing && !(typeof(default) <: AbstractString)
-                  val = parse(typeof(default), val)
-                end
-            else
-                val = default
-            end
+@info "System information:\n" * sprint(io->OpenCL.versioninfo(io))
 
-            # Drop this value from our args
-            filter!(x -> x != f, args)
-            return (true, val)
-        end
+## --platform selector
+do_platform, platform_filter = ParallelTestRunner.extract_flag!(ARGS, "--platform", nothing)
+
+custom_record_init = quote
+    struct OpenCLTestRecord <: ParallelTestRunner.AbstractTestRecord
+        # TODO: Would it be better to wrap "ParallelTestRunner.TestRecord "
+        value::Any          # AbstractTestSet or TestSetException
+        output::String      # captured stdout/stderr
+
+        # stats
+        time::Float64
+        bytes::UInt64
+        gctime::Float64
+        rss::UInt64
+    end
+    function ParallelTestRunner.memory_usage(rec::OpenCLTestRecord)
+        return rec.rss
+    end
+    function ParallelTestRunner.test_IOContext(::Type{OpenCLTestRecord}, args...)
+        return ParallelTestRunner.test_IOContext(ParallelTestRunner.TestRecord, args...)
     end
-    return (false, default)
-end
-do_help, _ = extract_flag!(ARGS, "--help")
-if do_help
-    println("""
-        Usage: runtests.jl [--help] [--list] [--jobs=N] [TESTS...]
-
-               --help             Show this text.
-               --list             List all available tests.
-               --verbose          Print more information during testing.
-               --quickfail        Fail the entire run as soon as a single test errored.
-               --jobs=N           Launch `N` processes to perform tests (default: Sys.CPU_THREADS).
-               --platform=NAME    Run tests on the platform named `NAME` (default: all platforms).
 
-               Remaining arguments filter the tests that will be executed.""")
-    exit(0)
-end
-_, jobs = extract_flag!(ARGS, "--jobs", Sys.CPU_THREADS)
-do_verbose, _ = extract_flag!(ARGS, "--verbose")
-do_quickfail, _ = extract_flag!(ARGS, "--quickfail")
+    const targets = []
+    using OpenCL, IOCapture
+
+    function ParallelTestRunner.runtest(::Type{OpenCLTestRecord}, f, name, init_code, color, (; platform_filter))
+        if isempty(targets)
+            for platform in cl.platforms(),
+                device in cl.devices(platform)
+                if platform_filter !== nothing
+                    # filter on the name or vendor
+                    names = lowercase.([platform.name, platform.vendor])
+                    if !any(contains(platform_filter), names)
+                        continue
+                    end
+                end
+                push!(targets, (; platform, device))
+            end
+            if isempty(targets)
+                if platform_filter === nothing
+                    throw(ArgumentError("No OpenCL platforms found"))
+                else
+                    throw(ArgumentError("No OpenCL platforms found matching $platform_filter"))
+                end
+            end
+        end
 
-include("setup.jl")     # make sure everything is precompiled
-@info "System information:\n" * sprint(io->OpenCL.versioninfo(io))
+        function inner()
+            # generate a temporary module to execute the tests in
+            mod = Core.eval(Main, Expr(:module, true, gensym(name)), Expr(:block))
+            @eval(mod, import ParallelTestRunner: Test, Random)
+            @eval(mod, using .Test, .Random)
+
+            Core.eval(mod, init_code)
+
+            # some tests require native execution capabilities
+            requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
+                          startswith(name, "gpuarrays/")
+
+            data = @eval mod begin
+                GC.gc(true)
+                Random.seed!(1)
+                OpenCL.allowscalar(false)
+
+                mktemp() do path, io
+                    stats = redirect_stdio(stdout=io, stderr=io) do
+                        @timed try
+                            # @testset $(name) begin
+                                @testset "\$(device.name)" for (; platform, device) in $targets
+                                    cl.platform!(platform)
+                                    cl.device!(device)
+
+                                    if !$requires_il || "cl_khr_il_program" in device.extensions
+                                        $f
+                                    end
+                                end
+                            # end
+                        catch err
+                            isa(err, Test.TestSetException) || rethrow()
+
+                            # return the error to package it into a TestRecord
+                            err
+                        end
+                    end
+                    close(io)
+                    output = read(path, String)
+                    (; testset=stats.value, output, stats.time, stats.bytes, stats.gctime)
 
-@info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable."
+                end
+            end
 
-# choose tests
-const tests = []
-const test_runners = Dict()
-## files in the test folder
-for (rootpath, dirs, files) in walkdir(@__DIR__)
-  # find Julia files
-  filter!(files) do file
-    endswith(file, ".jl") && file !== "setup.jl" && file !== "runtests.jl"
-  end
-  isempty(files) && continue
+            # process results
+            rss = Sys.maxrss()
+            record = OpenCLTestRecord(data..., rss)
 
-  # strip extension
-  files = map(files) do file
-    file[1:end-3]
-  end
+            GC.gc(true)
+            return record
+        end
 
-  # prepend subdir
-  subdir = relpath(rootpath, @__DIR__)
-  if subdir != "."
-    files = map(files) do file
-      joinpath(subdir, file)
+        @static if VERSION >= v"1.13.0-DEV.1044"
+            @with Test.TESTSET_PRINT_ENABLE => false begin
+                inner()
+            end
+        else
+            old_print_setting = Test.TESTSET_PRINT_ENABLE[]
+            Test.TESTSET_PRINT_ENABLE[] = false
+            try
+                inner()
+            finally
+                Test.TESTSET_PRINT_ENABLE[] = old_print_setting
+            end
+        end
     end
-  end
-
-  # unify path separators
-  files = map(files) do file
-    replace(file, path_separator => '/')
-  end
-
-  append!(tests, files)
-  for file in files
-    test_runners[file] = ()->include("$(@__DIR__)/$file.jl")
-  end
+end # quote
+eval(custom_record_init)
+
+# register custom tests that do not correspond to files in the test directory
+custom_tests = Dict{String, Expr}()
+
+# GPUArrays has a testsuite that isn't part of the main package.
+# Include it directly.
+const GPUArraysTestSuite = let
+    mod = @eval module $(gensym())
+        using ..Test
+        import GPUArrays
+        gpuarrays = pathof(GPUArrays)
+        gpuarrays_root = dirname(dirname(gpuarrays))
+        include(joinpath(gpuarrays_root, "test", "testsuite.jl"))
+    end
+    mod.TestSuite
 end
-sort!(tests; by=(file)->stat("$(@__DIR__)/$file.jl").size, rev=true)
-## GPUArrays testsuite
+
 for name in keys(GPUArraysTestSuite.tests)
-    push!(tests, "gpuarrays/$name")
-    test_runners["gpuarrays/$name"] = ()->GPUArraysTestSuite.tests[name](CLArray)
+    custom_tests["GPUArraysTestSuite/$name"] = :(GPUArraysTestSuite.tests[$name](CLArray))
 end
-## finalize
-unique!(tests)
 
-# parse some more command-line arguments
-## --list to list all available tests
-do_list, _ = extract_flag!(ARGS, "--list")
-if do_list
-    println("Available tests:")
-    for test in sort(tests)
-        println(" - $test")
-    end
-    exit(0)
-end
-## --platform selector
-do_platform, platform = extract_flag!(ARGS, "--platform", nothing)
-## no options should remain
-optlike_args = filter(startswith("-"), ARGS)
-if !isempty(optlike_args)
-    error("Unknown test options `$(join(optlike_args, " "))` (try `--help` for usage instructions)")
-end
-## the remaining args filter tests
-if isempty(ARGS)
-  # default to running all tests, except:
-  filter!(tests) do test
+function test_filter(test)
     if load_preference(OpenCL, "default_memory_backend") == "svm" &&
        test == "gpuarrays/indexing scalar"
         # GPUArrays' scalar indexing tests assume that indexing is not supported
         return false
     end
-
-    return true
-  end
-else
-  filter!(tests) do test
-    any(arg->startswith(test, arg), ARGS)
-  end
-end
-
-# add workers
-const test_exeflags = Base.julia_cmd()
-filter!(test_exeflags.exec) do c
-    return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds"))
-end
-push!(test_exeflags.exec, "--check-bounds=yes")
-push!(test_exeflags.exec, "--startup-file=no")
-push!(test_exeflags.exec, "--depwarn=yes")
-push!(test_exeflags.exec, "--project=$(Base.active_project())")
-const test_exename = popfirst!(test_exeflags.exec)
-function addworker(X; kwargs...)
-    withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do
-        procs = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...)
-        @everywhere procs include($(joinpath(@__DIR__, "setup.jl")))
-        procs
-    end
-end
-addworker(min(jobs, length(tests)))
-
-# pretty print information about gc and mem usage
-testgroupheader = "Test"
-workerheader = "(Worker)"
-name_align        = maximum([textwidth(testgroupheader) + textwidth(" ") +
-                             textwidth(workerheader); map(x -> textwidth(x) +
-                             3 + ndigits(nworkers()), tests)])
-elapsed_align     = textwidth("Time (s)")
-gc_align      = textwidth("GC (s)")
-percent_align = textwidth("GC %")
-alloc_align   = textwidth("Alloc (MB)")
-rss_align     = textwidth("RSS (MB)")
-printstyled(" "^(name_align + textwidth(testgroupheader) - 3), " | ")
-printstyled("         | ---------------- CPU ---------------- |\n", color=:white)
-printstyled(testgroupheader, color=:white)
-printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color=:white)
-printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color=:white)
-print_lock = stdout isa Base.LibuvStream ? stdout.lock : ReentrantLock()
-if stderr isa Base.LibuvStream
-    stderr.lock = print_lock
 end
-function print_testworker_stats(test, wrkr, resp)
-    @nospecialize resp
-    lock(print_lock)
-    try
-        printstyled(test, color=:white)
-        printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color=:white)
-        time_str = @sprintf("%7.2f",resp[2])
-        printstyled(lpad(time_str, elapsed_align, " "), " | ", color=:white)
 
-        cpu_gc_str = @sprintf("%5.2f", resp[4])
-        printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color=:white)
-        # since there may be quite a few digits in the percentage,
-        # the left-padding here is less to make sure everything fits
-        cpu_percent_str = @sprintf("%4.1f", 100 * resp[4] / resp[2])
-        printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color=:white)
-        cpu_alloc_str = @sprintf("%5.2f", resp[3] / 2^20)
-        printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color=:white)
+const init_code = quote
+    using OpenCL, pocl_jll
 
-        cpu_rss_str = @sprintf("%5.2f", resp[6] / 2^20)
-        printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color=:white)
-    finally
-        unlock(print_lock)
-    end
-end
-global print_testworker_started = (name, wrkr)->begin
-    if do_verbose
-        lock(print_lock)
-        try
-            printstyled(name, color=:white)
-            printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |",
-                " "^elapsed_align, "started at $(now())\n", color=:white)
-        finally
-            unlock(print_lock)
+    # GPUArrays has a testsuite that isn't part of the main package.
+    # Include it directly.
+    const GPUArraysTestSuite = let
+        mod = @eval module $(gensym())
+            using ..Test
+            import GPUArrays
+            gpuarrays = pathof(GPUArrays)
+            gpuarrays_root = dirname(dirname(gpuarrays))
+            include(joinpath(gpuarrays_root, "test", "testsuite.jl"))
         end
+        mod.TestSuite
     end
-end
-function print_testworker_errored(name, wrkr)
-    lock(print_lock)
-    try
-        printstyled(name, color=:red)
-        printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |",
-            " "^elapsed_align, " failed at $(now())\n", color=:red)
-    finally
-        unlock(print_lock)
-    end
-end
 
-# run tasks
-t0 = now()
-results = []
-all_tasks = Task[]
-all_tests = copy(tests)
-try
-    # Monitor stdin and kill this task on ^C
-    # but don't do this on Windows, because it may deadlock in the kernel
-    t = current_task()
-    running_tests = Dict{String, DateTime}()
-    if !Sys.iswindows() && isa(stdin, Base.TTY)
-        stdin_monitor = @async begin
-            term = REPL.Terminals.TTYTerminal("xterm", stdin, stdout, stderr)
-            try
-                REPL.Terminals.raw!(term, true)
-                while true
-                    c = read(term, Char)
-                    if c == '\x3'
-                        Base.throwto(t, InterruptException())
-                        break
-                    elseif c == '?'
-                        println("Currently running: ")
-                        tests = sort(collect(running_tests), by=x->x[2])
-                        foreach(tests) do (test, date)
-                            println(test, " (running for ", round(now()-date, Minute), ")")
-                        end
-                    end
-                end
-            catch e
-                isa(e, InterruptException) || rethrow()
-            finally
-                REPL.Terminals.raw!(term, false)
+    const device_eltypes = Dict()
+    function GPUArraysTestSuite.supported_eltypes(::Type{<:CLArray})
+        get!(device_eltypes, cl.device()) do
+            types = [Int16, Int32, Int64,
+                    Complex{Int16}, Complex{Int32}, Complex{Int64},
+                    Float32, ComplexF32]
+            if "cl_khr_fp64" in cl.device().extensions
+                push!(types, Float64)
+                push!(types, ComplexF64)
             end
+            if "cl_khr_fp16" in cl.device().extensions
+                push!(types, Float16)
+                push!(types, ComplexF16)
+            end
+            return types
         end
     end
-    @sync begin
-        function recycle_worker(p)
-            rmprocs(p, waitfor=30)
-            return nothing
-        end
 
-        for p in workers()
-            @async begin
-                push!(all_tasks, current_task())
-                while length(tests) > 0
-                    test = popfirst!(tests)
-
-                    # sometimes a worker failed, and we need to spawn a new one
-                    if p === nothing
-                        p = addworker(1)[1]
-                    end
-                    wrkr = p
-
-                    local resp
-
-                    # run the test
-                    running_tests[test] = now()
-                    try
-                        resp = remotecall_fetch(runtests, wrkr,
-                                                test_runners[test], test,
-                                                platform)
-                    catch e
-                        isa(e, InterruptException) && return
-                        resp = Any[e]
-                    end
-                    delete!(running_tests, test)
-                    push!(results, (test, resp))
+    testf(f, xs...; kwargs...) = GPUArraysTestSuite.compare(f, CLArray, xs...; kwargs...)
 
-                    # act on the results
-                    if resp[1] isa Exception
-                        print_testworker_errored(test, wrkr)
-                        do_quickfail && Base.throwto(t, InterruptException())
+    ## auxiliary stuff
 
-                        # the worker encountered some failure, recycle it
-                        # so future tests get a fresh environment
-                        p = recycle_worker(p)
-                    else
-                        print_testworker_stats(test, wrkr, resp)
+    # Run some code on-device
+    macro on_device(ex...)
+        code = ex[end]
+        kwargs = ex[1:end-1]
 
-                        compilations = resp[7]
-                        if Sys.iswindows() && compilations > 100
-                            # XXX: restart to avoid handle exhaustion
-                            #      (see pocl/pocl#1941)
-                            @warn "Restarting worker $wrkr to avoid handle exhaustion"
-                            p = recycle_worker(p)
-                        end
-                    end
+        @gensym kernel
+        esc(quote
+            let
+                function $kernel()
+                    $code
+                    return
                 end
 
-                if p !== nothing
-                    recycle_worker(p)
-                end
-            end
-        end
-    end
-catch e
-    isa(e, InterruptException) || rethrow()
-    # If the test suite was merely interrupted, still print the
-    # summary, which can be useful to diagnose what's going on
-    foreach(task -> begin
-            istaskstarted(task) || return
-            istaskdone(task) && return
-            try
-                schedule(task, InterruptException(); error=true)
-            catch ex
-                @error "InterruptException" exception=ex,catch_backtrace()
+                @opencl $(kwargs...) $kernel()
+                cl.finish(cl.queue())
             end
-        end, all_tasks)
-    for t in all_tasks
-        # NOTE: we can't just wait, but need to discard the exception,
-        #       because the throwto for --quickfail also kills the worker.
-        try
-            wait(t)
-        catch e
-            showerror(stderr, e)
-        end
-    end
-finally
-    if @isdefined stdin_monitor
-        schedule(stdin_monitor, InterruptException(); error=true)
+        end)
     end
 end
-t1 = now()
-elapsed = canonicalize(Dates.CompoundPeriod(t1-t0))
-println("Testing finished in $elapsed")
 
-# construct a testset to render the test results
-o_ts = Test.DefaultTestSet("Overall")
-Test.push_testset(o_ts)
-completed_tests = Set{String}()
-for (testname, (resp,)) in results
-    push!(completed_tests, testname)
-    if isa(resp, Test.DefaultTestSet)
-        Test.push_testset(resp)
-        Test.record(o_ts, resp)
-        Test.pop_testset()
-    elseif isa(resp, Tuple{Int,Int})
-        fake = Test.DefaultTestSet(testname)
-        for i in 1:resp[1]
-            Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing))
-        end
-        for i in 1:resp[2]
-            Test.record(fake, Test.Broken(:test, nothing))
-        end
-        Test.push_testset(fake)
-        Test.record(o_ts, fake)
-        Test.pop_testset()
-    elseif isa(resp, RemoteException) && isa(resp.captured.ex, Test.TestSetException)
-        println("Worker $(resp.pid) failed running test $(testname):")
-        Base.showerror(stdout, resp.captured)
-        println()
-        fake = Test.DefaultTestSet(testname)
-        for i in 1:resp.captured.ex.pass
-            Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing))
-        end
-        for i in 1:resp.captured.ex.broken
-            Test.record(fake, Test.Broken(:test, nothing))
-        end
-        for t in resp.captured.ex.errors_and_fails
-            Test.record(fake, t)
-        end
-        Test.push_testset(fake)
-        Test.record(o_ts, fake)
-        Test.pop_testset()
-    else
-        if !isa(resp, Exception)
-            resp = ErrorException(string("Unknown result type : ", typeof(resp)))
-        end
-        # If this test raised an exception that is not a remote testset exception,
-        # i.e. not a RemoteException capturing a TestSetException that means
-        # the test runner itself had some problem, so we may have hit a segfault,
-        # deserialization errors or something similar.  Record this testset as Errored.
-        fake = Test.DefaultTestSet(testname)
-        Test.record(fake, Test.Error(:nontest_error, testname, nothing, Any[(resp, [])], LineNumberNode(1)))
-        Test.push_testset(fake)
-        Test.record(o_ts, fake)
-        Test.pop_testset()
-    end
-end
-for test in all_tests
-    (test in completed_tests) && continue
-    fake = Test.DefaultTestSet(test)
-    Test.record(fake, Test.Error(:test_interrupted, test, nothing,
-                                    [("skipped", [])], LineNumberNode(1)))
-    Test.push_testset(fake)
-    Test.record(o_ts, fake)
-    Test.pop_testset()
-end
-println()
-Test.print_test_results(o_ts, 1)
-if !o_ts.anynonpass
-    println("    \033[32;1mSUCCESS\033[0m")
-else
-    println("    \033[31;1mFAILURE\033[0m\n")
-    Test.print_test_errors(o_ts)
-    throw(Test.FallbackTestSetException("Test run finished with errors"))
-end
+runtests(OpenCL, ARGS; custom_tests, test_filter, init_code, custom_record_init,
+                       RecordType=OpenCLTestRecord, custom_args=(;platform_filter = platform))
diff --git a/test/setup.jl b/test/setup.jl
index 90337d36..a6635b85 100644
--- a/test/setup.jl
+++ b/test/setup.jl
@@ -1,52 +1,6 @@
-using Distributed, Test
-using OpenCL, pocl_jll
+#=
 using IOCapture
 
-# KernelAbstractions has a testsuite that isn't part of the main package.
-# Include it directly.
-const KATestSuite = let
-    mod = @eval module $(gensym())
-        using ..Test
-        import KernelAbstractions
-        kernelabstractions = pathof(KernelAbstractions)
-        kernelabstractions_root = dirname(dirname(kernelabstractions))
-        include(joinpath(kernelabstractions_root, "test", "testsuite.jl"))
-    end
-    mod.Testsuite
-end
-
-# GPUArrays has a testsuite that isn't part of the main package.
-# Include it directly.
-const GPUArraysTestSuite = let
-    mod = @eval module $(gensym())
-        using ..Test
-        import GPUArrays
-        gpuarrays = pathof(GPUArrays)
-        gpuarrays_root = dirname(dirname(gpuarrays))
-        include(joinpath(gpuarrays_root, "test", "testsuite.jl"))
-    end
-    mod.TestSuite
-end
-testf(f, xs...; kwargs...) = GPUArraysTestSuite.compare(f, CLArray, xs...; kwargs...)
-
-const device_eltypes = Dict()
-function GPUArraysTestSuite.supported_eltypes(::Type{<:CLArray})
-    get!(device_eltypes, cl.device()) do
-        types = [Int16, Int32, Int64,
-                 Complex{Int16}, Complex{Int32}, Complex{Int64},
-                 Float32, ComplexF32]
-        if "cl_khr_fp64" in cl.device().extensions
-            push!(types, Float64)
-            push!(types, ComplexF64)
-        end
-        if "cl_khr_fp16" in cl.device().extensions
-            push!(types, Float16)
-            push!(types, ComplexF16)
-        end
-        return types
-    end
-end
-
 using Random
 
 
@@ -55,8 +9,7 @@ using Random
 const targets = []
 
 function runtests(f, name, platform_filter)
-    old_print_setting = Test.TESTSET_PRINT_ENABLE[]
-    Test.TESTSET_PRINT_ENABLE[] = false
+
 
     if isempty(targets)
         for platform in cl.platforms(),
@@ -79,88 +32,24 @@ function runtests(f, name, platform_filter)
         end
     end
 
-    try
-        # generate a temporary module to execute the tests in
-        mod_name = Symbol("Test", rand(1:100), "Main_", replace(name, '/' => '_'))
-        mod = @eval(Main, module $mod_name end)
-        @eval(mod, using Test, Random, OpenCL)
 
-        let id = myid()
-            wait(@spawnat 1 print_testworker_started(name, id))
-        end
+    # some tests require native execution capabilities
+    requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
+                    startswith(name, "gpuarrays/") || startswith(name, "device/")
 
-        # some tests require native execution capabilities
-        requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
-                      startswith(name, "gpuarrays/") || startswith(name, "device/")
 
-        ex = quote
-            GC.gc(true)
-            Random.seed!(1)
-            OpenCL.allowscalar(false)
+        OpenCL.allowscalar(false)
 
-            @timed @testset $"$name" begin
-                @testset "\$(device.name)" for (; platform, device) in $targets
-                    cl.platform!(platform)
-                    cl.device!(device)
+        @timed @testset $"$name" begin
+            @testset "\$(device.name)" for (; platform, device) in $targets
+                cl.platform!(platform)
+                cl.device!(device)
 
-                    if !$requires_il || "cl_khr_il_program" in device.extensions
-                        $f()
-                    end
+                if !$requires_il || "cl_khr_il_program" in device.extensions
+                    $f()
                 end
             end
         end
-        data = Core.eval(mod, ex)
-        #data[1] is the testset
-
-        # process results
-        cpu_rss = Sys.maxrss()
-        compilations = OpenCL.compilations[]
-        if VERSION >= v"1.11.0-DEV.1529"
-            tc = Test.get_test_counts(data[1])
-            passes,fails,error,broken,c_passes,c_fails,c_errors,c_broken =
-                tc.passes, tc.fails, tc.errors, tc.broken, tc.cumulative_passes,
-                tc.cumulative_fails, tc.cumulative_errors, tc.cumulative_broken
-        else
-            passes,fails,errors,broken,c_passes,c_fails,c_errors,c_broken =
-                Test.get_test_counts(data[1])
-        end
-        if data[1].anynonpass == false
-            data = ((passes+c_passes,broken+c_broken),
-                    data[2],
-                    data[3],
-                    data[4],
-                    data[5])
-        end
-        res = vcat(collect(data), cpu_rss, compilations)
-
-        GC.gc(true)
-        res
-    finally
-        Test.TESTSET_PRINT_ENABLE[] = old_print_setting
     end
-end
-
-
-## auxiliary stuff
-
-# Run some code on-device
-macro on_device(ex...)
-    code = ex[end]
-    kwargs = ex[1:end-1]
-
-    @gensym kernel
-    esc(quote
-        let
-            function $kernel()
-                $code
-                return
-            end
-
-            @opencl $(kwargs...) $kernel()
-            cl.finish(cl.queue())
-        end
-    end)
-end
-
 
-nothing # File is loaded via a remotecall to "include". Ensure it returns "nothing".
+ =#
\ No newline at end of file

From 2cb21fc289b658afd718e9a7d37ba3f559ed6127 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Wed, 15 Oct 2025 23:45:50 +0200
Subject: [PATCH 02/23] fix custom_record implementation

---
 test/Project.toml |  2 +-
 test/runtests.jl  | 17 +++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index 75779a72..c86d6de7 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -30,4 +30,4 @@ pocl_jll = "7.0"
 ParallelTestRunner = "1.0.1"
 
 [sources]
-ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="vc/custom_testrecord"}
+ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="vc/custom_record"}
diff --git a/test/runtests.jl b/test/runtests.jl
index a57dd9cf..5c10bb6c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -9,6 +9,7 @@ import Test
 do_platform, platform_filter = ParallelTestRunner.extract_flag!(ARGS, "--platform", nothing)
 
 custom_record_init = quote
+    import ParallelTestRunner: Test
     struct OpenCLTestRecord <: ParallelTestRunner.AbstractTestRecord
         # TODO: Would it be better to wrap "ParallelTestRunner.TestRecord "
         value::Any          # AbstractTestSet or TestSetException
@@ -23,8 +24,8 @@ custom_record_init = quote
     function ParallelTestRunner.memory_usage(rec::OpenCLTestRecord)
         return rec.rss
     end
-    function ParallelTestRunner.test_IOContext(::Type{OpenCLTestRecord}, args...)
-        return ParallelTestRunner.test_IOContext(ParallelTestRunner.TestRecord, args...)
+    function ParallelTestRunner.test_IOContext(::Type{OpenCLTestRecord}, stdout::IO, stderr::IO, lock::ReentrantLock, name_align::Int64)
+        return ParallelTestRunner.test_IOContext(ParallelTestRunner.TestRecord, stdout, stderr, lock, name_align)
     end
 
     const targets = []
@@ -72,16 +73,16 @@ custom_record_init = quote
                 mktemp() do path, io
                     stats = redirect_stdio(stdout=io, stderr=io) do
                         @timed try
-                            # @testset $(name) begin
-                                @testset "\$(device.name)" for (; platform, device) in $targets
+                            @testset $(Expr(:$, :name)) begin
+                                @testset "\$(device.name)" for (; platform, device) in $(Expr(:$, :targets))
                                     cl.platform!(platform)
                                     cl.device!(device)
 
-                                    if !$requires_il || "cl_khr_il_program" in device.extensions
-                                        $f
+                                    if !$(Expr(:$, :requires_il)) || "cl_khr_il_program" in device.extensions
+                                        $(Expr(:$, :f))
                                     end
                                 end
-                            # end
+                            end
                         catch err
                             isa(err, Test.TestSetException) || rethrow()
 
@@ -208,4 +209,4 @@ const init_code = quote
 end
 
 runtests(OpenCL, ARGS; custom_tests, test_filter, init_code, custom_record_init,
-                       RecordType=OpenCLTestRecord, custom_args=(;platform_filter = platform))
+                       RecordType=OpenCLTestRecord, custom_args=(;platform_filter))

From dd147311bc56ac1cc74c463a0b045840ddb17ed9 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Wed, 15 Oct 2025 23:46:44 +0200
Subject: [PATCH 03/23] fixup! fix custom_record implementation

---
 test/runtests.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 5c10bb6c..2845fa76 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -148,6 +148,7 @@ function test_filter(test)
         # GPUArrays' scalar indexing tests assume that indexing is not supported
         return false
     end
+    return true
 end
 
 const init_code = quote

From 00bb2e7cd24c14b899d99789a1bf4181a7232bce Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Wed, 15 Oct 2025 23:49:20 +0200
Subject: [PATCH 04/23] fixup! fixup! fix custom_record implementation

---
 test/runtests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 2845fa76..4f170f59 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -55,7 +55,7 @@ custom_record_init = quote
 
         function inner()
             # generate a temporary module to execute the tests in
-            mod = Core.eval(Main, Expr(:module, true, gensym(name)), Expr(:block))
+            mod = Core.eval(Main, Expr(:module, true, gensym(name), Expr(:block)))
             @eval(mod, import ParallelTestRunner: Test, Random)
             @eval(mod, using .Test, .Random)
 

From a7d68e5ee0504dc725b3a0738c96966b454766fb Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 16 Oct 2025 03:40:26 +0200
Subject: [PATCH 05/23] adapt to refactor

---
 test/runtests.jl | 93 ++++++++++++++++++------------------------------
 1 file changed, 35 insertions(+), 58 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 4f170f59..ff5e4451 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -31,7 +31,7 @@ custom_record_init = quote
     const targets = []
     using OpenCL, IOCapture
 
-    function ParallelTestRunner.runtest(::Type{OpenCLTestRecord}, f, name, init_code, color, (; platform_filter))
+    function ParallelTestRunner.execute(::Type{OpenCLTestRecord}, mod, f, name, color, (; platform_filter))
         if isempty(targets)
             for platform in cl.platforms(),
                 device in cl.devices(platform)
@@ -53,71 +53,48 @@ custom_record_init = quote
             end
         end
 
-        function inner()
-            # generate a temporary module to execute the tests in
-            mod = Core.eval(Main, Expr(:module, true, gensym(name), Expr(:block)))
-            @eval(mod, import ParallelTestRunner: Test, Random)
-            @eval(mod, using .Test, .Random)
-
-            Core.eval(mod, init_code)
-
-            # some tests require native execution capabilities
-            requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
-                          startswith(name, "gpuarrays/")
-
-            data = @eval mod begin
-                GC.gc(true)
-                Random.seed!(1)
-                OpenCL.allowscalar(false)
-
-                mktemp() do path, io
-                    stats = redirect_stdio(stdout=io, stderr=io) do
-                        @timed try
-                            @testset $(Expr(:$, :name)) begin
-                                @testset "\$(device.name)" for (; platform, device) in $(Expr(:$, :targets))
-                                    cl.platform!(platform)
-                                    cl.device!(device)
-
-                                    if !$(Expr(:$, :requires_il)) || "cl_khr_il_program" in device.extensions
-                                        $(Expr(:$, :f))
-                                    end
+        # some tests require native execution capabilities
+        requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
+                      startswith(name, "gpuarrays/")
+
+        data = @eval mod begin
+            GC.gc(true)
+            Random.seed!(1)
+            OpenCL.allowscalar(false)
+
+            mktemp() do path, io
+                stats = redirect_stdio(stdout=io, stderr=io) do
+                    @timed try
+                        @testset $(Expr(:$, :name)) begin
+                            @testset "\$(device.name)" for (; platform, device) in $(Expr(:$, :targets))
+                                cl.platform!(platform)
+                                cl.device!(device)
+
+                                if !$(Expr(:$, :requires_il)) || "cl_khr_il_program" in device.extensions
+                                    $(Expr(:$, :f))
                                 end
                             end
-                        catch err
-                            isa(err, Test.TestSetException) || rethrow()
-
-                            # return the error to package it into a TestRecord
-                            err
                         end
-                    end
-                    close(io)
-                    output = read(path, String)
-                    (; testset=stats.value, output, stats.time, stats.bytes, stats.gctime)
+                    catch err
+                        isa(err, Test.TestSetException) || rethrow()
 
+                        # return the error to package it into a TestRecord
+                        err
+                    end
                 end
-            end
-
-            # process results
-            rss = Sys.maxrss()
-            record = OpenCLTestRecord(data..., rss)
+                close(io)
+                output = read(path, String)
+                (; testset=stats.value, output, stats.time, stats.bytes, stats.gctime)
 
-            GC.gc(true)
-            return record
-        end
-
-        @static if VERSION >= v"1.13.0-DEV.1044"
-            @with Test.TESTSET_PRINT_ENABLE => false begin
-                inner()
-            end
-        else
-            old_print_setting = Test.TESTSET_PRINT_ENABLE[]
-            Test.TESTSET_PRINT_ENABLE[] = false
-            try
-                inner()
-            finally
-                Test.TESTSET_PRINT_ENABLE[] = old_print_setting
             end
         end
+
+        # process results
+        rss = Sys.maxrss()
+        record = OpenCLTestRecord(data..., rss)
+
+        GC.gc(true)
+        return record
     end
 end # quote
 eval(custom_record_init)

From 715b3fe1fb037c5e4039d94521df47cd7f78d8de Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 16 Oct 2025 16:21:37 +0200
Subject: [PATCH 06/23] Switch to test_transform

---
 test/Project.toml |  2 +-
 test/runtests.jl  | 95 ++++++++++++-----------------------------------
 2 files changed, 25 insertions(+), 72 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index c86d6de7..f741cc0e 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -30,4 +30,4 @@ pocl_jll = "7.0"
 ParallelTestRunner = "1.0.1"
 
 [sources]
-ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="vc/custom_record"}
+ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="vc/test_transform"}
diff --git a/test/runtests.jl b/test/runtests.jl
index ff5e4451..5974a0cc 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,96 +8,45 @@ import Test
 ## --platform selector
 do_platform, platform_filter = ParallelTestRunner.extract_flag!(ARGS, "--platform", nothing)
 
-custom_record_init = quote
-    import ParallelTestRunner: Test
-    struct OpenCLTestRecord <: ParallelTestRunner.AbstractTestRecord
-        # TODO: Would it be better to wrap "ParallelTestRunner.TestRecord "
-        value::Any          # AbstractTestSet or TestSetException
-        output::String      # captured stdout/stderr
-
-        # stats
-        time::Float64
-        bytes::UInt64
-        gctime::Float64
-        rss::UInt64
-    end
-    function ParallelTestRunner.memory_usage(rec::OpenCLTestRecord)
-        return rec.rss
-    end
-    function ParallelTestRunner.test_IOContext(::Type{OpenCLTestRecord}, stdout::IO, stderr::IO, lock::ReentrantLock, name_align::Int64)
-        return ParallelTestRunner.test_IOContext(ParallelTestRunner.TestRecord, stdout, stderr, lock, name_align)
-    end
-
-    const targets = []
-    using OpenCL, IOCapture
-
-    function ParallelTestRunner.execute(::Type{OpenCLTestRecord}, mod, f, name, color, (; platform_filter))
+test_transform = function(test, expr)
+    # targets is a global variable that is defined in init_code
+    return quote
         if isempty(targets)
             for platform in cl.platforms(),
                 device in cl.devices(platform)
-                if platform_filter !== nothing
+                if $(platform_filter) !== nothing
                     # filter on the name or vendor
                     names = lowercase.([platform.name, platform.vendor])
-                    if !any(contains(platform_filter), names)
+                    if !any(contains($(platform_filter)), names)
                         continue
                     end
                 end
                 push!(targets, (; platform, device))
             end
             if isempty(targets)
-                if platform_filter === nothing
+                if $(platform_filter) === nothing
                     throw(ArgumentError("No OpenCL platforms found"))
                 else
-                    throw(ArgumentError("No OpenCL platforms found matching $platform_filter"))
+                    throw(ArgumentError("No OpenCL platforms found matching $($(platform_filter))"))
                 end
             end
         end
 
         # some tests require native execution capabilities
-        requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
-                      startswith(name, "gpuarrays/")
-
-        data = @eval mod begin
-            GC.gc(true)
-            Random.seed!(1)
-            OpenCL.allowscalar(false)
-
-            mktemp() do path, io
-                stats = redirect_stdio(stdout=io, stderr=io) do
-                    @timed try
-                        @testset $(Expr(:$, :name)) begin
-                            @testset "\$(device.name)" for (; platform, device) in $(Expr(:$, :targets))
-                                cl.platform!(platform)
-                                cl.device!(device)
-
-                                if !$(Expr(:$, :requires_il)) || "cl_khr_il_program" in device.extensions
-                                    $(Expr(:$, :f))
-                                end
-                            end
-                        end
-                    catch err
-                        isa(err, Test.TestSetException) || rethrow()
-
-                        # return the error to package it into a TestRecord
-                        err
-                    end
-                end
-                close(io)
-                output = read(path, String)
-                (; testset=stats.value, output, stats.time, stats.bytes, stats.gctime)
+        requires_il = $(test) in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
+                      startswith($(test), "gpuarrays/")
 
+        @testset "\$(device.name)" for (; platform, device) in targets
+            cl.platform!(platform)
+            cl.device!(device)
+
+            if !requires_il || "cl_khr_il_program" in device.extensions
+                $(expr)
             end
         end
-
-        # process results
-        rss = Sys.maxrss()
-        record = OpenCLTestRecord(data..., rss)
-
-        GC.gc(true)
-        return record
     end
-end # quote
-eval(custom_record_init)
+end
+
 
 # register custom tests that do not correspond to files in the test directory
 custom_tests = Dict{String, Expr}()
@@ -116,7 +65,8 @@ const GPUArraysTestSuite = let
 end
 
 for name in keys(GPUArraysTestSuite.tests)
-    custom_tests["GPUArraysTestSuite/$name"] = :(GPUArraysTestSuite.tests[$name](CLArray))
+    test = "GPUArraysTestSuite/$name"
+    custom_tests[test] = test_transform(test, :(GPUArraysTestSuite.tests[$name](CLArray)))
 end
 
 function test_filter(test)
@@ -131,6 +81,9 @@ end
 const init_code = quote
     using OpenCL, pocl_jll
 
+    OpenCL.allowscalar(false)
+    const targets = []
+
     # GPUArrays has a testsuite that isn't part of the main package.
     # Include it directly.
     const GPUArraysTestSuite = let
@@ -186,5 +139,5 @@ const init_code = quote
     end
 end
 
-runtests(OpenCL, ARGS; custom_tests, test_filter, init_code, custom_record_init,
-                       RecordType=OpenCLTestRecord, custom_args=(;platform_filter))
+
+runtests(OpenCL, ARGS; custom_tests, test_filter, init_code, test_transform)

From b51193f35df3916ee3d2ea2453a5df4ab2a257dc Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 16 Oct 2025 16:21:37 +0200
Subject: [PATCH 07/23] Switch to test_transform

---
 test/runtests.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 5974a0cc..78375fc7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -9,6 +9,10 @@ import Test
 do_platform, platform_filter = ParallelTestRunner.extract_flag!(ARGS, "--platform", nothing)
 
 test_transform = function(test, expr)
+    # some tests require native execution capabilities
+    requires_il = test in ["atomics", "execution", "intrinsics", "kernelabstractions", "statistics"] ||
+                  startswith(test, "gpuarrays/")
+
     # targets is a global variable that is defined in init_code
     return quote
         if isempty(targets)
@@ -32,15 +36,11 @@ test_transform = function(test, expr)
             end
         end
 
-        # some tests require native execution capabilities
-        requires_il = $(test) in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
-                      startswith($(test), "gpuarrays/")
-
         @testset "\$(device.name)" for (; platform, device) in targets
             cl.platform!(platform)
             cl.device!(device)
 
-            if !requires_il || "cl_khr_il_program" in device.extensions
+            if !$(requires_il) || "cl_khr_il_program" in device.extensions
                 $(expr)
             end
         end

From 5b77f9652b1ebc3930593c0a8ecf4e6e26858948 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 16 Oct 2025 17:00:10 +0200
Subject: [PATCH 08/23] fix name for gpuarrays testsuite

---
 test/runtests.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 78375fc7..ff47c467 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,7 +10,8 @@ do_platform, platform_filter = ParallelTestRunner.extract_flag!(ARGS, "--platfor
 
 test_transform = function(test, expr)
     # some tests require native execution capabilities
-    requires_il = test in ["atomics", "execution", "intrinsics", "kernelabstractions", "statistics"] ||
+    requires_il = test in ["atomics", "execution", "intrinsics", "kernelabstractions", "statistics",
+                           "linalg", ] ||
                   startswith(test, "gpuarrays/")
 
     # targets is a global variable that is defined in init_code
@@ -65,7 +66,7 @@ const GPUArraysTestSuite = let
 end
 
 for name in keys(GPUArraysTestSuite.tests)
-    test = "GPUArraysTestSuite/$name"
+    test = "gpuarrays/$name"
     custom_tests[test] = test_transform(test, :(GPUArraysTestSuite.tests[$name](CLArray)))
 end
 

From 1d2394e328dbf4e67db7a53eeb560e783cd0a0d6 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 16 Oct 2025 19:37:20 +0200
Subject: [PATCH 09/23] fix execution test

---
 test/setup.jl | 55 ---------------------------------------------------
 1 file changed, 55 deletions(-)
 delete mode 100644 test/setup.jl

diff --git a/test/setup.jl b/test/setup.jl
deleted file mode 100644
index a6635b85..00000000
--- a/test/setup.jl
+++ /dev/null
@@ -1,55 +0,0 @@
-#=
-using IOCapture
-
-using Random
-
-
-## entry point
-
-const targets = []
-
-function runtests(f, name, platform_filter)
-
-
-    if isempty(targets)
-        for platform in cl.platforms(),
-            device in cl.devices(platform)
-            if platform_filter !== nothing
-                # filter on the name or vendor
-                names = lowercase.([platform.name, platform.vendor])
-                if !any(contains(platform_filter), names)
-                    continue
-                end
-            end
-            push!(targets, (; platform, device))
-        end
-        if isempty(targets)
-            if platform_filter === nothing
-                throw(ArgumentError("No OpenCL platforms found"))
-            else
-                throw(ArgumentError("No OpenCL platforms found matching $platform_filter"))
-            end
-        end
-    end
-
-
-    # some tests require native execution capabilities
-    requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] ||
-                    startswith(name, "gpuarrays/") || startswith(name, "device/")
-
-
-        OpenCL.allowscalar(false)
-
-        @timed @testset $"$name" begin
-            @testset "\$(device.name)" for (; platform, device) in $targets
-                cl.platform!(platform)
-                cl.device!(device)
-
-                if !$requires_il || "cl_khr_il_program" in device.extensions
-                    $f()
-                end
-            end
-        end
-    end
-
- =#
\ No newline at end of file

From cc2591cc75930b197ab5adbbde48b2ed02df53d3 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Fri, 17 Oct 2025 18:09:10 +0200
Subject: [PATCH 10/23] give JuliaTesting/ParallelTestRunner#59 a whirl

---
 test/Project.toml |  2 +-
 test/runtests.jl  | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index f741cc0e..d5174d18 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -30,4 +30,4 @@ pocl_jll = "7.0"
 ParallelTestRunner = "1.0.1"
 
 [sources]
-ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="vc/test_transform"}
+ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="tb/testsuite"}
diff --git a/test/runtests.jl b/test/runtests.jl
index ff47c467..93e2d322 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -48,9 +48,18 @@ test_transform = function(test, expr)
     end
 end
 
+const testsuite = find_tests(pwd()) do path
+    expr = quote
+        include($path)
+    end
+    test_transform(path, expr)
+end
 
-# register custom tests that do not correspond to files in the test directory
-custom_tests = Dict{String, Expr}()
+if load_preference(OpenCL, "default_memory_backend") == "svm"
+    # GPUArrays' scalar indexing tests assume that indexing is not supported
+    delete!(testsuite, "gpuarrays/indexing scalar")
+    return false
+end
 
 # GPUArrays has a testsuite that isn't part of the main package.
 # Include it directly.
@@ -67,16 +76,7 @@ end
 
 for name in keys(GPUArraysTestSuite.tests)
     test = "gpuarrays/$name"
-    custom_tests[test] = test_transform(test, :(GPUArraysTestSuite.tests[$name](CLArray)))
-end
-
-function test_filter(test)
-    if load_preference(OpenCL, "default_memory_backend") == "svm" &&
-       test == "gpuarrays/indexing scalar"
-        # GPUArrays' scalar indexing tests assume that indexing is not supported
-        return false
-    end
-    return true
+    testsuite[test] = test_transform(test, :(GPUArraysTestSuite.tests[$name](CLArray)))
 end
 
 const init_code = quote
@@ -141,4 +141,4 @@ const init_code = quote
 end
 
 
-runtests(OpenCL, ARGS; custom_tests, test_filter, init_code, test_transform)
+runtests(OpenCL, ARGS; testsuite, init_code)

From 622279fe7b7682c9d55a3be4f61713aa3676ff41 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Fri, 17 Oct 2025 19:13:40 +0200
Subject: [PATCH 11/23] Apply suggestion from @vchuravy

---
 test/runtests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 93e2d322..d55acb3d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -37,7 +37,7 @@ test_transform = function(test, expr)
             end
         end
 
-        @testset "\$(device.name)" for (; platform, device) in targets
+        @testset device.name for (; platform, device) in targets
             cl.platform!(platform)
             cl.device!(device)
 

From 9619ca1af974299a2dd5979c99e6b88d0524d4df Mon Sep 17 00:00:00 2001
From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com>
Date: Thu, 21 May 2026 16:38:22 -0300
Subject: [PATCH 12/23] Fix execution test

Co-Authored-By: Valentin Churavy <v.churavy@gmail.com>
---
 test/execution.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/execution.jl b/test/execution.jl
index 2900c04f..1fd69d7a 100644
--- a/test/execution.jl
+++ b/test/execution.jl
@@ -1,4 +1,5 @@
 using SPIRV_LLVM_Translator_jll
+using IOCapture
 
 @testset "@opencl" begin
 

From a15f6b0711e4c72560011348ca055bc2222970b5 Mon Sep 17 00:00:00 2001
From: Tim Besard <tim.besard@gmail.com>
Date: Sat, 18 Oct 2025 10:05:29 +0200
Subject: [PATCH 13/23] Simplify and fix.

---
 test/runtests.jl | 65 +++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index d55acb3d..bf23e3bb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,10 +8,35 @@ import Test
 ## --platform selector
 do_platform, platform_filter = ParallelTestRunner.extract_flag!(ARGS, "--platform", nothing)
 
-test_transform = function(test, expr)
+# determine tests to run
+const testsuite = find_tests(pwd())
+## GPUArrays test suite: not part of the main package
+const GPUArraysTestSuite = let
+    mod = @eval module $(gensym())
+        using ..Test
+        import GPUArrays
+        gpuarrays = pathof(GPUArrays)
+        gpuarrays_root = dirname(dirname(gpuarrays))
+        include(joinpath(gpuarrays_root, "test", "testsuite.jl"))
+    end
+    mod.TestSuite
+end
+for name in keys(GPUArraysTestSuite.tests)
+    test = "gpuarrays/$name"
+    testsuite[test] = :(GPUArraysTestSuite.tests[$name](CLArray))
+end
+## filter
+if load_preference(OpenCL, "default_memory_backend") == "svm"
+    # GPUArrays' scalar indexing tests assume that indexing is not supported
+    delete!(testsuite, "gpuarrays/indexing scalar")
+    return false
+end
+
+# wrap tests in device loops
+function generate_test(test, expr)
     # some tests require native execution capabilities
-    requires_il = test in ["atomics", "execution", "intrinsics", "kernelabstractions", "statistics",
-                           "linalg", ] ||
+    requires_il = test in ["atomics", "execution", "intrinsics", "kernelabstractions",
+                           "statistics", "linalg", ] ||
                   startswith(test, "gpuarrays/")
 
     # targets is a global variable that is defined in init_code
@@ -37,7 +62,7 @@ test_transform = function(test, expr)
             end
         end
 
-        @testset device.name for (; platform, device) in targets
+        @testset "$(device.name)" for (; platform, device) in targets
             cl.platform!(platform)
             cl.device!(device)
 
@@ -47,36 +72,8 @@ test_transform = function(test, expr)
         end
     end
 end
-
-const testsuite = find_tests(pwd()) do path
-    expr = quote
-        include($path)
-    end
-    test_transform(path, expr)
-end
-
-if load_preference(OpenCL, "default_memory_backend") == "svm"
-    # GPUArrays' scalar indexing tests assume that indexing is not supported
-    delete!(testsuite, "gpuarrays/indexing scalar")
-    return false
-end
-
-# GPUArrays has a testsuite that isn't part of the main package.
-# Include it directly.
-const GPUArraysTestSuite = let
-    mod = @eval module $(gensym())
-        using ..Test
-        import GPUArrays
-        gpuarrays = pathof(GPUArrays)
-        gpuarrays_root = dirname(dirname(gpuarrays))
-        include(joinpath(gpuarrays_root, "test", "testsuite.jl"))
-    end
-    mod.TestSuite
-end
-
-for name in keys(GPUArraysTestSuite.tests)
-    test = "gpuarrays/$name"
-    testsuite[test] = test_transform(test, :(GPUArraysTestSuite.tests[$name](CLArray)))
+for test in keys(testsuite)
+    testsuite[test] = generate_test(test, testsuite[test])
 end
 
 const init_code = quote

From d707ca0c5ca67b67fee8f906d3614799ab562b94 Mon Sep 17 00:00:00 2001
From: Tim Besard <tim.besard@gmail.com>
Date: Mon, 20 Oct 2025 09:01:30 +0200
Subject: [PATCH 14/23] Support new filtering API.

---
 test/runtests.jl | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index bf23e3bb..09596ceb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,8 +5,8 @@ import Test
 
 @info "System information:\n" * sprint(io->OpenCL.versioninfo(io))
 
-## --platform selector
-do_platform, platform_filter = ParallelTestRunner.extract_flag!(ARGS, "--platform", nothing)
+## custom arguments
+args = parse_args(ARGS; custom=["platform"])
 
 # determine tests to run
 const testsuite = find_tests(pwd())
@@ -26,10 +26,12 @@ for name in keys(GPUArraysTestSuite.tests)
     testsuite[test] = :(GPUArraysTestSuite.tests[$name](CLArray))
 end
 ## filter
-if load_preference(OpenCL, "default_memory_backend") == "svm"
-    # GPUArrays' scalar indexing tests assume that indexing is not supported
-    delete!(testsuite, "gpuarrays/indexing scalar")
-    return false
+if filter_tests!(testsuite, args)
+    if load_preference(OpenCL, "default_memory_backend") == "svm"
+        # GPUArrays' scalar indexing tests assume that indexing is not supported
+        delete!(testsuite, "gpuarrays/indexing scalar")
+        return false
+    end
 end
 
 # wrap tests in device loops
@@ -42,22 +44,23 @@ function generate_test(test, expr)
     # targets is a global variable that is defined in init_code
     return quote
         if isempty(targets)
+            platform_filter = $(args.custom["platform"])
             for platform in cl.platforms(),
                 device in cl.devices(platform)
-                if $(platform_filter) !== nothing
+                if platform_filter !== nothing
                     # filter on the name or vendor
                     names = lowercase.([platform.name, platform.vendor])
-                    if !any(contains($(platform_filter)), names)
+                    if !any(contains(platform_filter.value), names)
                         continue
                     end
                 end
                 push!(targets, (; platform, device))
             end
             if isempty(targets)
-                if $(platform_filter) === nothing
+                if platform_filter === nothing
                     throw(ArgumentError("No OpenCL platforms found"))
                 else
-                    throw(ArgumentError("No OpenCL platforms found matching $($(platform_filter))"))
+                    throw(ArgumentError("No OpenCL platforms found matching $(platform_filter.value)"))
                 end
             end
         end
@@ -138,4 +141,4 @@ const init_code = quote
 end
 
 
-runtests(OpenCL, ARGS; testsuite, init_code)
+runtests(OpenCL, args; testsuite, init_code)

From 60a92457f2ab875ec79acd6128adb60705fcf217 Mon Sep 17 00:00:00 2001
From: Tim Besard <tim.besard@gmail.com>
Date: Mon, 20 Oct 2025 09:17:07 +0200
Subject: [PATCH 15/23] Limit to PoCL 7.0.

---
 test/Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Project.toml b/test/Project.toml
index d5174d18..d0afe624 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -26,7 +26,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
 
 [compat]
-pocl_jll = "7.0"
+pocl_jll = "~7.0"
 ParallelTestRunner = "1.0.1"
 
 [sources]

From 24a72909356b8e287a78a01d4a0af257e79247f7 Mon Sep 17 00:00:00 2001
From: Tim Besard <tim.besard@gmail.com>
Date: Mon, 20 Oct 2025 10:54:24 +0200
Subject: [PATCH 16/23] Try to work around Windows failure.

---
 test/runtests.jl | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 09596ceb..2e04653d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -140,5 +140,13 @@ const init_code = quote
     end
 end
 
+# avoid handle exhaustion on Windows by running each test in a separate process (pocl/pocl#1941)
+function test_worker(test)
+    if Sys.iswindows()
+        addworker()
+    else
+        nothing
+    end
+end
 
-runtests(OpenCL, args; testsuite, init_code)
+runtests(OpenCL, args; testsuite, init_code, test_worker)

From cd2b5b69b8333cae4fe7cbcfd3d55047b5837adb Mon Sep 17 00:00:00 2001
From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com>
Date: Fri, 31 Oct 2025 15:10:32 -0300
Subject: [PATCH 17/23] Revert "Limit to PoCL 7.0."

This reverts commit 0fe5db1de310cb291fe6557bfdd3e1e3b052dfa1.
---
 test/Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Project.toml b/test/Project.toml
index d0afe624..d5174d18 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -26,7 +26,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
 
 [compat]
-pocl_jll = "~7.0"
+pocl_jll = "7.0"
 ParallelTestRunner = "1.0.1"
 
 [sources]

From f24b6b55258bdba744aa2b5a0275fbfc748e550f Mon Sep 17 00:00:00 2001
From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com>
Date: Fri, 31 Oct 2025 15:41:54 -0300
Subject: [PATCH 18/23] PTR 2

---
 test/Project.toml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index d5174d18..dfd54665 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -27,7 +27,4 @@ pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
 
 [compat]
 pocl_jll = "7.0"
-ParallelTestRunner = "1.0.1"
-
-[sources]
-ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="tb/testsuite"}
+ParallelTestRunner = "2"

From f16aaabcf156ebf4ebd1b06e34527db09c009e20 Mon Sep 17 00:00:00 2001
From: Christian <28689358+christiangnrd@users.noreply.github.com>
Date: Sun, 1 Feb 2026 13:58:26 -0400
Subject: [PATCH 19/23] Rebase omission

---
 test/runtests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 2e04653d..3509d1fb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -39,7 +39,7 @@ function generate_test(test, expr)
     # some tests require native execution capabilities
     requires_il = test in ["atomics", "execution", "intrinsics", "kernelabstractions",
                            "statistics", "linalg", ] ||
-                  startswith(test, "gpuarrays/")
+                  startswith(test, "gpuarrays/") || startswith(test, "device/")
 
     # targets is a global variable that is defined in init_code
     return quote

From 5432750696bebc454b51d6077ce4d3a9c67a41f4 Mon Sep 17 00:00:00 2001
From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com>
Date: Wed, 4 Feb 2026 14:49:27 -0400
Subject: [PATCH 20/23] `init_worker_code`

---
 test/Project.toml |  2 +-
 test/runtests.jl  | 18 +++++++++++++-----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index dfd54665..24acaeeb 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -27,4 +27,4 @@ pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
 
 [compat]
 pocl_jll = "7.0"
-ParallelTestRunner = "2"
+ParallelTestRunner = "2.2"
diff --git a/test/runtests.jl b/test/runtests.jl
index 3509d1fb..fe3e6e86 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -79,7 +79,7 @@ for test in keys(testsuite)
     testsuite[test] = generate_test(test, testsuite[test])
 end
 
-const init_code = quote
+const init_worker_code = quote
     using OpenCL, pocl_jll
 
     OpenCL.allowscalar(false)
@@ -89,7 +89,7 @@ const init_code = quote
     # Include it directly.
     const GPUArraysTestSuite = let
         mod = @eval module $(gensym())
-            using ..Test
+            using Test
             import GPUArrays
             gpuarrays = pathof(GPUArrays)
             gpuarrays_root = dirname(dirname(gpuarrays))
@@ -140,13 +140,21 @@ const init_code = quote
     end
 end
 
+const init_code = quote
+    using OpenCL, pocl_jll
+
+    # bring used symbols into the temporary module
+    import ..GPUArraysTestSuite, ..testf
+    import ..@on_device, ..targets
+end
+
 # avoid handle exhaustion on Windows by running each test in a separate process (pocl/pocl#1941)
-function test_worker(test)
+function test_worker(_, init_worker_code)
     if Sys.iswindows()
-        addworker()
+        addworker(; init_worker_code)
     else
         nothing
     end
 end
 
-runtests(OpenCL, args; testsuite, init_code, test_worker)
+runtests(OpenCL, args; testsuite, init_code, init_worker_code, test_worker)

From ffe198de6bbad47619c047f2cde4a954adb60819 Mon Sep 17 00:00:00 2001
From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com>
Date: Thu, 5 Mar 2026 17:02:05 -0400
Subject: [PATCH 21/23] Switch macOS CI runner to macOS 26

---
 .github/workflows/Test.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
index f24f6c33..8b9ce263 100644
--- a/.github/workflows/Test.yml
+++ b/.github/workflows/Test.yml
@@ -23,7 +23,7 @@ jobs:
       fail-fast: false
       matrix:
         version: ['1.10', '1.12']
-        os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-15-intel, windows-2022]
+        os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-26-intel, windows-2022]
         arch: [x64, arm64]
         pocl: [jll, local]
         memory_backend: [usm, svm, buffer]
@@ -35,11 +35,11 @@ jobs:
             arch: arm64
           - os: ubuntu-24.04-arm
             arch: x64
-          # macOS-15-intel is Intel-only
-          - os: macOS-15-intel
+          # macOS-26-intel is Intel-only
+          - os: macOS-26-intel
             arch: arm64
           # we only test building PoCL on Linux
-          - os: macOS-15-intel
+          - os: macOS-26-intel
             pocl: local
           - os: windows-2022
             pocl: local

From 3f246f1699a71bc026f36c2b15b2bb1f3e6ccfc2 Mon Sep 17 00:00:00 2001
From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com>
Date: Thu, 2 Apr 2026 16:44:34 -0300
Subject: [PATCH 22/23] Run tests with `--verbose`

---
 .github/workflows/Test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
index 8b9ce263..b7ad5926 100644
--- a/.github/workflows/Test.yml
+++ b/.github/workflows/Test.yml
@@ -137,7 +137,7 @@ jobs:
         uses: julia-actions/julia-runtest@v1
         if: runner.os != 'Windows'
         with:
-          test_args: '--quickfail --platform=pocl'
+          test_args: '--quickfail --verbose --platform=pocl'
 
       - name: Setup BusyBox
         if: runner.os == 'Windows'
@@ -149,7 +149,7 @@ jobs:
         run: |
           using Pkg
           Pkg.activate(".")
-          Pkg.test(; test_args=`--quickfail --platform=pocl`)
+          Pkg.test(; test_args=`--quickfail --verbose --platform=pocl`)
 
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v6

From e64a9df82e6d8646a7e7bf827216dcc62939735e Mon Sep 17 00:00:00 2001
From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com>
Date: Thu, 9 Apr 2026 13:10:17 -0300
Subject: [PATCH 23/23] Lower max RSS on macOS

Also limit to max 2 jobs
---
 .github/workflows/Test.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
index b7ad5926..a1dc3262 100644
--- a/.github/workflows/Test.yml
+++ b/.github/workflows/Test.yml
@@ -125,6 +125,16 @@ jobs:
             echo '[pocl_jll]' >> test/LocalPreferences.toml
             echo 'libpocl_path="${{ github.workspace }}/target/lib/libpocl.so"' >> test/LocalPreferences.toml
 
+      - name: "Set test arguments and other environment variables"
+        shell: bash
+        run: |
+          if [[ ${{ runner.os }} == "macOS" ]]; then
+            JULIA_TEST_MAXRSS_MB=1800
+            echo "JULIA_TEST_MAXRSS_MB=${JULIA_TEST_MAXRSS_MB}" | tee -a "${GITHUB_ENV}"
+            JULIA_CPU_THREADS=2
+            echo "JULIA_CPU_THREADS=${JULIA_CPU_THREADS}" | tee -a "${GITHUB_ENV}"
+          fi
+
       - name: Setup OpenCL.jl
         run: |
           echo '[OpenCL]' >> test/LocalPreferences.toml