From 23d2813ed4432a96fcff15a29cc2e2c9530908ae Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Tue, 26 May 2026 18:12:52 +0200 Subject: [PATCH 1/2] Fixup --- src/compiler/codegen.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index fce8fca44..f5a8653d2 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -200,7 +200,7 @@ function hipcompile(@nospecialize(job::CompilerJob)) GPUCompiler.compile(:obj, job) end - global_hostcalls = pop!(_global_hostcalls, hash(job)) + global_hostcalls = pop!(_global_hostcalls, hash(job), Symbol[]) # Late global hostcalls detection. append!(global_hostcalls, find_global_hostcalls(meta.ir)) From 2786537830bd13fa05ab51df704a3cbbaefe9ab4 Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Tue, 26 May 2026 18:21:05 +0200 Subject: [PATCH 2/2] Try improve --- src/compiler/codegen.jl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index f5a8653d2..62b5e7da8 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -16,8 +16,6 @@ const _hip_compiler_cache = Dict{HIP.HIPDevice, Dict{Any, HIP.HIPFunction}}() # hash(fun, hash(f, hash(tt))) => HIPKernel const _kernel_instances = Dict{UInt, Runtime.HIPKernel}() -# UInt (hash(job)) => Vector{Symbol} (global hostcall names) -const _global_hostcalls = Dict{UInt, Vector{Symbol}}() function compiler_cache(dev::HIP.HIPDevice) get!(() -> Dict{UInt, Any}(), _hip_compiler_cache, dev) @@ -34,7 +32,10 @@ function GPUCompiler.link_libraries!(@nospecialize(job::HIPCompilerJob), mod::LL Tuple{CompilerJob{GCNCompilerTarget},typeof(mod)}, job, mod) # Detect global hostcalls here, before optimizations & cleanup occur. - _global_hostcalls[hash(job)] = find_global_hostcalls(mod) + # Accumulate into task-local storage so hipcompile can retrieve them + # on the same task, without any global dict or hash-collision race. + tls_hostcalls = get!(task_local_storage(), :amdgpu_early_hostcalls, Symbol[]) + append!(tls_hostcalls, find_global_hostcalls(mod)) link_device_libs!( job.config.target, mod; @@ -200,7 +201,9 @@ function hipcompile(@nospecialize(job::CompilerJob)) GPUCompiler.compile(:obj, job) end - global_hostcalls = pop!(_global_hostcalls, hash(job), Symbol[]) + # Collect early-detected hostcalls written by link_libraries! on this task. + # Falls back gracefully to empty if link_libraries! was not called. + global_hostcalls = pop!(task_local_storage(), :amdgpu_early_hostcalls, Symbol[]) # Late global hostcalls detection. append!(global_hostcalls, find_global_hostcalls(meta.ir))