From e309c5fc92ee390306d0adb437b4709a2429a8ae Mon Sep 17 00:00:00 2001
From: Alex Reff <79004360+reffdev@users.noreply.github.com>
Date: Wed, 13 May 2026 11:32:13 -0500
Subject: [PATCH] fix: replace host-side rsqrtf with 1/sqrtf for HIP compatibility

rsqrtf is device-only in HIP, causing a build failure on ROCm.
The two affected call sites are host code preparing the cuBLAS alpha scalar.
---
 ds4_cuda.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ds4_cuda.cu b/ds4_cuda.cu
index 8b6241c..f3a0f1d 100644
--- a/ds4_cuda.cu
+++ b/ds4_cuda.cu
@@ -6168,7 +6168,7 @@ extern "C" int ds4_gpu_attention_prefill_raw_heads_tensor(ds4_gpu_tensor *heads,
     if (!tmp) return 0;
     float *scores = tmp;
     float *out_tmp = (float *)((char *)tmp + out_offset);
-    const float alpha = rsqrtf((float)head_dim);
+    const float alpha = 1.0f/sqrtf((float)head_dim);
     const float beta = 0.0f;
     cublasStatus_t st = cublasSgemmStridedBatched(g_cublas,
                                                   CUBLAS_OP_T,
@@ -6538,7 +6538,7 @@ static int attention_prefill_mixed_launch(
             n_comp, head_dim);
     if (!cuda_ok(cudaGetLastError(), "attention mixed kv pack launch")) return 0;

-    const float alpha = rsqrtf((float)head_dim);
+    const float alpha = 1.0f/sqrtf((float)head_dim);
     const float beta = 0.0f;
     cublasStatus_t st = cublasSgemmStridedBatched(g_cublas,
                                                   CUBLAS_OP_T,