diff --git a/ggml/src/ggml-cuda/allreduce.cu b/ggml/src/ggml-cuda/allreduce.cu index 434689abd95..d56129a227e 100644 --- a/ggml/src/ggml-cuda/allreduce.cu +++ b/ggml/src/ggml-cuda/allreduce.cu @@ -184,13 +184,15 @@ static __global__ void ggml_cuda_ar_kernel( #pragma unroll for (int k = 0; k < ELEMS_PER_VEC; ++k) { const T_wire d_low = ggml_cuda_cast(sendbuf[off + k]); - recvbuf[off + k] = ggml_cuda_cast(d_low) + ggml_cuda_cast(wire[k]); + recvbuf[off + k] = ggml_cuda_cast( + ggml_cuda_cast(d_low) + ggml_cuda_cast(wire[k])); } } if (bid == 0 && tid < count - tail) { const T_wire d_low = ggml_cuda_cast(sendbuf[tail + tid]); - recvbuf[tail + tid] = - ggml_cuda_cast(d_low) + ggml_cuda_cast(host_other[tail + tid]); + recvbuf[tail + tid] = ggml_cuda_cast( + ggml_cuda_cast(d_low) + + ggml_cuda_cast(host_other[tail + tid])); } } } @@ -210,7 +212,8 @@ static __global__ void ggml_cuda_ar_add_kernel( const int nt = gridDim.x * blockDim.x; for (int i = tid; i < count; i += nt) { const T_src d_low = ggml_cuda_cast(dst[i]); - dst[i] = ggml_cuda_cast(d_low) + ggml_cuda_cast(src[i]); + dst[i] = ggml_cuda_cast( + ggml_cuda_cast(d_low) + ggml_cuda_cast(src[i])); } } diff --git a/ggml/src/ggml-opencl/CMakeLists.txt b/ggml/src/ggml-opencl/CMakeLists.txt index 0b39c011371..c6aba608736 100644 --- a/ggml/src/ggml-opencl/CMakeLists.txt +++ b/ggml/src/ggml-opencl/CMakeLists.txt @@ -106,6 +106,10 @@ set(GGML_OPENCL_KERNELS gemv_moe_q4_0_f32_ns gemm_moe_q4_1_f32_ns gemv_moe_q4_1_f32_ns + gemm_moe_q5_0_f32_ns + gemv_moe_q5_0_f32_ns + gemm_moe_q5_1_f32_ns + gemv_moe_q5_1_f32_ns gemm_moe_mxfp4_f32 gemv_moe_mxfp4_f32 gemm_moe_mxfp4_f32_ns diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index 61bdc62cd10..0e511592d53 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -556,6 +556,8 @@ struct ggml_backend_opencl_context { cl_kernel kernel_convert_block_q4_0_trans4_ns, kernel_restore_block_q4_0_trans4_ns; cl_kernel kernel_convert_block_q4_1, kernel_restore_block_q4_1; cl_kernel kernel_convert_block_q4_1_trans4_ns, kernel_restore_block_q4_1_trans4_ns; + cl_kernel kernel_convert_block_q5_0_trans4_ns, kernel_restore_block_q5_0_trans4_ns; + cl_kernel kernel_convert_block_q5_1_trans4_ns, kernel_restore_block_q5_1_trans4_ns; cl_kernel kernel_convert_block_mxfp4, kernel_convert_block_mxfp4_trans, kernel_restore_block_mxfp4, kernel_restore_block_mxfp4_trans; cl_kernel kernel_convert_block_mxfp4_trans4_ns, kernel_restore_block_mxfp4_trans4_ns; cl_kernel kernel_convert_block_q8_0, kernel_restore_block_q8_0, kernel_restore_block_q8_0_trans; @@ -615,6 +617,8 @@ struct ggml_backend_opencl_context { cl_kernel kernel_timestep_embedding; cl_kernel kernel_gemv_moe_q4_0_f32_ns, kernel_gemm_moe_q4_0_f32_ns; cl_kernel kernel_gemv_moe_q4_1_f32_ns, kernel_gemm_moe_q4_1_f32_ns; + cl_kernel kernel_gemv_moe_q5_0_f32_ns, kernel_gemm_moe_q5_0_f32_ns; + cl_kernel kernel_gemv_moe_q5_1_f32_ns, kernel_gemm_moe_q5_1_f32_ns; cl_kernel kernel_gemv_moe_mxfp4_f32, kernel_gemm_moe_mxfp4_f32; cl_kernel kernel_gemv_moe_mxfp4_f32_ns, kernel_gemm_moe_mxfp4_f32_ns; cl_kernel kernel_moe_reorder_b; @@ -973,6 +977,10 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve CL_CHECK((backend_ctx->kernel_restore_block_q4_1 = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q4_1", &err), err)); CL_CHECK((backend_ctx->kernel_convert_block_q4_1_trans4_ns = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_q4_1_trans4_ns", &err), err)); CL_CHECK((backend_ctx->kernel_restore_block_q4_1_trans4_ns = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q4_1_trans4_ns", &err), err)); + CL_CHECK((backend_ctx->kernel_convert_block_q5_0_trans4_ns = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_q5_0_trans4_ns", &err), err)); + CL_CHECK((backend_ctx->kernel_restore_block_q5_0_trans4_ns = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q5_0_trans4_ns", &err), err)); + CL_CHECK((backend_ctx->kernel_convert_block_q5_1_trans4_ns = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_q5_1_trans4_ns", &err), err)); + CL_CHECK((backend_ctx->kernel_restore_block_q5_1_trans4_ns = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q5_1_trans4_ns", &err), err)); CL_CHECK((backend_ctx->kernel_convert_block_mxfp4 = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_mxfp4", &err), err)); CL_CHECK((backend_ctx->kernel_convert_block_mxfp4_trans = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_mxfp4_trans", &err), err)); CL_CHECK((backend_ctx->kernel_convert_block_mxfp4_trans4_ns = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_mxfp4_trans4_ns", &err), err)); @@ -2995,6 +3003,74 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve GGML_LOG_CONT("."); } + // gemv_moe_q5_0_f32_ns + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "gemv_moe_q5_0_f32_ns.cl.h" + }; +#else + const std::string kernel_src = read_file("gemv_moe_q5_0_f32_ns.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), CL_moe_compile_opts); + + CL_CHECK((backend_ctx->kernel_gemv_moe_q5_0_f32_ns = clCreateKernel(prog, "kernel_gemv_moe_q5_0_f32_ns", &err), err)); + CL_CHECK(clReleaseProgram(prog)); + GGML_LOG_CONT("."); + } + + // gemm_moe_q5_0_f32_ns + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "gemm_moe_q5_0_f32_ns.cl.h" + }; +#else + const std::string kernel_src = read_file("gemm_moe_q5_0_f32_ns.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), CL_moe_compile_opts); + + CL_CHECK((backend_ctx->kernel_gemm_moe_q5_0_f32_ns = clCreateKernel(prog, "kernel_gemm_moe_q5_0_f32_ns", &err), err)); + CL_CHECK(clReleaseProgram(prog)); + GGML_LOG_CONT("."); + } + + // gemv_moe_q5_1_f32_ns + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "gemv_moe_q5_1_f32_ns.cl.h" + }; +#else + const std::string kernel_src = read_file("gemv_moe_q5_1_f32_ns.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), CL_moe_compile_opts); + + CL_CHECK((backend_ctx->kernel_gemv_moe_q5_1_f32_ns = clCreateKernel(prog, "kernel_gemv_moe_q5_1_f32_ns", &err), err)); + CL_CHECK(clReleaseProgram(prog)); + GGML_LOG_CONT("."); + } + + // gemm_moe_q5_1_f32_ns + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "gemm_moe_q5_1_f32_ns.cl.h" + }; +#else + const std::string kernel_src = read_file("gemm_moe_q5_1_f32_ns.cl"); +#endif + cl_program prog = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), CL_moe_compile_opts); + + CL_CHECK((backend_ctx->kernel_gemm_moe_q5_1_f32_ns = clCreateKernel(prog, "kernel_gemm_moe_q5_1_f32_ns", &err), err)); + CL_CHECK(clReleaseProgram(prog)); + GGML_LOG_CONT("."); + } + // gemv_moe_mxfp4_f32_ns { #ifdef GGML_OPENCL_EMBED_KERNELS @@ -3852,6 +3928,122 @@ struct ggml_tensor_extra_cl_q4_1 { } }; +struct ggml_tensor_extra_cl_q5_0 { + // Quantized values. + cl_mem qs = nullptr; + // Quantized values in image1d_buffer_t. + cl_mem qs_img = nullptr; + // 5-th bit values. + cl_mem qh = nullptr; + // 5-th bit values in image1d_buffer_t. + cl_mem qh_img = nullptr; + // Scales. + cl_mem d = nullptr; + // Scales in image1d_buffer_t. + cl_mem d_img = nullptr; + // Size of quantized values. + size_t size_qs = 0; + // Size of 5-th bit values. + size_t size_qh = 0; + // Size of scales. + size_t size_d = 0; + + ~ggml_tensor_extra_cl_q5_0() { + reset(); + } + + void reset() { + if (qs != nullptr) { + CL_CHECK(clReleaseMemObject(qs)); + qs = nullptr; + } + if (qh != nullptr) { + CL_CHECK(clReleaseMemObject(qh)); + qh = nullptr; + } + if (d != nullptr) { + CL_CHECK(clReleaseMemObject(d)); + d = nullptr; + } + if (qs_img != nullptr) { + CL_CHECK(clReleaseMemObject(qs_img)); + qs_img = nullptr; + } + + qh_img = nullptr; + d_img = nullptr; + size_qs = 0; + size_qh = 0; + size_d = 0; + } +}; + +struct ggml_tensor_extra_cl_q5_1 { + // Quantized values. + cl_mem qs = nullptr; + // Quantized values in image1d_buffer_t. + cl_mem qs_img = nullptr; + // 5-th bit values. + cl_mem qh = nullptr; + // 5-th bit values in image1d_buffer_t. + cl_mem qh_img = nullptr; + // Scales. + cl_mem d = nullptr; + // Scales in image1d_buffer_t. + cl_mem d_img = nullptr; + // Min + cl_mem m = nullptr; + // Min in image1d_buffer_t. + cl_mem m_img = nullptr; + // Size of quantized values. + size_t size_qs = 0; + // Size of 5-th bit values. + size_t size_qh = 0; + // Size of scales. + size_t size_d = 0; + // Size of min values. + size_t size_m = 0; + + ~ggml_tensor_extra_cl_q5_1() { + reset(); + } + + void reset() { + // q and d are subbuffers into the bigger buffer allocated in ggml_backend_buffer. + // They must be properly released so that the original buffer can be + // properly released to avoid memory leak. + if (qs != nullptr) { + CL_CHECK(clReleaseMemObject(qs)); + qs = nullptr; + } + if (qh != nullptr) { + CL_CHECK(clReleaseMemObject(qh)); + qh = nullptr; + } + if (d != nullptr) { + CL_CHECK(clReleaseMemObject(d)); + d = nullptr; + } + if (m != nullptr) { + CL_CHECK(clReleaseMemObject(m)); + m = nullptr; + } + if (qs_img != nullptr) { + CL_CHECK(clReleaseMemObject(qs_img)); + qs_img = nullptr; + } + // qh_img, d_img, and m_img are not currently allocated separately. + // TODO: initialize them for non SMALL_PATH path, or remove them. + qh_img = nullptr; + d_img = nullptr; + m_img = nullptr; + size_qs = 0; + size_qh = 0; + size_d = 0; + size_m = 0; + } +}; + struct ggml_tensor_extra_cl_mxfp4 { // Quantized values. cl_mem q = nullptr; @@ -4506,7 +4698,9 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te } // q4_0, q8_0 and mxfp4 have general MUL_MAT_ID support, // the quantizations here currently do not - they are only supported by Adreno with certain shapes - if (op->src[0]->type == GGML_TYPE_Q4_1) { + if (op->src[0]->type == GGML_TYPE_Q4_1 || + op->src[0]->type == GGML_TYPE_Q5_0 || + op->src[0]->type == GGML_TYPE_Q5_1) { #ifdef GGML_OPENCL_USE_ADRENO_KERNELS if (op->src[1]->type == GGML_TYPE_F32) { return use_adreno_moe_kernels(backend_ctx, op->src[0]) @@ -4692,6 +4886,18 @@ struct ggml_backend_opencl_buffer_context { for (ggml_tensor_extra_cl_q4_1 * e : temp_tensor_extras_q4_1_in_use) { delete e; } + for (ggml_tensor_extra_cl_q5_0 * e : temp_tensor_extras_q5_0) { + delete e; + } + for (ggml_tensor_extra_cl_q5_0 * e : temp_tensor_extras_q5_0_in_use) { + delete e; + } + for (ggml_tensor_extra_cl_q5_1 * e : temp_tensor_extras_q5_1) { + delete e; + } + for (ggml_tensor_extra_cl_q5_1 * e : temp_tensor_extras_q5_1_in_use) { + delete e; + } for (ggml_tensor_extra_cl_mxfp4 * e : temp_tensor_extras_mxfp4) { delete e; } @@ -4775,6 +4981,36 @@ struct ggml_backend_opencl_buffer_context { return extra; } + ggml_tensor_extra_cl_q5_0 * ggml_opencl_alloc_temp_tensor_extra_q5_0() { + ggml_tensor_extra_cl_q5_0 * extra; + if (temp_tensor_extras_q5_0.empty()) { + extra = new ggml_tensor_extra_cl_q5_0(); + } else { + extra = temp_tensor_extras_q5_0.back(); + temp_tensor_extras_q5_0.pop_back(); + } + + temp_tensor_extras_q5_0_in_use.push_back(extra); + + extra->reset(); + return extra; + } + + ggml_tensor_extra_cl_q5_1 * ggml_opencl_alloc_temp_tensor_extra_q5_1() { + ggml_tensor_extra_cl_q5_1 * extra; + if (temp_tensor_extras_q5_1.empty()) { + extra = new ggml_tensor_extra_cl_q5_1(); + } else { + extra = temp_tensor_extras_q5_1.back(); + temp_tensor_extras_q5_1.pop_back(); + } + + temp_tensor_extras_q5_1_in_use.push_back(extra); + + extra->reset(); + return extra; + } + ggml_tensor_extra_cl_mxfp4 * ggml_opencl_alloc_temp_tensor_extra_mxfp4() { ggml_tensor_extra_cl_mxfp4 * extra; if (temp_tensor_extras_mxfp4.empty()) { @@ -4881,6 +5117,16 @@ struct ggml_backend_opencl_buffer_context { } temp_tensor_extras_q4_1_in_use.clear(); + for (ggml_tensor_extra_cl_q5_0 * e : temp_tensor_extras_q5_0_in_use) { + temp_tensor_extras_q5_0.push_back(e); + } + temp_tensor_extras_q5_0_in_use.clear(); + + for (ggml_tensor_extra_cl_q5_1 * e : temp_tensor_extras_q5_1_in_use) { + temp_tensor_extras_q5_1.push_back(e); + } + temp_tensor_extras_q5_1_in_use.clear(); + for (ggml_tensor_extra_cl_mxfp4 * e : temp_tensor_extras_mxfp4_in_use) { temp_tensor_extras_mxfp4.push_back(e); } @@ -4923,6 +5169,10 @@ struct ggml_backend_opencl_buffer_context { std::vector temp_tensor_extras_q4_0_in_use; std::vector temp_tensor_extras_q4_1; std::vector temp_tensor_extras_q4_1_in_use; + std::vector temp_tensor_extras_q5_0; + std::vector temp_tensor_extras_q5_0_in_use; + std::vector temp_tensor_extras_q5_1; + std::vector temp_tensor_extras_q5_1_in_use; std::vector temp_tensor_extras_mxfp4; std::vector temp_tensor_extras_mxfp4_in_use; std::vector temp_tensor_extras_q8_0; @@ -5286,17 +5536,18 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, #endif // GGML_OPENCL_USE_ADRENO_KERNELS return; } - if (tensor->type == GGML_TYPE_MXFP4) { + if (tensor->type == GGML_TYPE_Q5_0) { ggml_tensor_extra_cl * extra_orig = (ggml_tensor_extra_cl *)tensor->extra; GGML_ASSERT(extra_orig && "Tesnors in OpenCL backend should have been allocated and initialized"); // Allocate the new extra and create aliases from the original. ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context; - ggml_tensor_extra_cl_mxfp4 * extra = ctx->ggml_opencl_alloc_temp_tensor_extra_mxfp4(); + ggml_tensor_extra_cl_q5_0 * extra = ctx->ggml_opencl_alloc_temp_tensor_extra_q5_0(); - size_t size_e = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(char); - size_t size_q = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*ggml_blck_size(tensor->type)/2; - GGML_ASSERT(size_e + size_q == ggml_nbytes(tensor) && "Incorrect tensor size"); + size_t size_d = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(ggml_fp16_t); + size_t size_qs = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*ggml_blck_size(tensor->type)/2; + size_t size_qh = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(int32_t); + GGML_ASSERT(size_d + size_qs + size_qh == ggml_nbytes(tensor) && "Incorrect tensor size"); cl_int err; cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, @@ -5306,40 +5557,48 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, queue, data_device, CL_TRUE, 0, ggml_nbytes(tensor), data, 0, NULL, NULL)); - // The original tensor memory is divided into scales and quants, i.e., - // we first store scales, then quants. cl_buffer_region region; // Create subbuffer for scales. region.origin = align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment); - region.size = size_e; - extra->e = clCreateSubBuffer( + region.size = size_d; + extra->d = clCreateSubBuffer( extra_orig->data_device, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); CL_CHECK(err); auto previous_origin = region.origin; - // Create subbuffer for quants. - region.origin = align_to(previous_origin + size_e, backend_ctx->alignment); - region.size = size_q; - extra->q = clCreateSubBuffer( + // Create subbuffer for qh. + region.origin = align_to(previous_origin + size_d, backend_ctx->alignment); + region.size = size_qh; + extra->qh = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + previous_origin = region.origin; + + // Create subbuffer for qs. + region.origin = align_to(previous_origin + size_qh, backend_ctx->alignment); + region.size = size_qs; + extra->qs = clCreateSubBuffer( extra_orig->data_device, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); CL_CHECK(err); #ifdef GGML_OPENCL_USE_ADRENO_KERNELS - // Adreno moe mxfp4 kernel needs special transpose and unshuffling + // Adreno moe q5_0 kernel needs special transpose and unshuffling if (use_adreno_moe_kernels(backend_ctx, tensor)) { - cl_kernel kernel = backend_ctx->kernel_convert_block_mxfp4_trans4_ns; + cl_kernel kernel = backend_ctx->kernel_convert_block_q5_0_trans4_ns; int ne00 = tensor->ne[0]; int ne01 = tensor->ne[1]; int ne02 = tensor->ne[2]; CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->q)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->e)); - CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), &ne00)); - CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->qs)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->qh)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extra->d)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(int), &ne01)); size_t global_work_size[3] = {static_cast(((ne01 + 63) / 64) * 64), static_cast(ne00 / 32), static_cast(ne02)}; size_t local_work_size[3] = {64, 2, 1}; @@ -5348,61 +5607,36 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); CL_CHECK(clWaitForEvents(1, &evt)); CL_CHECK(clReleaseMemObject(data_device)); - tensor->extra = extra; // Create image for Q - cl_image_format img_format_q = {CL_R, CL_UNSIGNED_INT32}; - cl_image_desc img_desc_q = { + cl_image_format img_format_qs = {CL_R, CL_UNSIGNED_INT32}; + cl_image_desc img_desc_qs = { CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast(ggml_nelements(tensor) / 8), 0, 0, 0, 0, 0, 0, 0, - { extra->q } + { extra->qs } }; - extra->q_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_format_q, &img_desc_q, NULL, &err); + extra->qs_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_format_qs, &img_desc_qs, NULL, &err); tensor->extra = extra; return; } - #endif // GGML_OPENCL_USE_ADRENO_KERNELS - cl_kernel kernel = backend_ctx->kernel_convert_block_mxfp4; - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->q)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->e)); - - size_t global_work_size[3] = {(size_t)ggml_nelements(tensor)/ggml_blck_size(tensor->type), 1, 1}; - size_t local_work_size[3] = {64, 1, 1}; - - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - CL_CHECK(clWaitForEvents(1, &evt)); - CL_CHECK(clReleaseMemObject(data_device)); - - // Create image for Q - cl_image_format img_format_q = {CL_RG, CL_UNSIGNED_INT32}; - cl_image_desc img_desc_q = { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - static_cast(ggml_nelements(tensor)/32*2), - 0, 0, 0, 0, 0, 0, 0, - { extra->q } - }; - extra->q_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_format_q, &img_desc_q, NULL, &err); - tensor->extra = extra; - return; } - if (tensor->type == GGML_TYPE_Q8_0) { + if (tensor->type == GGML_TYPE_Q5_1) { ggml_tensor_extra_cl * extra_orig = (ggml_tensor_extra_cl *)tensor->extra; GGML_ASSERT(extra_orig && "Tesnors in OpenCL backend should have been allocated and initialized"); // Allocate the new extra and create aliases from the original. ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context; - ggml_tensor_extra_cl_q8_0 * extra = ctx->ggml_opencl_alloc_temp_tensor_extra_q8_0(); + ggml_tensor_extra_cl_q5_1 * extra = ctx->ggml_opencl_alloc_temp_tensor_extra_q5_1(); size_t size_d = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(ggml_fp16_t); - size_t size_q = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*(ggml_blck_size(tensor->type)*sizeof(char)); - GGML_ASSERT(size_d + size_q == ggml_nbytes(tensor) && "Incorrect tensor size"); + size_t size_m = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(ggml_fp16_t); + size_t size_qs = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*ggml_blck_size(tensor->type)/2; + size_t size_qh = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(int32_t); + GGML_ASSERT(size_d + size_m + size_qs + size_qh == ggml_nbytes(tensor) && "Incorrect tensor size"); cl_int err; cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, @@ -5412,10 +5646,10 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, queue, data_device, CL_TRUE, 0, ggml_nbytes(tensor), data, 0, NULL, NULL)); - // The original tensor memory is divided into scales and quants, i.e., - // we first store scales, then quants. cl_buffer_region region; + // The original tensor memory is divided into scales and quants, i.e., + // we first store scales, mins, then quants. // Create subbuffer for scales. region.origin = align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment); region.size = size_d; @@ -5425,22 +5659,227 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, CL_CHECK(err); auto previous_origin = region.origin; - // Create subbuffer for quants. + // Create subbuffer for mins. region.origin = align_to(previous_origin + size_d, backend_ctx->alignment); - region.size = size_q; - extra->q = clCreateSubBuffer( + region.size = size_m; + extra->m = clCreateSubBuffer( extra_orig->data_device, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); CL_CHECK(err); + previous_origin = region.origin; - cl_kernel kernel = backend_ctx->kernel_convert_block_q8_0; + // Create subbuffer for qh. + region.origin = align_to(previous_origin + size_m, backend_ctx->alignment); + region.size = size_qh; + extra->qh = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + previous_origin = region.origin; - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->q)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->d)); + // Create subbuffer for qs. + region.origin = align_to(previous_origin + size_qh, backend_ctx->alignment); + region.size = size_qs; + extra->qs = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); - size_t global_work_size[] = {(size_t)ggml_nelements(tensor)/ggml_blck_size(tensor->type), 1, 1}; - size_t local_work_size[] = {64, 1, 1}; +#ifdef GGML_OPENCL_USE_ADRENO_KERNELS + // Adreno moe q5_1 kernel needs special transpose and unshuffling + if (use_adreno_moe_kernels(backend_ctx, tensor)) { + cl_kernel kernel = backend_ctx->kernel_convert_block_q5_1_trans4_ns; + + int ne00 = tensor->ne[0]; + int ne01 = tensor->ne[1]; + int ne02 = tensor->ne[2]; + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->qs)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->qh)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extra->d)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &extra->m)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne01)); + + size_t global_work_size[3] = {static_cast(((ne01 + 63) / 64) * 64), static_cast(ne00 / 32), static_cast(ne02)}; + size_t local_work_size[3] = {64, 2, 1}; + + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); + CL_CHECK(clWaitForEvents(1, &evt)); + CL_CHECK(clReleaseMemObject(data_device)); + + // Create image for Q + cl_image_format img_format_qs = {CL_R, CL_UNSIGNED_INT32}; + cl_image_desc img_desc_qs = { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + static_cast(ggml_nelements(tensor) / 8), + 0, 0, 0, 0, 0, 0, 0, + { extra->qs } + }; + extra->qs_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_format_qs, &img_desc_qs, NULL, &err); + tensor->extra = extra; + + return; + } +#endif // GGML_OPENCL_USE_ADRENO_KERNELS + return; + } + if (tensor->type == GGML_TYPE_MXFP4) { + ggml_tensor_extra_cl * extra_orig = (ggml_tensor_extra_cl *)tensor->extra; + GGML_ASSERT(extra_orig && "Tesnors in OpenCL backend should have been allocated and initialized"); + + // Allocate the new extra and create aliases from the original. + ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context; + ggml_tensor_extra_cl_mxfp4 * extra = ctx->ggml_opencl_alloc_temp_tensor_extra_mxfp4(); + + size_t size_e = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(char); + size_t size_q = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*ggml_blck_size(tensor->type)/2; + GGML_ASSERT(size_e + size_q == ggml_nbytes(tensor) && "Incorrect tensor size"); + + cl_int err; + cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, + ggml_nbytes(tensor), NULL, &err); + CL_CHECK(err); + CL_CHECK(clEnqueueWriteBuffer( + queue, data_device, CL_TRUE, 0, + ggml_nbytes(tensor), data, 0, NULL, NULL)); + + // The original tensor memory is divided into scales and quants, i.e., + // we first store scales, then quants. + cl_buffer_region region; + + // Create subbuffer for scales. + region.origin = align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment); + region.size = size_e; + extra->e = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + auto previous_origin = region.origin; + + // Create subbuffer for quants. + region.origin = align_to(previous_origin + size_e, backend_ctx->alignment); + region.size = size_q; + extra->q = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + +#ifdef GGML_OPENCL_USE_ADRENO_KERNELS + // Adreno moe mxfp4 kernel needs special transpose and unshuffling + if (use_adreno_moe_kernels(backend_ctx, tensor)) { + cl_kernel kernel = backend_ctx->kernel_convert_block_mxfp4_trans4_ns; + + int ne00 = tensor->ne[0]; + int ne01 = tensor->ne[1]; + int ne02 = tensor->ne[2]; + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->q)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->e)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &ne01)); + + size_t global_work_size[3] = {static_cast(((ne01 + 63) / 64) * 64), static_cast(ne00 / 32), static_cast(ne02)}; + size_t local_work_size[3] = {64, 2, 1}; + + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); + CL_CHECK(clWaitForEvents(1, &evt)); + CL_CHECK(clReleaseMemObject(data_device)); + tensor->extra = extra; + + // Create image for Q + cl_image_format img_format_q = {CL_R, CL_UNSIGNED_INT32}; + cl_image_desc img_desc_q = { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + static_cast(ggml_nelements(tensor) / 8), + 0, 0, 0, 0, 0, 0, 0, + { extra->q } + }; + extra->q_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_format_q, &img_desc_q, NULL, &err); + tensor->extra = extra; + + return; + } + +#endif // GGML_OPENCL_USE_ADRENO_KERNELS + cl_kernel kernel = backend_ctx->kernel_convert_block_mxfp4; + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->q)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->e)); + + size_t global_work_size[3] = {(size_t)ggml_nelements(tensor)/ggml_blck_size(tensor->type), 1, 1}; + size_t local_work_size[3] = {64, 1, 1}; + + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); + CL_CHECK(clWaitForEvents(1, &evt)); + CL_CHECK(clReleaseMemObject(data_device)); + + // Create image for Q + cl_image_format img_format_q = {CL_RG, CL_UNSIGNED_INT32}; + cl_image_desc img_desc_q = { + CL_MEM_OBJECT_IMAGE1D_BUFFER, + static_cast(ggml_nelements(tensor)/32*2), + 0, 0, 0, 0, 0, 0, 0, + { extra->q } + }; + extra->q_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_format_q, &img_desc_q, NULL, &err); + tensor->extra = extra; + + return; + } + if (tensor->type == GGML_TYPE_Q8_0) { + ggml_tensor_extra_cl * extra_orig = (ggml_tensor_extra_cl *)tensor->extra; + GGML_ASSERT(extra_orig && "Tesnors in OpenCL backend should have been allocated and initialized"); + + // Allocate the new extra and create aliases from the original. + ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context; + ggml_tensor_extra_cl_q8_0 * extra = ctx->ggml_opencl_alloc_temp_tensor_extra_q8_0(); + + size_t size_d = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*sizeof(ggml_fp16_t); + size_t size_q = ggml_nelements(tensor)/ggml_blck_size(tensor->type)*(ggml_blck_size(tensor->type)*sizeof(char)); + GGML_ASSERT(size_d + size_q == ggml_nbytes(tensor) && "Incorrect tensor size"); + + cl_int err; + cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, + ggml_nbytes(tensor), NULL, &err); + CL_CHECK(err); + CL_CHECK(clEnqueueWriteBuffer( + queue, data_device, CL_TRUE, 0, + ggml_nbytes(tensor), data, 0, NULL, NULL)); + + // The original tensor memory is divided into scales and quants, i.e., + // we first store scales, then quants. + cl_buffer_region region; + + // Create subbuffer for scales. + region.origin = align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment); + region.size = size_d; + extra->d = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + auto previous_origin = region.origin; + + // Create subbuffer for quants. + region.origin = align_to(previous_origin + size_d, backend_ctx->alignment); + region.size = size_q; + extra->q = clCreateSubBuffer( + extra_orig->data_device, CL_MEM_READ_WRITE, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err); + CL_CHECK(err); + + cl_kernel kernel = backend_ctx->kernel_convert_block_q8_0; + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->q)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->d)); + + size_t global_work_size[] = {(size_t)ggml_nelements(tensor)/ggml_blck_size(tensor->type), 1, 1}; + size_t local_work_size[] = {64, 1, 1}; cl_event evt; CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); @@ -6109,6 +6548,89 @@ static void ggml_backend_opencl_buffer_get_tensor(ggml_backend_buffer_t buffer, CL_CHECK(clReleaseMemObject(data_device)); return; } + if (tensor->type == GGML_TYPE_Q5_0) { + ggml_tensor_extra_cl_q5_0 * extra = (ggml_tensor_extra_cl_q5_0 *)tensor->extra; + +#ifdef GGML_OPENCL_USE_ADRENO_KERNELS + if (use_adreno_moe_kernels(backend_ctx, tensor)) { + cl_int err; + // TODO: use ggml_cl_buffer to manage this temporary buffer + cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, + ggml_nbytes(tensor), NULL, &err); + CL_CHECK(err); + + cl_kernel kernel = backend_ctx->kernel_restore_block_q5_0_trans4_ns; + + int ne00 = tensor->ne[0]; + int ne01 = tensor->ne[1]; + int ne02 = tensor->ne[2]; + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra->qs)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->qh)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->d)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &data_device)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_int), &ne01)); + + size_t global_work_size[3] = {static_cast(((ne01 + 63) / 64) * 64), static_cast(ne00 / 32), static_cast(ne02)}; + size_t local_work_size[3] = {64, 2, 1}; + + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, + global_work_size, local_work_size, 0, NULL, &evt)); + CL_CHECK(clWaitForEvents(1, &evt)); + CL_CHECK(clEnqueueReadBuffer( + queue, data_device, CL_TRUE, offset, + size, data, 0, NULL, NULL)); + CL_CHECK(clReleaseMemObject(data_device)); + return; + } +#endif // GGML_OPENCL_USE_ADRENO_KERNELS + // TODO: normal q5_0 + (void) extra; + return; + } + if (tensor->type == GGML_TYPE_Q5_1) { + ggml_tensor_extra_cl_q5_1 * extra = (ggml_tensor_extra_cl_q5_1 *)tensor->extra; + +#ifdef GGML_OPENCL_USE_ADRENO_KERNELS + if (use_adreno_moe_kernels(backend_ctx, tensor)) { + cl_int err; + // TODO: use ggml_cl_buffer to manage this temporary buffer + cl_mem data_device = clCreateBuffer(context, CL_MEM_READ_WRITE, + ggml_nbytes(tensor), NULL, &err); + CL_CHECK(err); + + cl_kernel kernel = backend_ctx->kernel_restore_block_q5_1_trans4_ns; + + int ne00 = tensor->ne[0]; + int ne01 = tensor->ne[1]; + int ne02 = tensor->ne[2]; + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra->qs)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra->qh)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra->d)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extra->m)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &data_device)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_int), &ne01)); + + size_t global_work_size[3] = {static_cast(((ne01 + 63) / 64) * 64), static_cast(ne00 / 32), static_cast(ne02)}; + size_t local_work_size[3] = {64, 2, 1}; + + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, + global_work_size, local_work_size, 0, NULL, &evt)); + CL_CHECK(clWaitForEvents(1, &evt)); + CL_CHECK(clEnqueueReadBuffer( + queue, data_device, CL_TRUE, offset, + size, data, 0, NULL, NULL)); + CL_CHECK(clReleaseMemObject(data_device)); + return; + } +#endif // GGML_OPENCL_USE_ADRENO_KERNELS + // TODO: normal q5_1 + (void) extra; + return; + } if (tensor->type == GGML_TYPE_MXFP4) { ggml_tensor_extra_cl_mxfp4 * extra = (ggml_tensor_extra_cl_mxfp4 *)tensor->extra; @@ -13132,7 +13654,7 @@ static void moe_router_reoerder(ggml_backend_t backend, const ggml_tensor * src, CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &ne02)); size_t histogram_global_size[] = {(size_t)(((ne21 + 63) / 64) * 64), static_cast(ne20), 1}; - size_t histogram_local_size[] = {64, static_cast(ne20), 1}; + size_t histogram_local_size[] = {64, 1, 1}; backend_ctx->enqueue_ndrange_kernel(kernel, 3, histogram_global_size, histogram_local_size, src); // Scan @@ -13209,10 +13731,17 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0, #ifdef GGML_OPENCL_SOA_Q ggml_tensor_extra_cl_q4_0 * extra0_q4_0 = (ggml_tensor_extra_cl_q4_0 *)src0->extra; ggml_tensor_extra_cl_q4_1 * extra0_q4_1 = (ggml_tensor_extra_cl_q4_1 *)src0->extra; + ggml_tensor_extra_cl_q5_0 * extra0_q5_0 = (ggml_tensor_extra_cl_q5_0 *)src0->extra; + ggml_tensor_extra_cl_q5_1 * extra0_q5_1 = (ggml_tensor_extra_cl_q5_1 *)src0->extra; ggml_tensor_extra_cl_mxfp4 * extra0_mxfp4 = (ggml_tensor_extra_cl_mxfp4 *)src0->extra; ggml_tensor_extra_cl_q8_0 * extra0_q8_0 = (ggml_tensor_extra_cl_q8_0 *)src0->extra; #endif + // TODO: general MoE for the following types + (void)extra0_q4_1; + (void)extra0_q5_0; + (void)extra0_q5_1; + const int ne00 = src0->ne[0]; const int ne01 = src0->ne[1]; const int ne02 = src0->ne[2]; @@ -13540,8 +14069,11 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0, } else { // for gemm kernel = backend_ctx->kernel_gemm_moe_q4_1_f32_ns; - if (strstr(src0->name, "as") != NULL) { + // Reorder router if called from test-backend-ops or when new router is generated. + // Otherwise reuse the reordered result from previous mul_mat_id call. + if ((strstr(src0->name, "as") != NULL) || backend_ctx->toggle_reorder) { moe_router_reoerder(backend, src2, ne20); + backend_ctx->toggle_reorder = false; } cl_mem sub_buf_src1_pre, buf_src1_reordered, image_src1_reordered, sub_buf_dst, buf_dst_image; @@ -13649,6 +14181,359 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0, } return; } +#endif //GGML_OPENCL_USE_ADRENO_KERNELS + } + case GGML_TYPE_Q5_0: { +#ifdef GGML_OPENCL_USE_ADRENO_KERNELS + if (use_adreno_moe_kernels(backend_ctx, src0)) { + cl_int status; + + size_t local_size[3] = {64, 2, 1}; + size_t global_size[3] = {64, 2, 1}; + + if (ne12 == 1) { // for gemv + kernel = backend_ctx->kernel_gemv_moe_q5_0_f32_ns; + + cl_mem src1_sub_buffer, buf_src1_image, buf_src2; + + // create a sub_buffer for src2 + cl_buffer_region region; + region.origin = offset2; + region.size = ne20 * ne21 * sizeof(int); + buf_src2 = clCreateSubBuffer(extra2->data_device, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // set thread grid + global_size[0] = static_cast(ne01); + global_size[1] = 4; + global_size[2] = static_cast(ne20); + local_size[1] = 4; + + // create a sub_buffer for src1 + region.origin = offset1; + region.size = ne10 * ne11 * ne12 * sizeof(float); + src1_sub_buffer = clCreateSubBuffer(extra1->data_device, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // create image for src1 + cl_image_format image_format_buf_src1 = {CL_RGBA, CL_FLOAT}; + cl_image_desc image_desc_buf_src1 = {CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast(ne10 * ne11 * ne12 / 4), 0,0,0,0,0,0,0, {src1_sub_buffer}}; + buf_src1_image = clCreateImage(backend_ctx->context, CL_MEM_READ_ONLY, &image_format_buf_src1, &image_desc_buf_src1, NULL, &status); + CL_CHECK(status); + + // Set kernel args + int arg_idx = 0; + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_0->qs)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_0->qh)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_0->d)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src1_image)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src2)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne11)); + + // launch kernel + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_size, local_size, dst); + + // deallocate sub buffers and images + CL_CHECK(clReleaseMemObject(src1_sub_buffer)); + CL_CHECK(clReleaseMemObject(buf_src1_image)); + CL_CHECK(clReleaseMemObject(buf_src2)); + + } else { // for gemm + kernel = backend_ctx->kernel_gemm_moe_q5_0_f32_ns; + + // Reorder router if called from test-backend-ops or when new router is generated. + // Otherwise reuse the reordered result from previous mul_mat_id call. + if ((strstr(src0->name, "as") != NULL) || backend_ctx->toggle_reorder) { + moe_router_reoerder(backend, src2, ne20); + backend_ctx->toggle_reorder = false; + } + + cl_mem sub_buf_src1_pre, buf_src1_reordered, image_src1_reordered, sub_buf_dst, buf_dst_image; + cl_mem buf_src2, buf_src2_emap; + + cl_buffer_region region; + region.origin = 0; + region.size = sizeof(int) * max_post_router_tile * n_tile_size; + buf_src2 = clCreateSubBuffer(backend_ctx->prealloc_post_router.buffer, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + region.origin = 0; + region.size = sizeof(short) * max_post_router_tile; + buf_src2_emap = clCreateSubBuffer(backend_ctx->prealloc_emap.buffer, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // Reorder activations + // create a sub_buffer for src1 + region.origin = offset1; + region.size = ne10 * ne11 * ne12 * sizeof(float); + sub_buf_src1_pre = clCreateSubBuffer(extra1->data_device, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // Create image for reordered src1 + // Use pre-allocated placeholder + region.origin = 0; + region.size = ne00 * max_post_router_tile * n_tile_size * sizeof(float); + backend_ctx->prealloc_act_trans.allocate(backend_ctx->context, region.size); + buf_src1_reordered = clCreateSubBuffer( + backend_ctx->prealloc_act_trans.buffer, + 0, + CL_BUFFER_CREATE_TYPE_REGION, + ®ion, + &status); + CL_CHECK(status); + cl_image_format image_format_buf_src1; + cl_image_desc image_desc_buf_src1; + image_format_buf_src1 = {CL_RGBA, CL_FLOAT}; + image_desc_buf_src1 = {CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast(ne00 * max_post_router_tile * n_tile_size / 4), 0,0,0,0,0,0,0, {buf_src1_reordered}}; + image_src1_reordered = clCreateImage(backend_ctx->context, CL_MEM_READ_ONLY, &image_format_buf_src1, &image_desc_buf_src1, NULL, &status); + CL_CHECK(status); + + unsigned short map_ratio = ne20 / ne11; + GGML_ASSERT(((map_ratio == 1) || (map_ratio == ne20)) && "Map ratio not supported\n"); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 0, sizeof(cl_mem), &sub_buf_src1_pre)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 1, sizeof(cl_mem), &buf_src2)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 2, sizeof(cl_mem), &buf_src1_reordered)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 3, sizeof(cl_mem), &(backend_ctx->prealloc_total_tiles.buffer))); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 4, sizeof(unsigned int), &ne00)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 5, sizeof(unsigned short), &map_ratio)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 6, sizeof(unsigned int), &n_tile_size)); + + size_t reorder_b_local_size[3] = {256, 1, 1}; + size_t reorder_b_global_size[3] = {static_cast(((ne00 / 4) + 255) / 256 * 256), static_cast(max_post_router_tile * n_tile_size), 1}; + + // Dispatch reorder kernel + backend_ctx->enqueue_ndrange_kernel(backend_ctx->kernel_moe_reorder_b, 3, reorder_b_global_size, reorder_b_local_size, dst); + + // MoE kernel prepare + // Create sub buffer for dst + region.origin = offsetd; + region.size = ne0 * ne1 * ne2 * sizeof(float); + sub_buf_dst = clCreateSubBuffer( + extrad->data_device, + 0, + CL_BUFFER_CREATE_TYPE_REGION, + ®ion, + &status); + CL_CHECK(status); + // Create image for dst + cl_image_format image_format_buf_dst = {CL_R, CL_FLOAT}; + cl_image_desc image_desc_buf_dst = {CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast(ne0 * ne1 * ne2), 0,0,0,0,0,0,0, {sub_buf_dst}}; + buf_dst_image = clCreateImage(backend_ctx->context, CL_MEM_WRITE_ONLY, &image_format_buf_dst, &image_desc_buf_dst, NULL, &status); + CL_CHECK(status); + + // Set kernel args + int arg_idx = 0; + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_0->qs_img)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_0->qh)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_0->d)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &image_src1_reordered)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src2)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src2_emap)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_dst_image)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &(backend_ctx->prealloc_total_tiles.buffer))); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne01)); + + // set thread grid + global_size[1] = static_cast((ne01 + 63) / 64); + global_size[2] = static_cast(max_post_router_tile); + local_size[1] = 1; + local_size[2] = 1; + + // Dispatch kernel + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_size, local_size, dst); + + clReleaseMemObject(sub_buf_src1_pre); + clReleaseMemObject(buf_src1_reordered); + clReleaseMemObject(image_src1_reordered); + clReleaseMemObject(buf_src2); + clReleaseMemObject(buf_src2_emap); + clReleaseMemObject(sub_buf_dst); + clReleaseMemObject(buf_dst_image); + } + return; + } +#endif //GGML_OPENCL_USE_ADRENO_KERNELS + } + case GGML_TYPE_Q5_1: { +#ifdef GGML_OPENCL_USE_ADRENO_KERNELS + if (use_adreno_moe_kernels(backend_ctx, src0)) { + cl_int status; + + size_t local_size[3] = {64, 2, 1}; + size_t global_size[3] = {64, 2, 1}; + + if (ne12 == 1) { // for gemv + kernel = backend_ctx->kernel_gemv_moe_q5_1_f32_ns; + + cl_mem src1_sub_buffer, buf_src1_image, buf_src2; + + // create a sub_buffer for src2 + cl_buffer_region region; + region.origin = offset2; + region.size = ne20 * ne21 * sizeof(int); + buf_src2 = clCreateSubBuffer(extra2->data_device, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // set thread grid + global_size[0] = static_cast(ne01); + global_size[1] = 4; + global_size[2] = static_cast(ne20); + local_size[1] = 4; + + // create a sub_buffer for src1 + region.origin = offset1; + region.size = ne10 * ne11 * ne12 * sizeof(float); + src1_sub_buffer = clCreateSubBuffer(extra1->data_device, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // create image for src1 + cl_image_format image_format_buf_src1 = {CL_RGBA, CL_FLOAT}; + cl_image_desc image_desc_buf_src1 = {CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast(ne10 * ne11 * ne12 / 4), 0,0,0,0,0,0,0, {src1_sub_buffer}}; + buf_src1_image = clCreateImage(backend_ctx->context, CL_MEM_READ_ONLY, &image_format_buf_src1, &image_desc_buf_src1, NULL, &status); + CL_CHECK(status); + + // Set kernel args + int arg_idx = 0; + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->qs)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->qh)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->d)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->m)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src1_image)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src2)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne11)); + + // launch kernel + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_size, local_size, dst); + + // deallocate sub buffers and images + CL_CHECK(clReleaseMemObject(src1_sub_buffer)); + CL_CHECK(clReleaseMemObject(buf_src1_image)); + CL_CHECK(clReleaseMemObject(buf_src2)); + } else { // for gemm + kernel = backend_ctx->kernel_gemm_moe_q5_1_f32_ns; + + // Reorder router if called from test-backend-ops or when new router is generated. + // Otherwise reuse the reordered result from previous mul_mat_id call. + if ((strstr(src0->name, "as") != NULL) || backend_ctx->toggle_reorder) { + moe_router_reoerder(backend, src2, ne20); + backend_ctx->toggle_reorder = false; + } + + cl_mem sub_buf_src1_pre, buf_src1_reordered, image_src1_reordered, sub_buf_dst, buf_dst_image; + cl_mem buf_src2, buf_src2_emap; + + cl_buffer_region region; + region.origin = 0; + region.size = sizeof(int) * max_post_router_tile * n_tile_size; + buf_src2 = clCreateSubBuffer(backend_ctx->prealloc_post_router.buffer, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + region.origin = 0; + region.size = sizeof(short) * max_post_router_tile; + buf_src2_emap = clCreateSubBuffer(backend_ctx->prealloc_emap.buffer, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // Reorder activations + // create a sub_buffer for src1 + region.origin = offset1; + region.size = ne10 * ne11 * ne12 * sizeof(float); + sub_buf_src1_pre = clCreateSubBuffer(extra1->data_device, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); + CL_CHECK(status); + + // Create image for reordered src1 + // Use pre-allocated placeholder + region.origin = 0; + region.size = ne00 * max_post_router_tile * n_tile_size * sizeof(float); + backend_ctx->prealloc_act_trans.allocate(backend_ctx->context, region.size); + buf_src1_reordered = clCreateSubBuffer( + backend_ctx->prealloc_act_trans.buffer, + 0, + CL_BUFFER_CREATE_TYPE_REGION, + ®ion, + &status); + CL_CHECK(status); + cl_image_format image_format_buf_src1; + cl_image_desc image_desc_buf_src1; + image_format_buf_src1 = {CL_RGBA, CL_FLOAT}; + image_desc_buf_src1 = {CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast(ne00 * max_post_router_tile * n_tile_size / 4), 0,0,0,0,0,0,0, {buf_src1_reordered}}; + image_src1_reordered = clCreateImage(backend_ctx->context, CL_MEM_READ_ONLY, &image_format_buf_src1, &image_desc_buf_src1, NULL, &status); + CL_CHECK(status); + + unsigned short map_ratio = ne20 / ne11; + GGML_ASSERT(((map_ratio == 1) || (map_ratio == ne20)) && "Map ratio not supported\n"); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 0, sizeof(cl_mem), &sub_buf_src1_pre)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 1, sizeof(cl_mem), &buf_src2)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 2, sizeof(cl_mem), &buf_src1_reordered)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 3, sizeof(cl_mem), &(backend_ctx->prealloc_total_tiles.buffer))); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 4, sizeof(unsigned int), &ne00)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 5, sizeof(unsigned short), &map_ratio)); + CL_CHECK(clSetKernelArg(backend_ctx->kernel_moe_reorder_b, 6, sizeof(unsigned int), &n_tile_size)); + + size_t reorder_b_local_size[3] = {256, 1, 1}; + size_t reorder_b_global_size[3] = {static_cast(((ne00 / 4) + 255) / 256 * 256), static_cast(max_post_router_tile * n_tile_size), 1}; + + // Dispatch reorder kernel + backend_ctx->enqueue_ndrange_kernel(backend_ctx->kernel_moe_reorder_b, 3, reorder_b_global_size, reorder_b_local_size, dst); + + // MoE kernel prepare + // Create sub buffer for dst + region.origin = offsetd; + region.size = ne0 * ne1 * ne2 * sizeof(float); + sub_buf_dst = clCreateSubBuffer( + extrad->data_device, + 0, + CL_BUFFER_CREATE_TYPE_REGION, + ®ion, + &status); + CL_CHECK(status); + // Create image for dst + cl_image_format image_format_buf_dst = {CL_R, CL_FLOAT}; + cl_image_desc image_desc_buf_dst = {CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast(ne0 * ne1 * ne2), 0,0,0,0,0,0,0, {sub_buf_dst}}; + buf_dst_image = clCreateImage(backend_ctx->context, CL_MEM_WRITE_ONLY, &image_format_buf_dst, &image_desc_buf_dst, NULL, &status); + CL_CHECK(status); + + // Set kernel args + int arg_idx = 0; + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->qs_img)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->qh)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->d)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &extra0_q5_1->m)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &image_src1_reordered)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src2)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_src2_emap)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &buf_dst_image)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(cl_mem), &(backend_ctx->prealloc_total_tiles.buffer))); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, arg_idx++, sizeof(int), &ne01)); + + // set thread grid + global_size[1] = static_cast((ne01 + 63) / 64); + global_size[2] = static_cast(max_post_router_tile); + local_size[1] = 1; + local_size[2] = 1; + + // Dispatch kernel + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_size, local_size, dst); + + clReleaseMemObject(sub_buf_src1_pre); + clReleaseMemObject(buf_src1_reordered); + clReleaseMemObject(image_src1_reordered); + clReleaseMemObject(buf_src2); + clReleaseMemObject(buf_src2_emap); + clReleaseMemObject(sub_buf_dst); + clReleaseMemObject(buf_dst_image); + } + return; + } #endif //GGML_OPENCL_USE_ADRENO_KERNELS } case GGML_TYPE_Q8_0: { diff --git a/ggml/src/ggml-opencl/kernels/cvt.cl b/ggml/src/ggml-opencl/kernels/cvt.cl index 5bbf09710f9..8f06d570587 100644 --- a/ggml/src/ggml-opencl/kernels/cvt.cl +++ b/ggml/src/ggml-opencl/kernels/cvt.cl @@ -56,6 +56,25 @@ struct block_q4_1 { uchar qs[QK4_1 / 2]; // nibbles / quants }; +//------------------------------------------------------------------------------ +// block_q5_0 +//------------------------------------------------------------------------------ +struct block_q5_0 { + half d; // delta + uchar qh[4]; // 5-th bit of quants + uchar qs[QK5_0 / 2]; // nibbles / quants +}; + +//------------------------------------------------------------------------------ +// block_q5_1 +//------------------------------------------------------------------------------ +struct block_q5_1 { + half d; // delta + half m; // min + uchar qh[4]; // 5-th bit of quants + uchar qs[QK5_1 / 2]; // nibbles / quants +}; + //------------------------------------------------------------------------------ // block_q4_k //------------------------------------------------------------------------------ @@ -460,6 +479,191 @@ kernel void kernel_restore_block_q4_1_trans4_ns( ((__global ushort8 *)(&(b->qs[0])))[0] = pre_block; } +kernel void kernel_convert_block_q5_0_trans4_ns( + __global struct block_q5_0 * src0, + __global uint * dst_qs, + __global uint * dst_qh, + __global half * dst_d, + uint ne00, + uint ne01 +) { + uint i00 = get_global_id(1); + uint i01 = get_global_id(0); + uint i02 = get_global_id(2); + + uint ne00_blk = ne00 / QK5_0; + uint src_blk_offset = i00 + i01 * ne00_blk + i02 * ne00_blk * ne01; + uint dst_blk_offset = i01 + i00 * ne01 + i02 * ne00_blk * ne01; + + global struct block_q5_0 * b = src0 + src_blk_offset; + dst_d[dst_blk_offset] = b->d; + + dst_qh[dst_blk_offset] = ((global uint *)(&(b->qh[0])))[0]; + + // extract quantization and unshuffle + ushort8 pre_block = ((global ushort8 *)(&(b->qs[0])))[0]; + ushort8 post_block = (ushort8)(0); + + uchar * pre_block_ptr = (uchar *)(&pre_block); + uchar * post_block_ptr = (uchar *)(&post_block); + + for (int i = 0; i < QK5_0 / 4; ++i) { + uchar x0 = pre_block_ptr[2*i + 0]; + uchar x1 = pre_block_ptr[2*i + 1]; + + post_block_ptr[i + 0 ] = convert_uchar(x0 & 0x0F) | convert_uchar((x1 & 0x0F) << 4); + post_block_ptr[i + QK5_0 / 4] = convert_uchar((x0 & 0xF0) >> 4) | convert_uchar(x1 & 0xF0); + } + + uint4 q_block = as_uint4(post_block); + + uint offset = i02 * ne00_blk * ne01 * 4 + i00 * ne01 * 4 + i01; + dst_qs[offset] = q_block.x; + dst_qs[offset + ne01] = q_block.y; + dst_qs[offset + ne01 * 2] = q_block.z; + dst_qs[offset + ne01 * 3] = q_block.w; +} + +kernel void kernel_restore_block_q5_0_trans4_ns( + __global uint * src_qs, + __global uint * src_qh, + __global half * src_d, + __global struct block_q5_0 * dst0, + uint ne00, + uint ne01 +) { + int i00 = get_global_id(1); + uint i01 = get_global_id(0); + uint i02 = get_global_id(2); + + uint ne00_blk = ne00 / QK5_0; + uint dst_blk_offset = i00 + i01 * ne00_blk + i02 * ne00_blk * ne01; + uint src_blk_offset = i01 + i00 * ne01 + i02 * ne00_blk * ne01; + + __global struct block_q5_0 * b = dst0 + dst_blk_offset; + b->d = src_d[src_blk_offset]; + + ((__global uint *)(&(b->qh[0])))[0] = src_qh[src_blk_offset]; + + // collect transposed quantization parts for a block + uint src_q_offset = i02 * ne00_blk * ne01 * 4 + i00 * ne01 * 4 + i01; + uint4 q_block; + q_block.x = src_qs[src_q_offset]; + q_block.y = src_qs[src_q_offset + ne01]; + q_block.z = src_qs[src_q_offset + ne01 * 2]; + q_block.w = src_qs[src_q_offset + ne01 * 3]; + + ushort8 post_block = as_ushort8(q_block); + ushort8 pre_block = (ushort8)(0); + + uchar * pre_block_ptr = (uchar *)(&pre_block); + uchar * post_block_ptr = (uchar *)(&post_block); + + for (int i = 0; i < QK5_0 / 4; ++i) { + uchar x0 = post_block_ptr[i + 0]; + uchar x1 = post_block_ptr[i + QK5_0 / 4]; + + pre_block_ptr[2 * i + 0] = convert_uchar(x0 & 0x0F) | convert_uchar((x1 & 0x0F) << 4); + pre_block_ptr[2 * i + 1] = convert_uchar((x0 & 0xF0) >> 4) | convert_uchar(x1 & 0xF0); + } + + ((__global ushort8 *)(&(b->qs[0])))[0] = pre_block; +} + +kernel void kernel_convert_block_q5_1_trans4_ns( + __global struct block_q5_1 * src0, + __global uint * dst_qs, + __global uint * dst_qh, + __global half * dst_d, + __global half * dst_m, + uint ne00, + uint ne01 +) { + uint i00 = get_global_id(1); + uint i01 = get_global_id(0); + uint i02 = get_global_id(2); + + uint ne00_blk = ne00 / QK5_1; + uint src_blk_offset = i00 + i01 * ne00_blk + i02 * ne00_blk * ne01; + uint dst_blk_offset = i01 + i00 * ne01 + i02 * ne00_blk * ne01; + + global struct block_q5_1 * b = src0 + src_blk_offset; + dst_d[dst_blk_offset] = b->d; + dst_m[dst_blk_offset] = b->m; + + dst_qh[dst_blk_offset] = ((global uint *)(&(b->qh[0])))[0]; + + // extract quantization and unshuffle + ushort8 pre_block = ((global ushort8 *)(&(b->qs[0])))[0]; + ushort8 post_block = (ushort8)(0); + + uchar * pre_block_ptr = (uchar *)(&pre_block); + uchar * post_block_ptr = (uchar *)(&post_block); + + for (int i = 0; i < QK5_1 / 4; ++i) { + uchar x0 = pre_block_ptr[2*i + 0]; + uchar x1 = pre_block_ptr[2*i + 1]; + + post_block_ptr[i + 0 ] = convert_uchar(x0 & 0x0F) | convert_uchar((x1 & 0x0F) << 4); + post_block_ptr[i + QK5_1 / 4] = convert_uchar((x0 & 0xF0) >> 4) | convert_uchar(x1 & 0xF0); + } + + uint4 q_block = as_uint4(post_block); + + uint offset = i02 * ne00_blk * ne01 * 4 + i00 * ne01 * 4 + i01; + dst_qs[offset] = q_block.x; + dst_qs[offset + ne01] = q_block.y; + dst_qs[offset + ne01 * 2] = q_block.z; + dst_qs[offset + ne01 * 3] = q_block.w; +} + +kernel void kernel_restore_block_q5_1_trans4_ns( + __global uint * src_qs, + __global uint * src_qh, + __global half * src_d, + __global half * src_m, + __global struct block_q5_1 * dst0, + uint ne00, + uint ne01 +) { + int i00 = get_global_id(1); + uint i01 = get_global_id(0); + uint i02 = get_global_id(2); + + uint ne00_blk = ne00 / QK5_1; + uint dst_blk_offset = i00 + i01 * ne00_blk + i02 * ne00_blk * ne01; + uint src_blk_offset = i01 + i00 * ne01 + i02 * ne00_blk * ne01; + + __global struct block_q5_1 * b = dst0 + dst_blk_offset; + b->d = src_d[src_blk_offset]; + b->m = src_m[src_blk_offset]; + + ((__global uint *)(&(b->qh[0])))[0] = src_qh[src_blk_offset]; + + // collect transposed quantization parts for a block + uint src_q_offset = i02 * ne00_blk * ne01 * 4 + i00 * ne01 * 4 + i01; + uint4 q_block; + q_block.x = src_qs[src_q_offset]; + q_block.y = src_qs[src_q_offset + ne01]; + q_block.z = src_qs[src_q_offset + ne01 * 2]; + q_block.w = src_qs[src_q_offset + ne01 * 3]; + + ushort8 post_block = as_ushort8(q_block); + ushort8 pre_block = (ushort8)(0); + + uchar * pre_block_ptr = (uchar *)(&pre_block); + uchar * post_block_ptr = (uchar *)(&post_block); + + for (int i = 0; i < QK5_1 / 4; ++i) { + uchar x0 = post_block_ptr[i + 0]; + uchar x1 = post_block_ptr[i + QK5_1 / 4]; + + pre_block_ptr[2 * i + 0] = convert_uchar(x0 & 0x0F) | convert_uchar((x1 & 0x0F) << 4); + pre_block_ptr[2 * i + 1] = convert_uchar((x0 & 0xF0) >> 4) | convert_uchar(x1 & 0xF0); + } + ((__global ushort8 *)(&(b->qs[0])))[0] = pre_block; +} + //------------------------------------------------------------------------------ // block_mxfp4 //------------------------------------------------------------------------------ diff --git a/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl b/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl new file mode 100644 index 00000000000..3524cb1bdbd --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl @@ -0,0 +1,256 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_qcom_subgroup_uniform_load: enable +#pragma OPENCL EXTENSION cl_qcom_subgroup_constant_load: enable +#pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable + +#define TILESIZE_K 16 +#define TILESIZE_M 64 +#define TILESIZE_N 32 + + +#define dequantize_q5_0(qs5x16, qh5x16, a_f16, scale) \ + a_f16.s0 = (half)((( qs5x16.s0 & 0x000F) | (( qh5x16.s0 & 0x01) << 4)) - 16) * scale; \ + a_f16.s1 = (half)((((qs5x16.s0 & 0x00F0) >> 4 ) | (((qh5x16.s0 >> 1) & 0x01) << 4)) - 16) * scale; \ + a_f16.s2 = (half)((((qs5x16.s0 & 0x0F00) >> 8 ) | (((qh5x16.s0 >> 2) & 0x01) << 4)) - 16) * scale; \ + a_f16.s3 = (half)((((qs5x16.s0 & 0xF000) >> 12) | (((qh5x16.s0 >> 3) & 0x01) << 4)) - 16) * scale; \ + a_f16.s4 = (half)((( qs5x16.s1 & 0x000F) | (((qh5x16.s0 >> 4) & 0x01) << 4)) - 16) * scale; \ + a_f16.s5 = (half)((((qs5x16.s1 & 0x00F0) >> 4 ) | (((qh5x16.s0 >> 5) & 0x01) << 4)) - 16) * scale; \ + a_f16.s6 = (half)((((qs5x16.s1 & 0x0F00) >> 8 ) | (((qh5x16.s0 >> 6) & 0x01) << 4)) - 16) * scale; \ + a_f16.s7 = (half)((((qs5x16.s1 & 0xF000) >> 12) | (((qh5x16.s0 >> 7) & 0x01) << 4)) - 16) * scale; \ + a_f16.s8 = (half)((( qs5x16.s2 & 0x000F) | (( qh5x16.s1 & 0x01) << 4)) - 16) * scale; \ + a_f16.s9 = (half)((((qs5x16.s2 & 0x00F0) >> 4 ) | (((qh5x16.s1 >> 1) & 0x01) << 4)) - 16) * scale; \ + a_f16.sa = (half)((((qs5x16.s2 & 0x0F00) >> 8 ) | (((qh5x16.s1 >> 2) & 0x01) << 4)) - 16) * scale; \ + a_f16.sb = (half)((((qs5x16.s2 & 0xF000) >> 12) | (((qh5x16.s1 >> 3) & 0x01) << 4)) - 16) * scale; \ + a_f16.sc = (half)((( qs5x16.s3 & 0x000F) | (((qh5x16.s1 >> 4) & 0x01) << 4)) - 16) * scale; \ + a_f16.sd = (half)((((qs5x16.s3 & 0x00F0) >> 4 ) | (((qh5x16.s1 >> 5) & 0x01) << 4)) - 16) * scale; \ + a_f16.se = (half)((((qs5x16.s3 & 0x0F00) >> 8 ) | (((qh5x16.s1 >> 6) & 0x01) << 4)) - 16) * scale; \ + a_f16.sf = (half)((((qs5x16.s3 & 0xF000) >> 12) | (((qh5x16.s1 >> 7) & 0x01) << 4)) - 16) * scale; \ + + +#define dotx16_reduce8(a_reg, b_lm, c_reg, lm_offset) \ + acc.s0 = dot(a_reg.s0123, b_lm[lm_offset + 0]); \ + acc.s1 = dot(a_reg.s0123, b_lm[lm_offset + 1]); \ + acc.s2 = dot(a_reg.s0123, b_lm[lm_offset + 2]); \ + acc.s3 = dot(a_reg.s0123, b_lm[lm_offset + 3]); \ + acc.s4 = dot(a_reg.s0123, b_lm[lm_offset + 4]); \ + acc.s5 = dot(a_reg.s0123, b_lm[lm_offset + 5]); \ + acc.s6 = dot(a_reg.s0123, b_lm[lm_offset + 6]); \ + acc.s7 = dot(a_reg.s0123, b_lm[lm_offset + 7]); \ + acc.s8 = dot(a_reg.s0123, b_lm[lm_offset + 8]); \ + acc.s9 = dot(a_reg.s0123, b_lm[lm_offset + 9]); \ + acc.sa = dot(a_reg.s0123, b_lm[lm_offset + 10]); \ + acc.sb = dot(a_reg.s0123, b_lm[lm_offset + 11]); \ + acc.sc = dot(a_reg.s0123, b_lm[lm_offset + 12]); \ + acc.sd = dot(a_reg.s0123, b_lm[lm_offset + 13]); \ + acc.se = dot(a_reg.s0123, b_lm[lm_offset + 14]); \ + acc.sf = dot(a_reg.s0123, b_lm[lm_offset + 15]); \ + acc.s0 += dot(a_reg.s4567, b_lm[lm_offset + 32]); \ + acc.s1 += dot(a_reg.s4567, b_lm[lm_offset + 33]); \ + acc.s2 += dot(a_reg.s4567, b_lm[lm_offset + 34]); \ + acc.s3 += dot(a_reg.s4567, b_lm[lm_offset + 35]); \ + acc.s4 += dot(a_reg.s4567, b_lm[lm_offset + 36]); \ + acc.s5 += dot(a_reg.s4567, b_lm[lm_offset + 37]); \ + acc.s6 += dot(a_reg.s4567, b_lm[lm_offset + 38]); \ + acc.s7 += dot(a_reg.s4567, b_lm[lm_offset + 39]); \ + acc.s8 += dot(a_reg.s4567, b_lm[lm_offset + 40]); \ + acc.s9 += dot(a_reg.s4567, b_lm[lm_offset + 41]); \ + acc.sa += dot(a_reg.s4567, b_lm[lm_offset + 42]); \ + acc.sb += dot(a_reg.s4567, b_lm[lm_offset + 43]); \ + acc.sc += dot(a_reg.s4567, b_lm[lm_offset + 44]); \ + acc.sd += dot(a_reg.s4567, b_lm[lm_offset + 45]); \ + acc.se += dot(a_reg.s4567, b_lm[lm_offset + 46]); \ + acc.sf += dot(a_reg.s4567, b_lm[lm_offset + 47]); \ + c_reg.lo += convert_float8(acc.lo); \ + c_reg.hi += convert_float8(acc.hi); \ + acc.s0 = dot(a_reg.s89ab, b_lm[lm_offset + 64]); \ + acc.s1 = dot(a_reg.s89ab, b_lm[lm_offset + 65]); \ + acc.s2 = dot(a_reg.s89ab, b_lm[lm_offset + 66]); \ + acc.s3 = dot(a_reg.s89ab, b_lm[lm_offset + 67]); \ + acc.s4 = dot(a_reg.s89ab, b_lm[lm_offset + 68]); \ + acc.s5 = dot(a_reg.s89ab, b_lm[lm_offset + 69]); \ + acc.s6 = dot(a_reg.s89ab, b_lm[lm_offset + 70]); \ + acc.s7 = dot(a_reg.s89ab, b_lm[lm_offset + 71]); \ + acc.s8 = dot(a_reg.s89ab, b_lm[lm_offset + 72]); \ + acc.s9 = dot(a_reg.s89ab, b_lm[lm_offset + 73]); \ + acc.sa = dot(a_reg.s89ab, b_lm[lm_offset + 74]); \ + acc.sb = dot(a_reg.s89ab, b_lm[lm_offset + 75]); \ + acc.sc = dot(a_reg.s89ab, b_lm[lm_offset + 76]); \ + acc.sd = dot(a_reg.s89ab, b_lm[lm_offset + 77]); \ + acc.se = dot(a_reg.s89ab, b_lm[lm_offset + 78]); \ + acc.sf = dot(a_reg.s89ab, b_lm[lm_offset + 79]); \ + acc.s0 += dot(a_reg.scdef, b_lm[lm_offset + 96]); \ + acc.s1 += dot(a_reg.scdef, b_lm[lm_offset + 97]); \ + acc.s2 += dot(a_reg.scdef, b_lm[lm_offset + 98]); \ + acc.s3 += dot(a_reg.scdef, b_lm[lm_offset + 99]); \ + acc.s4 += dot(a_reg.scdef, b_lm[lm_offset + 100]); \ + acc.s5 += dot(a_reg.scdef, b_lm[lm_offset + 101]); \ + acc.s6 += dot(a_reg.scdef, b_lm[lm_offset + 102]); \ + acc.s7 += dot(a_reg.scdef, b_lm[lm_offset + 103]); \ + acc.s8 += dot(a_reg.scdef, b_lm[lm_offset + 104]); \ + acc.s9 += dot(a_reg.scdef, b_lm[lm_offset + 105]); \ + acc.sa += dot(a_reg.scdef, b_lm[lm_offset + 106]); \ + acc.sb += dot(a_reg.scdef, b_lm[lm_offset + 107]); \ + acc.sc += dot(a_reg.scdef, b_lm[lm_offset + 108]); \ + acc.sd += dot(a_reg.scdef, b_lm[lm_offset + 109]); \ + acc.se += dot(a_reg.scdef, b_lm[lm_offset + 110]); \ + acc.sf += dot(a_reg.scdef, b_lm[lm_offset + 111]); \ + c_reg.lo += convert_float8(acc.lo); \ + c_reg.hi += convert_float8(acc.hi); \ + + +__attribute__((qcom_wave_pair_mode(1))) // 1=force single 2=force pair +kernel void kernel_gemm_moe_q5_0_f32_ns( + __read_only image1d_buffer_t src0_qs, + __global uint * src0_qh, + __global half * src0_d, + __read_only image1d_buffer_t src1, + __global uint * src2, + __global ushort * src2_emap, + __write_only image1d_buffer_t dst, + __global int * total_tiles, + uint ne00, + uint ne01 +) { + uint block_id_m = get_global_id(1); // m_tile + uint block_id_n = get_global_id(2); // n_tile + + // Boundary check + if (((get_global_id(0) + block_id_m * TILESIZE_M) >= ne01) || (block_id_n >= total_tiles[0])) { + return; + } + + __private half16 reg_a; + __private float32 reg_c = (float32)(0); + __local half4 shared_b[128]; + + const ushort expert_id = src2_emap[block_id_n]; + + const uint row = block_id_m * TILESIZE_M; + const uint col = block_id_n * TILESIZE_N; + + uint sub_block_id_m = get_local_id(0); + uint2 b_global_offset; + b_global_offset.x = ((sub_block_id_m & 3) << 2) + (sub_block_id_m >> 2) * ne00; + b_global_offset.y = b_global_offset.x + (16 * ne00); + uint2 b_local_offset; + b_local_offset.x = (sub_block_id_m & 3) * 32 + (sub_block_id_m >> 2); + b_local_offset.y = b_local_offset.x + 16; + + // Loop along K axis, 32 elements (one block) for each iteration, divided into 2 sub-blocks + for (uint step = 0; step < ne00; step += TILESIZE_K * 2) { + // First sub-block + uint q_sub_offset = row + ((ne01 * step) >> 3) + ((expert_id * ne00 * ne01) >> 3); + uint s_sub_offset = row + ((ne01 * step) >> 5) + ((expert_id * ne00 * ne01) >> 5); + uint b_sub_offset = col * ne00 + step; + + // Load scale for current Q5_0 block + uint blk_offset = s_sub_offset + get_global_id(0); + half s = src0_d[blk_offset]; + + // Load 32 qh (5-th bit of each Q5) for the entire block + uchar4 qhx32 = as_uchar4(src0_qh[blk_offset]); + + // Load 16 qs (half block) in transposed layout + uint2 qsx16; + qsx16.x = read_imageui(src0_qs, q_sub_offset + sub_block_id_m).x; + qsx16.y = read_imageui(src0_qs, q_sub_offset + sub_block_id_m + ne01).x; + + // Load 16x32 floats from matrix B, each fiber out of 64 in a sub-group loads 8 elements + float8 bx8_f32; + bx8_f32.lo = read_imagef(src1, (b_sub_offset + b_global_offset.x) / 4); + bx8_f32.hi = read_imagef(src1, (b_sub_offset + b_global_offset.y) / 4); + // Convert to half and store to LM to share within the subgroup + half8 bx8_f16 = convert_half8(bx8_f32); + shared_b[b_local_offset.x] = bx8_f16.lo; + shared_b[b_local_offset.y] = bx8_f16.hi; + + // Dequantization + dequantize_q5_0(as_ushort4(qsx16), qhx32.lo, reg_a, s); + + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + + // 32 16x16 fp16 dot product with 8 elements reduction for better precision + half16 acc; + dotx16_reduce8(reg_a, shared_b, reg_c.lo, 0); + dotx16_reduce8(reg_a, shared_b, reg_c.hi, 16); + + // Repeat for second sub-block + uint half_step = step + TILESIZE_K; + q_sub_offset = row + ((ne01 * half_step) >> 3) + ((expert_id * ne00 * ne01) >> 3); + b_sub_offset = col * ne00 + half_step; + + // Load next 16 qs in transposed layout + qsx16.x = read_imageui(src0_qs, q_sub_offset + sub_block_id_m).x; + qsx16.y = read_imageui(src0_qs, q_sub_offset + sub_block_id_m + ne01).x; + + // Load 16x32 floats from matrix B, each fiber out of 64 in a sub-group loads 8 elements + bx8_f32.lo = read_imagef(src1, (b_sub_offset + b_global_offset.x) / 4); + bx8_f32.hi = read_imagef(src1, (b_sub_offset + b_global_offset.y) / 4); + // Convert to half and store to LM to share within the subgroup + bx8_f16 = convert_half8(bx8_f32); + shared_b[b_local_offset.x] = bx8_f16.lo; + shared_b[b_local_offset.y] = bx8_f16.hi; + + // Dequantization + dequantize_q5_0(as_ushort4(qsx16), qhx32.hi, reg_a, s); + + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + + // 32 16x16 fp16 dot product with 3-levels reduction for better precision + dotx16_reduce8(reg_a, shared_b, reg_c.lo, 0); + dotx16_reduce8(reg_a, shared_b, reg_c.hi, 16); + } + + // Load poster router and share in LM + __local uint out_idx[TILESIZE_N]; + + if (get_local_id(0) < TILESIZE_N) { + uint idx = src2[block_id_n * TILESIZE_N + get_local_id(0)]; + if (idx == 0xFFFFFFFF) { + idx = src2[block_id_n * TILESIZE_N + 0]; + } + out_idx[get_local_id(0)] = idx * ne01; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // Scatter results back to original position in output grid + uint m_offset = row + get_local_id(0); + + write_imagef(dst, out_idx[1] + m_offset, (reg_c.s1)); + write_imagef(dst, out_idx[2] + m_offset, (reg_c.s2)); + write_imagef(dst, out_idx[3] + m_offset, (reg_c.s3)); + write_imagef(dst, out_idx[4] + m_offset, (reg_c.s4)); + write_imagef(dst, out_idx[5] + m_offset, (reg_c.s5)); + write_imagef(dst, out_idx[6] + m_offset, (reg_c.s6)); + write_imagef(dst, out_idx[7] + m_offset, (reg_c.s7)); + write_imagef(dst, out_idx[8] + m_offset, (reg_c.s8)); + write_imagef(dst, out_idx[9] + m_offset, (reg_c.s9)); + write_imagef(dst, out_idx[10] + m_offset, (reg_c.sa)); + write_imagef(dst, out_idx[11] + m_offset, (reg_c.sb)); + write_imagef(dst, out_idx[12] + m_offset, (reg_c.sc)); + write_imagef(dst, out_idx[13] + m_offset, (reg_c.sd)); + write_imagef(dst, out_idx[14] + m_offset, (reg_c.se)); + write_imagef(dst, out_idx[15] + m_offset, (reg_c.sf)); + write_imagef(dst, out_idx[16] + m_offset, (reg_c.sg)); + write_imagef(dst, out_idx[17] + m_offset, (reg_c.sh)); + write_imagef(dst, out_idx[18] + m_offset, (reg_c.si)); + write_imagef(dst, out_idx[19] + m_offset, (reg_c.sj)); + write_imagef(dst, out_idx[20] + m_offset, (reg_c.sk)); + write_imagef(dst, out_idx[21] + m_offset, (reg_c.sl)); + write_imagef(dst, out_idx[22] + m_offset, (reg_c.sm)); + write_imagef(dst, out_idx[23] + m_offset, (reg_c.sn)); + write_imagef(dst, out_idx[24] + m_offset, (reg_c.so)); + write_imagef(dst, out_idx[25] + m_offset, (reg_c.sp)); + write_imagef(dst, out_idx[26] + m_offset, (reg_c.sq)); + write_imagef(dst, out_idx[27] + m_offset, (reg_c.sr)); + write_imagef(dst, out_idx[28] + m_offset, (reg_c.ss)); + write_imagef(dst, out_idx[29] + m_offset, (reg_c.st)); + write_imagef(dst, out_idx[30] + m_offset, (reg_c.su)); + write_imagef(dst, out_idx[31] + m_offset, (reg_c.sv)); + + // Store zero padding parts to the index of first output in tile, override correct result in the end + barrier(CLK_GLOBAL_MEM_FENCE); + write_imagef(dst, out_idx[0] + m_offset, (reg_c.s0)); +} diff --git a/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl b/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl new file mode 100644 index 00000000000..5fc2a523234 --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl @@ -0,0 +1,258 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_qcom_subgroup_uniform_load: enable +#pragma OPENCL EXTENSION cl_qcom_subgroup_constant_load: enable +#pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable + +#define TILESIZE_K 16 +#define TILESIZE_M 64 +#define TILESIZE_N 32 + + +#define dequantize_q5_1(qs5x16, qh5x16, a_f16, scale, m) \ + a_f16.s0 = (half)((( qs5x16.s0 & 0x000F) | (( qh5x16.s0 & 0x01) << 4)) * scale + m); \ + a_f16.s1 = (half)((((qs5x16.s0 & 0x00F0) >> 4 ) | (((qh5x16.s0 >> 1) & 0x01) << 4)) * scale + m); \ + a_f16.s2 = (half)((((qs5x16.s0 & 0x0F00) >> 8 ) | (((qh5x16.s0 >> 2) & 0x01) << 4)) * scale + m); \ + a_f16.s3 = (half)((((qs5x16.s0 & 0xF000) >> 12) | (((qh5x16.s0 >> 3) & 0x01) << 4)) * scale + m); \ + a_f16.s4 = (half)((( qs5x16.s1 & 0x000F) | (((qh5x16.s0 >> 4) & 0x01) << 4)) * scale + m); \ + a_f16.s5 = (half)((((qs5x16.s1 & 0x00F0) >> 4 ) | (((qh5x16.s0 >> 5) & 0x01) << 4)) * scale + m); \ + a_f16.s6 = (half)((((qs5x16.s1 & 0x0F00) >> 8 ) | (((qh5x16.s0 >> 6) & 0x01) << 4)) * scale + m); \ + a_f16.s7 = (half)((((qs5x16.s1 & 0xF000) >> 12) | (((qh5x16.s0 >> 7) & 0x01) << 4)) * scale + m); \ + a_f16.s8 = (half)((( qs5x16.s2 & 0x000F) | (( qh5x16.s1 & 0x01) << 4)) * scale + m); \ + a_f16.s9 = (half)((((qs5x16.s2 & 0x00F0) >> 4 ) | (((qh5x16.s1 >> 1) & 0x01) << 4)) * scale + m); \ + a_f16.sa = (half)((((qs5x16.s2 & 0x0F00) >> 8 ) | (((qh5x16.s1 >> 2) & 0x01) << 4)) * scale + m); \ + a_f16.sb = (half)((((qs5x16.s2 & 0xF000) >> 12) | (((qh5x16.s1 >> 3) & 0x01) << 4)) * scale + m); \ + a_f16.sc = (half)((( qs5x16.s3 & 0x000F) | (((qh5x16.s1 >> 4) & 0x01) << 4)) * scale + m); \ + a_f16.sd = (half)((((qs5x16.s3 & 0x00F0) >> 4 ) | (((qh5x16.s1 >> 5) & 0x01) << 4)) * scale + m); \ + a_f16.se = (half)((((qs5x16.s3 & 0x0F00) >> 8 ) | (((qh5x16.s1 >> 6) & 0x01) << 4)) * scale + m); \ + a_f16.sf = (half)((((qs5x16.s3 & 0xF000) >> 12) | (((qh5x16.s1 >> 7) & 0x01) << 4)) * scale + m); \ + + +#define dotx16_reduce8(a_reg, b_lm, c_reg, lm_offset) \ + acc.s0 = dot(a_reg.s0123, b_lm[lm_offset + 0]); \ + acc.s1 = dot(a_reg.s0123, b_lm[lm_offset + 1]); \ + acc.s2 = dot(a_reg.s0123, b_lm[lm_offset + 2]); \ + acc.s3 = dot(a_reg.s0123, b_lm[lm_offset + 3]); \ + acc.s4 = dot(a_reg.s0123, b_lm[lm_offset + 4]); \ + acc.s5 = dot(a_reg.s0123, b_lm[lm_offset + 5]); \ + acc.s6 = dot(a_reg.s0123, b_lm[lm_offset + 6]); \ + acc.s7 = dot(a_reg.s0123, b_lm[lm_offset + 7]); \ + acc.s8 = dot(a_reg.s0123, b_lm[lm_offset + 8]); \ + acc.s9 = dot(a_reg.s0123, b_lm[lm_offset + 9]); \ + acc.sa = dot(a_reg.s0123, b_lm[lm_offset + 10]); \ + acc.sb = dot(a_reg.s0123, b_lm[lm_offset + 11]); \ + acc.sc = dot(a_reg.s0123, b_lm[lm_offset + 12]); \ + acc.sd = dot(a_reg.s0123, b_lm[lm_offset + 13]); \ + acc.se = dot(a_reg.s0123, b_lm[lm_offset + 14]); \ + acc.sf = dot(a_reg.s0123, b_lm[lm_offset + 15]); \ + acc.s0 += dot(a_reg.s4567, b_lm[lm_offset + 32]); \ + acc.s1 += dot(a_reg.s4567, b_lm[lm_offset + 33]); \ + acc.s2 += dot(a_reg.s4567, b_lm[lm_offset + 34]); \ + acc.s3 += dot(a_reg.s4567, b_lm[lm_offset + 35]); \ + acc.s4 += dot(a_reg.s4567, b_lm[lm_offset + 36]); \ + acc.s5 += dot(a_reg.s4567, b_lm[lm_offset + 37]); \ + acc.s6 += dot(a_reg.s4567, b_lm[lm_offset + 38]); \ + acc.s7 += dot(a_reg.s4567, b_lm[lm_offset + 39]); \ + acc.s8 += dot(a_reg.s4567, b_lm[lm_offset + 40]); \ + acc.s9 += dot(a_reg.s4567, b_lm[lm_offset + 41]); \ + acc.sa += dot(a_reg.s4567, b_lm[lm_offset + 42]); \ + acc.sb += dot(a_reg.s4567, b_lm[lm_offset + 43]); \ + acc.sc += dot(a_reg.s4567, b_lm[lm_offset + 44]); \ + acc.sd += dot(a_reg.s4567, b_lm[lm_offset + 45]); \ + acc.se += dot(a_reg.s4567, b_lm[lm_offset + 46]); \ + acc.sf += dot(a_reg.s4567, b_lm[lm_offset + 47]); \ + c_reg.lo += convert_float8(acc.lo); \ + c_reg.hi += convert_float8(acc.hi); \ + acc.s0 = dot(a_reg.s89ab, b_lm[lm_offset + 64]); \ + acc.s1 = dot(a_reg.s89ab, b_lm[lm_offset + 65]); \ + acc.s2 = dot(a_reg.s89ab, b_lm[lm_offset + 66]); \ + acc.s3 = dot(a_reg.s89ab, b_lm[lm_offset + 67]); \ + acc.s4 = dot(a_reg.s89ab, b_lm[lm_offset + 68]); \ + acc.s5 = dot(a_reg.s89ab, b_lm[lm_offset + 69]); \ + acc.s6 = dot(a_reg.s89ab, b_lm[lm_offset + 70]); \ + acc.s7 = dot(a_reg.s89ab, b_lm[lm_offset + 71]); \ + acc.s8 = dot(a_reg.s89ab, b_lm[lm_offset + 72]); \ + acc.s9 = dot(a_reg.s89ab, b_lm[lm_offset + 73]); \ + acc.sa = dot(a_reg.s89ab, b_lm[lm_offset + 74]); \ + acc.sb = dot(a_reg.s89ab, b_lm[lm_offset + 75]); \ + acc.sc = dot(a_reg.s89ab, b_lm[lm_offset + 76]); \ + acc.sd = dot(a_reg.s89ab, b_lm[lm_offset + 77]); \ + acc.se = dot(a_reg.s89ab, b_lm[lm_offset + 78]); \ + acc.sf = dot(a_reg.s89ab, b_lm[lm_offset + 79]); \ + acc.s0 += dot(a_reg.scdef, b_lm[lm_offset + 96]); \ + acc.s1 += dot(a_reg.scdef, b_lm[lm_offset + 97]); \ + acc.s2 += dot(a_reg.scdef, b_lm[lm_offset + 98]); \ + acc.s3 += dot(a_reg.scdef, b_lm[lm_offset + 99]); \ + acc.s4 += dot(a_reg.scdef, b_lm[lm_offset + 100]); \ + acc.s5 += dot(a_reg.scdef, b_lm[lm_offset + 101]); \ + acc.s6 += dot(a_reg.scdef, b_lm[lm_offset + 102]); \ + acc.s7 += dot(a_reg.scdef, b_lm[lm_offset + 103]); \ + acc.s8 += dot(a_reg.scdef, b_lm[lm_offset + 104]); \ + acc.s9 += dot(a_reg.scdef, b_lm[lm_offset + 105]); \ + acc.sa += dot(a_reg.scdef, b_lm[lm_offset + 106]); \ + acc.sb += dot(a_reg.scdef, b_lm[lm_offset + 107]); \ + acc.sc += dot(a_reg.scdef, b_lm[lm_offset + 108]); \ + acc.sd += dot(a_reg.scdef, b_lm[lm_offset + 109]); \ + acc.se += dot(a_reg.scdef, b_lm[lm_offset + 110]); \ + acc.sf += dot(a_reg.scdef, b_lm[lm_offset + 111]); \ + c_reg.lo += convert_float8(acc.lo); \ + c_reg.hi += convert_float8(acc.hi); \ + + +__attribute__((qcom_wave_pair_mode(1))) // 1=force single 2=force pair +kernel void kernel_gemm_moe_q5_1_f32_ns( + __read_only image1d_buffer_t src0_qs, + __global uint * src0_qh, + __global half * src0_d, + __global half * src0_m, + __read_only image1d_buffer_t src1, + __global uint * src2, + __global ushort * src2_emap, + __write_only image1d_buffer_t dst, + __global int * total_tiles, + uint ne00, + uint ne01 +) { + uint block_id_m = get_global_id(1); // m_tile + uint block_id_n = get_global_id(2); // n_tile + + // Boundary check + if (((get_global_id(0) + block_id_m * TILESIZE_M) >= ne01) || (block_id_n >= total_tiles[0])) { + return; + } + + __private half16 reg_a; + __private float32 reg_c = (float32)(0); + __local half4 shared_b[128]; + + const ushort expert_id = src2_emap[block_id_n]; + + const uint row = block_id_m * TILESIZE_M; + const uint col = block_id_n * TILESIZE_N; + + uint sub_block_id_m = get_local_id(0); + uint2 b_global_offset; + b_global_offset.x = ((sub_block_id_m & 3) << 2) + (sub_block_id_m >> 2) * ne00; + b_global_offset.y = b_global_offset.x + (16 * ne00); + uint2 b_local_offset; + b_local_offset.x = (sub_block_id_m & 3) * 32 + (sub_block_id_m >> 2); + b_local_offset.y = b_local_offset.x + 16; + + // Loop along K axis, 32 elements (one block) for each iteration, divided into 2 sub-blocks + for (uint step = 0; step < ne00; step += TILESIZE_K * 2) { + // First sub-block + uint q_sub_offset = row + ((ne01 * step) >> 3) + ((expert_id * ne00 * ne01) >> 3); + uint s_sub_offset = row + ((ne01 * step) >> 5) + ((expert_id * ne00 * ne01) >> 5); + uint b_sub_offset = col * ne00 + step; + + // Load scale and m for current Q5_1 block + uint blk_offset = s_sub_offset + get_global_id(0); + half s = src0_d[blk_offset]; + half m = src0_m[blk_offset]; + + // Load 32 qh (5-th bit of each Q5) for the entire block + uchar4 qhx32 = as_uchar4(src0_qh[blk_offset]); + + // Load 16 qs (half block) in transposed layout + uint2 qsx16; + qsx16.x = read_imageui(src0_qs, q_sub_offset + sub_block_id_m).x; + qsx16.y = read_imageui(src0_qs, q_sub_offset + sub_block_id_m + ne01).x; + + // Load 16x32 floats from matrix B, each fiber out of 64 in a sub-group loads 8 elements + float8 bx8_f32; + bx8_f32.lo = read_imagef(src1, (b_sub_offset + b_global_offset.x) / 4); + bx8_f32.hi = read_imagef(src1, (b_sub_offset + b_global_offset.y) / 4); + // Convert to half and store to LM to share within the subgroup + half8 bx8_f16 = convert_half8(bx8_f32); + shared_b[b_local_offset.x] = bx8_f16.lo; + shared_b[b_local_offset.y] = bx8_f16.hi; + + // Dequantization + dequantize_q5_1(as_ushort4(qsx16), qhx32.lo, reg_a, s, m); + + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + + // 32 16x16 fp16 dot product with 8 elements reduction for better precision + half16 acc; + dotx16_reduce8(reg_a, shared_b, reg_c.lo, 0); + dotx16_reduce8(reg_a, shared_b, reg_c.hi, 16); + + // Repeat for second sub-block + uint half_step = step + TILESIZE_K; + q_sub_offset = row + ((ne01 * half_step) >> 3) + ((expert_id * ne00 * ne01) >> 3); + b_sub_offset = col * ne00 + half_step; + + // Load next 16 qs in transposed layout + qsx16.x = read_imageui(src0_qs, q_sub_offset + sub_block_id_m).x; + qsx16.y = read_imageui(src0_qs, q_sub_offset + sub_block_id_m + ne01).x; + + // Load 16x32 floats from matrix B, each fiber out of 64 in a sub-group loads 8 elements + bx8_f32.lo = read_imagef(src1, (b_sub_offset + b_global_offset.x) / 4); + bx8_f32.hi = read_imagef(src1, (b_sub_offset + b_global_offset.y) / 4); + // Convert to half and store to LM to share within the subgroup + bx8_f16 = convert_half8(bx8_f32); + shared_b[b_local_offset.x] = bx8_f16.lo; + shared_b[b_local_offset.y] = bx8_f16.hi; + + // Dequantization + dequantize_q5_1(as_ushort4(qsx16), qhx32.hi, reg_a, s, m); + + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + + // 32 16x16 fp16 dot product with 3-levels reduction for better precision + dotx16_reduce8(reg_a, shared_b, reg_c.lo, 0); + dotx16_reduce8(reg_a, shared_b, reg_c.hi, 16); + } + + // Load poster router and share in LM + __local uint out_idx[TILESIZE_N]; + + if (get_local_id(0) < TILESIZE_N) { + uint idx = src2[block_id_n * TILESIZE_N + get_local_id(0)]; + if (idx == 0xFFFFFFFF) { + idx = src2[block_id_n * TILESIZE_N + 0]; + } + out_idx[get_local_id(0)] = idx * ne01; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // Scatter results back to original position in output grid + uint m_offset = row + get_local_id(0); + + write_imagef(dst, out_idx[1] + m_offset, (reg_c.s1)); + write_imagef(dst, out_idx[2] + m_offset, (reg_c.s2)); + write_imagef(dst, out_idx[3] + m_offset, (reg_c.s3)); + write_imagef(dst, out_idx[4] + m_offset, (reg_c.s4)); + write_imagef(dst, out_idx[5] + m_offset, (reg_c.s5)); + write_imagef(dst, out_idx[6] + m_offset, (reg_c.s6)); + write_imagef(dst, out_idx[7] + m_offset, (reg_c.s7)); + write_imagef(dst, out_idx[8] + m_offset, (reg_c.s8)); + write_imagef(dst, out_idx[9] + m_offset, (reg_c.s9)); + write_imagef(dst, out_idx[10] + m_offset, (reg_c.sa)); + write_imagef(dst, out_idx[11] + m_offset, (reg_c.sb)); + write_imagef(dst, out_idx[12] + m_offset, (reg_c.sc)); + write_imagef(dst, out_idx[13] + m_offset, (reg_c.sd)); + write_imagef(dst, out_idx[14] + m_offset, (reg_c.se)); + write_imagef(dst, out_idx[15] + m_offset, (reg_c.sf)); + write_imagef(dst, out_idx[16] + m_offset, (reg_c.sg)); + write_imagef(dst, out_idx[17] + m_offset, (reg_c.sh)); + write_imagef(dst, out_idx[18] + m_offset, (reg_c.si)); + write_imagef(dst, out_idx[19] + m_offset, (reg_c.sj)); + write_imagef(dst, out_idx[20] + m_offset, (reg_c.sk)); + write_imagef(dst, out_idx[21] + m_offset, (reg_c.sl)); + write_imagef(dst, out_idx[22] + m_offset, (reg_c.sm)); + write_imagef(dst, out_idx[23] + m_offset, (reg_c.sn)); + write_imagef(dst, out_idx[24] + m_offset, (reg_c.so)); + write_imagef(dst, out_idx[25] + m_offset, (reg_c.sp)); + write_imagef(dst, out_idx[26] + m_offset, (reg_c.sq)); + write_imagef(dst, out_idx[27] + m_offset, (reg_c.sr)); + write_imagef(dst, out_idx[28] + m_offset, (reg_c.ss)); + write_imagef(dst, out_idx[29] + m_offset, (reg_c.st)); + write_imagef(dst, out_idx[30] + m_offset, (reg_c.su)); + write_imagef(dst, out_idx[31] + m_offset, (reg_c.sv)); + + // Store zero padding parts to the index of first output in tile, override correct result in the end + barrier(CLK_GLOBAL_MEM_FENCE); + write_imagef(dst, out_idx[0] + m_offset, (reg_c.s0)); +} diff --git a/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl b/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl new file mode 100644 index 00000000000..938054cf982 --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl @@ -0,0 +1,119 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable + +#define QK_Q5_0 32 +#define N_SIMDGROUP 4 +#define SIMDGROUP_WIDTH 64 + +static inline float8 q5_0_to_fp32_packed8(ushort2 qs5x8, uchar qh5x8) { + float8 fp32x8; + fp32x8.s0 = (float)((( qs5x8.s0 & 0x000F) | (( qh5x8 & 0x01) << 4)) - 16); + fp32x8.s1 = (float)((((qs5x8.s0 & 0x00F0) >> 4 ) | (((qh5x8 >> 1) & 0x01) << 4)) - 16); + fp32x8.s2 = (float)((((qs5x8.s0 & 0x0F00) >> 8 ) | (((qh5x8 >> 2) & 0x01) << 4)) - 16); + fp32x8.s3 = (float)((((qs5x8.s0 & 0xF000) >> 12) | (((qh5x8 >> 3) & 0x01) << 4)) - 16); + fp32x8.s4 = (float)((( qs5x8.s1 & 0x000F) | (((qh5x8 >> 4) & 0x01) << 4)) - 16); + fp32x8.s5 = (float)((((qs5x8.s1 & 0x00F0) >> 4 ) | (((qh5x8 >> 5) & 0x01) << 4)) - 16); + fp32x8.s6 = (float)((((qs5x8.s1 & 0x0F00) >> 8 ) | (((qh5x8 >> 6) & 0x01) << 4)) - 16); + fp32x8.s7 = (float)((((qs5x8.s1 & 0xF000) >> 12) | (((qh5x8 >> 7) & 0x01) << 4)) - 16); + return fp32x8; +} + + +__attribute__((qcom_reqd_sub_group_size("half"))) +__kernel void kernel_gemv_moe_q5_0_f32_ns( + __global uint * src0_qs, + __global uint * src0_qh, + __global half * src0_d, + __read_only image1d_buffer_t src1, + __global uint * src2, + __global float * dst, + ulong offsetd, + uint ne00, + uint ne01, + uint ne11 +) { + uint i01 = get_global_id(0); + uint i20 = get_global_id(2); + uint sgid = get_local_id(1); + uint slid = get_sub_group_local_id(); + + uint i11 = i20 % ne11; + + uint expert_id = src2[i20]; + uint expert_offset = expert_id * ne00 * ne01 / 32; + + __private float sum = 0.0f; // each thread calculate partial sum of one output + + // loop along ne00 in block granularity, skip 4 blocks every iter + for (uint ib00 = sgid; ib00 < (ne00 / QK_Q5_0); ib00 += N_SIMDGROUP) { + + // load one block of q + uint4 regQ; + uint block_offset = expert_offset * 4 + ib00 * ne01 * 4 + i01; + + regQ.s0 = src0_qs[block_offset]; + regQ.s1 = src0_qs[block_offset + ne01]; + regQ.s2 = src0_qs[block_offset + ne01 * 2]; + regQ.s3 = src0_qs[block_offset + ne01 * 3]; + + uint offset = i11 * ne00 / 4 + ib00 * 8; + + uchar4 regQh = as_uchar4(src0_qh[ib00 * ne01 + i01 + expert_offset]); + half regS = src0_d[ib00 * ne01 + i01 + expert_offset]; + + float8 fp32x8 = q5_0_to_fp32_packed8(as_ushort2(regQ.s0), regQh.s0); + + float4 shared_y4; + shared_y4 = read_imagef(src1, (offset + 0)); + float4 acc = shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 1)); + acc += shared_y4 * fp32x8.hi; + + fp32x8 = q5_0_to_fp32_packed8(as_ushort2(regQ.s1), regQh.s1); + + shared_y4 = read_imagef(src1, (offset + 2)); + acc += shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 3)); + acc += shared_y4 * fp32x8.hi; + + + fp32x8 = q5_0_to_fp32_packed8(as_ushort2(regQ.s2), regQh.s2); + + shared_y4 = read_imagef(src1, (offset + 4)); + acc += shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 5)); + acc += shared_y4 * fp32x8.hi; + + + fp32x8 = q5_0_to_fp32_packed8(as_ushort2(regQ.s3), regQh.s3); + + shared_y4 = read_imagef(src1, (offset + 6)); + acc += shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 7)); + acc += shared_y4 * fp32x8.hi; + + sum += (float)(regS) * ((acc.s0 + acc.s1) + (acc.s2 + acc.s3)); + } + + // reduction in local memory, assumes #subgroups=4 + __local float reduceLM[SIMDGROUP_WIDTH * (N_SIMDGROUP - 1)]; + if (sgid == 1) reduceLM[SIMDGROUP_WIDTH * 0 + slid] = sum; + if (sgid == 2) reduceLM[SIMDGROUP_WIDTH * 1 + slid] = sum; + if (sgid == 3) reduceLM[SIMDGROUP_WIDTH * 2 + slid] = sum; + barrier(CLK_LOCAL_MEM_FENCE); + if (sgid == 0) sum += reduceLM[SIMDGROUP_WIDTH * 0 + slid]; + if (sgid == 0) sum += reduceLM[SIMDGROUP_WIDTH * 1 + slid]; + if (sgid == 0) sum += reduceLM[SIMDGROUP_WIDTH * 2 + slid]; + + // 1 outputs per thread in subgroup 0 + if (sgid == 0) { + dst = dst + (offsetd >> 2); + dst[i01 + i20 * ne01] = sum; + } + +} diff --git a/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl b/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl new file mode 100644 index 00000000000..f33a4ef2757 --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl @@ -0,0 +1,121 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable + +#define QK_Q5_1 32 +#define N_SIMDGROUP 4 +#define SIMDGROUP_WIDTH 64 + +static inline float8 q5_1_to_fp32_packed8(ushort2 qs5x8, uchar qh5x8, half s, half m) { + float8 fp32x8; + fp32x8.s0 = (float)((( qs5x8.s0 & 0x000F) | (( qh5x8 & 0x01) << 4)) * s + m); + fp32x8.s1 = (float)((((qs5x8.s0 & 0x00F0) >> 4 ) | (((qh5x8 >> 1) & 0x01) << 4)) * s + m); + fp32x8.s2 = (float)((((qs5x8.s0 & 0x0F00) >> 8 ) | (((qh5x8 >> 2) & 0x01) << 4)) * s + m); + fp32x8.s3 = (float)((((qs5x8.s0 & 0xF000) >> 12) | (((qh5x8 >> 3) & 0x01) << 4)) * s + m); + fp32x8.s4 = (float)((( qs5x8.s1 & 0x000F) | (((qh5x8 >> 4) & 0x01) << 4)) * s + m); + fp32x8.s5 = (float)((((qs5x8.s1 & 0x00F0) >> 4 ) | (((qh5x8 >> 5) & 0x01) << 4)) * s + m); + fp32x8.s6 = (float)((((qs5x8.s1 & 0x0F00) >> 8 ) | (((qh5x8 >> 6) & 0x01) << 4)) * s + m); + fp32x8.s7 = (float)((((qs5x8.s1 & 0xF000) >> 12) | (((qh5x8 >> 7) & 0x01) << 4)) * s + m); + return fp32x8; +} + + +__attribute__((qcom_reqd_sub_group_size("half"))) +__kernel void kernel_gemv_moe_q5_1_f32_ns( + __global uint * src0_qs, + __global uint * src0_qh, + __global half * src0_d, + __global half * src0_m, + __read_only image1d_buffer_t src1, + __global uint * src2, + __global float * dst, + ulong offsetd, + uint ne00, + uint ne01, + uint ne11 +) { + uint i01 = get_global_id(0); + uint i20 = get_global_id(2); + uint sgid = get_local_id(1); + uint slid = get_sub_group_local_id(); + + uint i11 = i20 % ne11; + + uint expert_id = src2[i20]; + uint expert_offset = expert_id * ne00 * ne01 / 32; + + __private float sum = 0.0f; // each thread calculate partial sum of one output + + // loop along ne00 in block granularity, skip 4 blocks every iter + for (uint ib00 = sgid; ib00 < (ne00 / QK_Q5_1); ib00 += N_SIMDGROUP) { + + // load one block of q + uint4 regQ; + uint block_offset = expert_offset * 4 + ib00 * ne01 * 4 + i01; + + regQ.s0 = src0_qs[block_offset]; + regQ.s1 = src0_qs[block_offset + ne01]; + regQ.s2 = src0_qs[block_offset + ne01 * 2]; + regQ.s3 = src0_qs[block_offset + ne01 * 3]; + + uint offset = i11 * ne00 / 4 + ib00 * 8; + + uchar4 regQh = as_uchar4(src0_qh[ib00 * ne01 + i01 + expert_offset]); + half regM = src0_m[ib00 * ne01 + i01 + expert_offset]; + half regS = src0_d[ib00 * ne01 + i01 + expert_offset]; + + float8 fp32x8 = q5_1_to_fp32_packed8(as_ushort2(regQ.s0), regQh.s0, regS, regM); + + float4 shared_y4; + shared_y4 = read_imagef(src1, (offset + 0)); + float4 acc = shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 1)); + acc += shared_y4 * fp32x8.hi; + + fp32x8 = q5_1_to_fp32_packed8(as_ushort2(regQ.s1), regQh.s1, regS, regM); + + shared_y4 = read_imagef(src1, (offset + 2)); + acc += shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 3)); + acc += shared_y4 * fp32x8.hi; + + + fp32x8 = q5_1_to_fp32_packed8(as_ushort2(regQ.s2), regQh.s2, regS, regM); + + shared_y4 = read_imagef(src1, (offset + 4)); + acc += shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 5)); + acc += shared_y4 * fp32x8.hi; + + + fp32x8 = q5_1_to_fp32_packed8(as_ushort2(regQ.s3), regQh.s3, regS, regM); + + shared_y4 = read_imagef(src1, (offset + 6)); + acc += shared_y4 * fp32x8.lo; + + shared_y4 = read_imagef(src1, (offset + 7)); + acc += shared_y4 * fp32x8.hi; + + sum += ((acc.s0 + acc.s1) + (acc.s2 + acc.s3)); + } + + // reduction in local memory, assumes #subgroups=4 + __local float reduceLM[SIMDGROUP_WIDTH * (N_SIMDGROUP - 1)]; + if (sgid == 1) reduceLM[SIMDGROUP_WIDTH * 0 + slid] = sum; + if (sgid == 2) reduceLM[SIMDGROUP_WIDTH * 1 + slid] = sum; + if (sgid == 3) reduceLM[SIMDGROUP_WIDTH * 2 + slid] = sum; + barrier(CLK_LOCAL_MEM_FENCE); + if (sgid == 0) sum += reduceLM[SIMDGROUP_WIDTH * 0 + slid]; + if (sgid == 0) sum += reduceLM[SIMDGROUP_WIDTH * 1 + slid]; + if (sgid == 0) sum += reduceLM[SIMDGROUP_WIDTH * 2 + slid]; + + // 1 outputs per thread in subgroup 0 + if (sgid == 0) { + dst = dst + (offsetd >> 2); + dst[i01 + i20 * ne01] = sum; + } + +} diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index b24101c78b0..401c75c1230 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -3148,6 +3148,16 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str } ctx->param_arena.reset(); commands.clear(); +#ifdef GGML_WEBGPU_GPU_PROFILE + // flush before the next batch can overflow the QuerySet + if (ctx->profile_timestamp_query_count + 2 * ctx->global_ctx->command_submit_batch_size >= + WEBGPU_MAX_PROFILE_QUERY_COUNT) { + ggml_backend_webgpu_collect_profile_results(ctx, profile_pipeline_names, num_inflight_batches); + // reset profile timestamp state + ctx->profile_timestamp_query_count = 0; + profile_pipeline_names.clear(); + } +#endif } node_idx += num_encoded_ops; diff --git a/tools/server/public/bundle.js b/tools/server/public/bundle.js index 5a42071e5f2..7258e6bea4a 100644 --- a/tools/server/public/bundle.js +++ b/tools/server/public/bundle.js @@ -6357,20 +6357,20 @@ ChatAttachmentsPreviewCurrentItemPdf($$anchor4,{get currentItem(){return $$props ChatAttachmentsPreviewCurrentItemText($$anchor4,{get displayTextContent(){return $$props.displayTextContent},get language(){return $$props.language}})},consequent_3=$$anchor4=>{ChatAttachmentsPreviewCurrentItemAudio($$anchor4,{get currentItem(){return $$props.currentItem},get audioSrc(){return $$props.audioSrc}})},consequent_4=$$anchor4=>{ChatAttachmentsPreviewCurrentItemUnavailable($$anchor4,{get IconComponent(){return get$3(IconComponent)}})};if_block(node_2,$$render=>{$$props.isPdf?$$render(consequent): $$props.isImage?$$render(consequent_1,1):$$props.isText&&$$props.displayTextContent?$$render(consequent_2,2):$$props.isAudio?$$render(consequent_3,3):get$3(isUnavailable)&&$$render(consequent_4,4)})}append($$anchor3,fragment_2)}),append($$anchor2,fragment_1)};if_block(node2,$$render=>{$$props.currentItem&&$$render(consequent_5)})}append($$anchor,fragment),pop()}class ChatService{static async generateTitle(message,model,signal){let titleResponse="";try{await ChatService.sendMessage([message],{model:model|| void 0,stream:!0,custom:{chat_template_kwargs:{enable_thinking:!1}},onChunk:chunk=>{titleResponse+=chunk}},void 0,signal)}catch{return""}return titleResponse}static async sendMessage(messages,options={},conversationId,signal){const{stream,onChunk,onComplete,onError,onReasoningChunk,onToolCallChunk,onModel,onTimings,tools,temperature,max_tokens,dynatemp_range,dynatemp_exponent,top_k,top_p,min_p,xtc_probability,xtc_threshold,typ_p,repeat_last_n,repeat_penalty,presence_penalty,frequency_penalty,dry_multiplier, -dry_base,dry_allowed_length,dry_penalty_last_n,samplers,backend_sampling,custom:custom2,timings_per_token,disableReasoningParsing,excludeReasoningFromContext}=options,normalizedMessages=messages.map(msg=>{if("id"in msg&&"convId"in msg&&"timestamp"in msg){const dbMsg=msg;return ChatService.convertDbMessageToApiChatMessageData(dbMsg)}else return msg}).filter(msg=>msg.role===MessageRole.SYSTEM?(typeof msg.content=="string"?msg.content:"").trim().length>0:!0);options.model&&!modelsStore.modelSupportsVision( -options.model)&&normalizedMessages.forEach(msg=>{Array.isArray(msg.content)&&(msg.content=msg.content.filter(part=>part.type===ContentPartType.IMAGE_URL?(console.info(`[ChatService] Skipping image attachment in message history (model "${options.model}" does not support vision)`),!1):!0),msg.content.length===1&&msg.content[0].type===ContentPartType.TEXT&&typeof msg.content[0].text=="string"&&(msg.content=msg.content[0].text))});const requestBody={messages:normalizedMessages.map(msg=>{const mapped={ -role:msg.role,content:msg.content,tool_calls:msg.tool_calls,tool_call_id:msg.tool_call_id};return!excludeReasoningFromContext&&msg.reasoning_content&&(mapped.reasoning_content=msg.reasoning_content),mapped}),stream,return_progress:stream?!0:void 0,tools:tools&&tools.length>0?tools:void 0};if(options.model&&(requestBody.model=options.model),requestBody.reasoning_format=disableReasoningParsing?ReasoningFormat.NONE:ReasoningFormat.AUTO,temperature!==void 0&&(requestBody.temperature=temperature),max_tokens!== -void 0&&(requestBody.max_tokens=max_tokens!==null&&max_tokens!==0?max_tokens:-1),dynatemp_range!==void 0&&(requestBody.dynatemp_range=dynatemp_range),dynatemp_exponent!==void 0&&(requestBody.dynatemp_exponent=dynatemp_exponent),top_k!==void 0&&(requestBody.top_k=top_k),top_p!==void 0&&(requestBody.top_p=top_p),min_p!==void 0&&(requestBody.min_p=min_p),xtc_probability!==void 0&&(requestBody.xtc_probability=xtc_probability),xtc_threshold!==void 0&&(requestBody.xtc_threshold=xtc_threshold),typ_p!== -void 0&&(requestBody.typ_p=typ_p),repeat_last_n!==void 0&&(requestBody.repeat_last_n=repeat_last_n),repeat_penalty!==void 0&&(requestBody.repeat_penalty=repeat_penalty),presence_penalty!==void 0&&(requestBody.presence_penalty=presence_penalty),frequency_penalty!==void 0&&(requestBody.frequency_penalty=frequency_penalty),dry_multiplier!==void 0&&(requestBody.dry_multiplier=dry_multiplier),dry_base!==void 0&&(requestBody.dry_base=dry_base),dry_allowed_length!==void 0&&(requestBody.dry_allowed_length= -dry_allowed_length),dry_penalty_last_n!==void 0&&(requestBody.dry_penalty_last_n=dry_penalty_last_n),samplers!==void 0&&(requestBody.samplers=typeof samplers=="string"?samplers.split(";").filter(s2=>s2.trim()):samplers),backend_sampling!==void 0&&(requestBody.backend_sampling=backend_sampling),timings_per_token!==void 0&&(requestBody.timings_per_token=timings_per_token),custom2)try{const customParams=typeof custom2=="string"?JSON.parse(custom2):custom2;Object.assign(requestBody,customParams)}catch(error2){ -console.warn("Failed to parse custom parameters:",error2)}try{const response=await fetch("./v1/chat/completions",{method:"POST",headers:getJsonHeaders(),body:JSON.stringify(requestBody),signal});if(!response.ok){const error2=await ChatService.parseErrorResponse(response);throw onError&&onError(error2),error2}if(stream){await ChatService.handleStreamResponse(response,onChunk,onComplete,onError,onReasoningChunk,onToolCallChunk,onModel,onTimings,conversationId,signal);return}else return ChatService. -handleNonStreamResponse(response,onComplete,onError,onToolCallChunk,onModel)}catch(error2){if(isAbortError(error2)){console.log("Chat completion request was aborted");return}let userFriendlyError;throw error2 instanceof Error?error2.name==="TypeError"&&error2.message.includes("fetch")?(userFriendlyError=new Error("Unable to connect to server - please check if the server is running"),userFriendlyError.name="NetworkError"):error2.message.includes("ECONNREFUSED")?(userFriendlyError=new Error("Conne\ -ction refused - server may be offline"),userFriendlyError.name="NetworkError"):error2.message.includes("ETIMEDOUT")?(userFriendlyError=new Error("Request timed out - the server took too long to respond"),userFriendlyError.name="TimeoutError"):userFriendlyError=error2:userFriendlyError=new Error("Unknown error occurred while sending message"),console.error("Error in sendMessage:",error2),onError&&onError(userFriendlyError),userFriendlyError}}static async areAllSlotsIdle(model,signal){try{const url2=model? -`./slots?model=${encodeURIComponent(model)}`:"./slots",res=await fetch(url2,{signal});return res.ok?(await res.json()).every(s2=>!s2.is_processing):!0}catch{return!0}}static async preEncode(messages,model,excludeReasoning,signal){const requestBody={messages:messages.map(msg=>"id"in msg&&"convId"in msg&&"timestamp"in msg?ChatService.convertDbMessageToApiChatMessageData(msg):msg).filter(msg=>msg.role===MessageRole.SYSTEM?(typeof msg.content=="string"?msg.content:"").trim().length>0:!0).map(msg=>{const mapped={ -role:msg.role,content:excludeReasoning?ChatService.stripReasoningContent(msg.content):msg.content,tool_calls:msg.tool_calls,tool_call_id:msg.tool_call_id};return!excludeReasoning&&msg.reasoning_content&&(mapped.reasoning_content=msg.reasoning_content),mapped}),stream:!1,n_predict:0};model&&(requestBody.model=model);try{await fetch("./v1/chat/completions",{method:"POST",headers:getJsonHeaders(),body:JSON.stringify(requestBody),signal})}catch(error2){isAbortError(error2)||console.warn("[ChatServic\ -e] Pre-encode request failed:",error2)}}static async handleStreamResponse(response,onChunk,onComplete,onError,onReasoningChunk,onToolCallChunk,onModel,onTimings,conversationId,abortSignal){const reader=response.body?.getReader();if(!reader)throw new Error("No response body");const decoder=new TextDecoder;let aggregatedContent="",fullReasoningContent="",aggregatedToolCalls=[],lastTimings,streamFinished=!1,modelEmitted=!1,toolCallIndexOffset=0,hasOpenToolCallBatch=!1;const finalizeOpenToolCallBatch=()=>{ -hasOpenToolCallBatch&&(toolCallIndexOffset=aggregatedToolCalls.length,hasOpenToolCallBatch=!1)},processToolCallDelta=toolCalls=>{if(!toolCalls||toolCalls.length===0||(aggregatedToolCalls=ChatService.mergeToolCallDeltas(aggregatedToolCalls,toolCalls,toolCallIndexOffset),aggregatedToolCalls.length===0))return;hasOpenToolCallBatch=!0;const serializedToolCalls=JSON.stringify(aggregatedToolCalls);serializedToolCalls&&(abortSignal?.aborted||onToolCallChunk?.(serializedToolCalls))};try{let chunk="";for(;!abortSignal?. -aborted;){const{done,value}=await reader.read();if(done||abortSignal?.aborted)break;chunk+=decoder.decode(value,{stream:!0});const lines=chunk.split(` +dry_base,dry_allowed_length,dry_penalty_last_n,samplers,backend_sampling,custom:custom2,timings_per_token,disableReasoningParsing,excludeReasoningFromContext,continueFinalMessage}=options,normalizedMessages=messages.map(msg=>{if("id"in msg&&"convId"in msg&&"timestamp"in msg){const dbMsg=msg;return ChatService.convertDbMessageToApiChatMessageData(dbMsg)}else return msg}).filter(msg=>msg.role===MessageRole.SYSTEM?(typeof msg.content=="string"?msg.content:"").trim().length>0:!0);options.model&&!modelsStore. +modelSupportsVision(options.model)&&normalizedMessages.forEach(msg=>{Array.isArray(msg.content)&&(msg.content=msg.content.filter(part=>part.type===ContentPartType.IMAGE_URL?(console.info(`[ChatService] Skipping image attachment in message history (model "${options.model}" does not support vision)`),!1):!0),msg.content.length===1&&msg.content[0].type===ContentPartType.TEXT&&typeof msg.content[0].text=="string"&&(msg.content=msg.content[0].text))});const requestBody={messages:normalizedMessages.map( +msg=>{const mapped={role:msg.role,content:msg.content,tool_calls:msg.tool_calls,tool_call_id:msg.tool_call_id};return!excludeReasoningFromContext&&msg.reasoning_content&&(mapped.reasoning_content=msg.reasoning_content),mapped}),stream,return_progress:stream?!0:void 0,tools:tools&&tools.length>0?tools:void 0};if(options.model&&(requestBody.model=options.model),requestBody.reasoning_format=disableReasoningParsing?ReasoningFormat.NONE:ReasoningFormat.AUTO,continueFinalMessage&&(requestBody.continue_final_message= +!0,requestBody.add_generation_prompt=!1),temperature!==void 0&&(requestBody.temperature=temperature),max_tokens!==void 0&&(requestBody.max_tokens=max_tokens!==null&&max_tokens!==0?max_tokens:-1),dynatemp_range!==void 0&&(requestBody.dynatemp_range=dynatemp_range),dynatemp_exponent!==void 0&&(requestBody.dynatemp_exponent=dynatemp_exponent),top_k!==void 0&&(requestBody.top_k=top_k),top_p!==void 0&&(requestBody.top_p=top_p),min_p!==void 0&&(requestBody.min_p=min_p),xtc_probability!==void 0&&(requestBody. +xtc_probability=xtc_probability),xtc_threshold!==void 0&&(requestBody.xtc_threshold=xtc_threshold),typ_p!==void 0&&(requestBody.typ_p=typ_p),repeat_last_n!==void 0&&(requestBody.repeat_last_n=repeat_last_n),repeat_penalty!==void 0&&(requestBody.repeat_penalty=repeat_penalty),presence_penalty!==void 0&&(requestBody.presence_penalty=presence_penalty),frequency_penalty!==void 0&&(requestBody.frequency_penalty=frequency_penalty),dry_multiplier!==void 0&&(requestBody.dry_multiplier=dry_multiplier),dry_base!== +void 0&&(requestBody.dry_base=dry_base),dry_allowed_length!==void 0&&(requestBody.dry_allowed_length=dry_allowed_length),dry_penalty_last_n!==void 0&&(requestBody.dry_penalty_last_n=dry_penalty_last_n),samplers!==void 0&&(requestBody.samplers=typeof samplers=="string"?samplers.split(";").filter(s2=>s2.trim()):samplers),backend_sampling!==void 0&&(requestBody.backend_sampling=backend_sampling),timings_per_token!==void 0&&(requestBody.timings_per_token=timings_per_token),custom2)try{const customParams=typeof custom2== +"string"?JSON.parse(custom2):custom2;Object.assign(requestBody,customParams)}catch(error2){console.warn("Failed to parse custom parameters:",error2)}try{const response=await fetch("./v1/chat/completions",{method:"POST",headers:getJsonHeaders(),body:JSON.stringify(requestBody),signal});if(!response.ok){const error2=await ChatService.parseErrorResponse(response);throw onError&&onError(error2),error2}if(stream){await ChatService.handleStreamResponse(response,onChunk,onComplete,onError,onReasoningChunk, +onToolCallChunk,onModel,onTimings,conversationId,signal);return}else return ChatService.handleNonStreamResponse(response,onComplete,onError,onToolCallChunk,onModel)}catch(error2){if(isAbortError(error2)){console.log("Chat completion request was aborted");return}let userFriendlyError;throw error2 instanceof Error?error2.name==="TypeError"&&error2.message.includes("fetch")?(userFriendlyError=new Error("Unable to connect to server - please check if the server is running"),userFriendlyError.name="Ne\ +tworkError"):error2.message.includes("ECONNREFUSED")?(userFriendlyError=new Error("Connection refused - server may be offline"),userFriendlyError.name="NetworkError"):error2.message.includes("ETIMEDOUT")?(userFriendlyError=new Error("Request timed out - the server took too long to respond"),userFriendlyError.name="TimeoutError"):userFriendlyError=error2:userFriendlyError=new Error("Unknown error occurred while sending message"),console.error("Error in sendMessage:",error2),onError&&onError(userFriendlyError), +userFriendlyError}}static async areAllSlotsIdle(model,signal){try{const url2=model?`./slots?model=${encodeURIComponent(model)}`:"./slots",res=await fetch(url2,{signal});return res.ok?(await res.json()).every(s2=>!s2.is_processing):!0}catch{return!0}}static async preEncode(messages,model,excludeReasoning,signal){const requestBody={messages:messages.map(msg=>"id"in msg&&"convId"in msg&&"timestamp"in msg?ChatService.convertDbMessageToApiChatMessageData(msg):msg).filter(msg=>msg.role===MessageRole.SYSTEM? +(typeof msg.content=="string"?msg.content:"").trim().length>0:!0).map(msg=>{const mapped={role:msg.role,content:excludeReasoning?ChatService.stripReasoningContent(msg.content):msg.content,tool_calls:msg.tool_calls,tool_call_id:msg.tool_call_id};return!excludeReasoning&&msg.reasoning_content&&(mapped.reasoning_content=msg.reasoning_content),mapped}),stream:!1,n_predict:0};model&&(requestBody.model=model);try{await fetch("./v1/chat/completions",{method:"POST",headers:getJsonHeaders(),body:JSON.stringify( +requestBody),signal})}catch(error2){isAbortError(error2)||console.warn("[ChatService] Pre-encode request failed:",error2)}}static async handleStreamResponse(response,onChunk,onComplete,onError,onReasoningChunk,onToolCallChunk,onModel,onTimings,conversationId,abortSignal){const reader=response.body?.getReader();if(!reader)throw new Error("No response body");const decoder=new TextDecoder;let aggregatedContent="",fullReasoningContent="",aggregatedToolCalls=[],lastTimings,streamFinished=!1,modelEmitted=!1, +toolCallIndexOffset=0,hasOpenToolCallBatch=!1;const finalizeOpenToolCallBatch=()=>{hasOpenToolCallBatch&&(toolCallIndexOffset=aggregatedToolCalls.length,hasOpenToolCallBatch=!1)},processToolCallDelta=toolCalls=>{if(!toolCalls||toolCalls.length===0||(aggregatedToolCalls=ChatService.mergeToolCallDeltas(aggregatedToolCalls,toolCalls,toolCallIndexOffset),aggregatedToolCalls.length===0))return;hasOpenToolCallBatch=!0;const serializedToolCalls=JSON.stringify(aggregatedToolCalls);serializedToolCalls&&(abortSignal?. +aborted||onToolCallChunk?.(serializedToolCalls))};try{let chunk="";for(;!abortSignal?.aborted;){const{done,value}=await reader.read();if(done||abortSignal?.aborted)break;chunk+=decoder.decode(value,{stream:!0});const lines=chunk.split(` `);chunk=lines.pop()||"";for(const line of lines){if(abortSignal?.aborted)break;if(line.startsWith(UrlProtocol.DATA)){const data=line.slice(6);if(data==="[DONE]"){streamFinished=!0;continue}try{const parsed=JSON.parse(data),choice=parsed.choices?.[0],content2=choice?.delta?.content,reasoningContent=choice?.delta?.reasoning_content,toolCalls=choice?.delta?.tool_calls,timings=parsed.timings,promptProgress=parsed.prompt_progress,chunkModel=ChatService.extractModelName(parsed);chunkModel&&!modelEmitted&& (modelEmitted=!0,onModel?.(chunkModel)),promptProgress&&ChatService.notifyTimings(void 0,promptProgress,onTimings),timings&&(ChatService.notifyTimings(timings,promptProgress,onTimings),lastTimings=timings),content2&&(finalizeOpenToolCallBatch(),aggregatedContent+=content2,abortSignal?.aborted||onChunk?.(content2)),reasoningContent&&(finalizeOpenToolCallBatch(),fullReasoningContent+=reasoningContent,abortSignal?.aborted||onReasoningChunk?.(reasoningContent)),processToolCallDelta(toolCalls)}catch(e){ console.error("Error parsing JSON chunk:",e)}}}if(abortSignal?.aborted)break}if(abortSignal?.aborted)return;if(streamFinished){finalizeOpenToolCallBatch();const finalToolCalls=aggregatedToolCalls.length>0?JSON.stringify(aggregatedToolCalls):void 0;onComplete?.(aggregatedContent,fullReasoningContent||void 0,lastTimings,finalToolCalls)}}catch(error2){const err=error2 instanceof Error?error2:new Error("Stream error");throw onError?.(err),err}finally{reader.releaseLock()}}static async handleNonStreamResponse(response,onComplete,onError,onToolCallChunk,onModel){ @@ -6506,108 +6506,108 @@ push("user message")):msg.role===MessageRole.ASSISTANT&&(assistantMessages++,mes return;if(filterByLeafNodeId(allMessages,activeConv.currNode||"",!1).some(m=>m.id===messageId)&&messageToDelete.parent){const siblings2=allMessages.filter(m=>m.parent===messageToDelete.parent&&m.id!==messageId);if(siblings2.length>0){const latestSibling=siblings2.reduce((latest,sibling2)=>sibling2.timestamp>latest.timestamp?sibling2:latest);await conversationsStore.updateCurrentNode(findLeafNode(allMessages,latestSibling.id))}else messageToDelete.parent&&await conversationsStore.updateCurrentNode( findLeafNode(allMessages,messageToDelete.parent))}await DatabaseService.deleteMessageCascading(activeConv.id,messageId),await conversationsStore.refreshActiveMessages(),conversationsStore.updateConversationTimestamp()}catch(error2){console.error("Failed to delete message:",error2)}}async continueAssistantMessage(messageId){const activeConv=conversationsStore.activeConversation;if(!activeConv||this.isChatLoadingInternal(activeConv.id))return;const result=this.getMessageByIdWithRole(messageId,MessageRole. ASSISTANT);if(!result)return;const{message:msg,index:idx}=result;try{this.showErrorDialog(null),this.setChatLoading(activeConv.id,!0),this.clearChatStreaming(activeConv.id);const allMessages=await conversationsStore.getConversationMessages(activeConv.id),dbMessage=findMessageById(allMessages,messageId);if(!dbMessage){this.setChatLoading(activeConv.id,!1);return}const originalContent=dbMessage.content,originalReasoning=dbMessage.reasoningContent||"",contextWithContinue=[...conversationsStore.activeMessages. -slice(0,idx),{role:MessageRole.ASSISTANT,content:originalContent,reasoning_content:originalReasoning||void 0}];let appendedContent="",appendedReasoning="",hasReceivedContent=!1;const updateStreamingContent=fullContent=>{this.setChatStreaming(msg.convId,fullContent,msg.id),conversationsStore.updateMessageAtIndex(idx,{content:fullContent})},abortController=this.getOrCreateAbortController(msg.convId);await ChatService.sendMessage(contextWithContinue,{...this.getApiOptions(),onChunk:chunk=>{appendedContent+= -chunk,hasReceivedContent=!0,updateStreamingContent(originalContent+appendedContent)},onReasoningChunk:chunk=>{appendedReasoning+=chunk,hasReceivedContent=!0,this.setChatStreaming(msg.convId,originalContent+appendedContent,msg.id),conversationsStore.updateMessageAtIndex(idx,{reasoningContent:originalReasoning+appendedReasoning})},onTimings:(timings,promptProgress)=>{const tokensPerSecond=timings?.predicted_ms&&timings?.predicted_n?timings.predicted_n/timings.predicted_ms*1e3:0;this.updateProcessingStateFromTimings( -{prompt_n:timings?.prompt_n||0,prompt_ms:timings?.prompt_ms,predicted_n:timings?.predicted_n||0,predicted_per_second:tokensPerSecond,cache_n:timings?.cache_n||0,prompt_progress:promptProgress},msg.convId)},onComplete:async(finalContent,reasoningContent,timings)=>{const finalAppendedContent=hasReceivedContent?appendedContent:finalContent||"",finalAppendedReasoning=hasReceivedContent?appendedReasoning:reasoningContent||"",fullContent=originalContent+finalAppendedContent,fullReasoning=originalReasoning+ -finalAppendedReasoning||void 0;await DatabaseService.updateMessage(msg.id,{content:fullContent,reasoningContent:fullReasoning,timestamp:Date.now(),timings}),conversationsStore.updateMessageAtIndex(idx,{content:fullContent,reasoningContent:fullReasoning,timestamp:Date.now(),timings}),conversationsStore.updateConversationTimestamp(),this.setChatLoading(msg.convId,!1),this.clearChatStreaming(msg.convId),this.setProcessingState(msg.convId,null)},onError:async error2=>{if(isAbortError(error2)){hasReceivedContent&& -appendedContent&&(await DatabaseService.updateMessage(msg.id,{content:originalContent+appendedContent,reasoningContent:originalReasoning+appendedReasoning||void 0,timestamp:Date.now()}),conversationsStore.updateMessageAtIndex(idx,{content:originalContent+appendedContent,reasoningContent:originalReasoning+appendedReasoning||void 0,timestamp:Date.now()})),this.setChatLoading(msg.convId,!1),this.clearChatStreaming(msg.convId),this.setProcessingState(msg.convId,null);return}console.error("Continue g\ -eneration error:",error2),conversationsStore.updateMessageAtIndex(idx,{content:originalContent}),await DatabaseService.updateMessage(msg.id,{content:originalContent}),this.setChatLoading(msg.convId,!1),this.clearChatStreaming(msg.convId),this.setProcessingState(msg.convId,null),this.showErrorDialog({type:error2.name==="TimeoutError"?ErrorDialogType.TIMEOUT:ErrorDialogType.SERVER,message:error2.message})}},msg.convId,abortController.signal)}catch(error2){isAbortError(error2)||console.error("Faile\ -d to continue message:",error2),activeConv&&this.setChatLoading(activeConv.id,!1)}}async editAssistantMessage(messageId,newContent,shouldBranch){const activeConv=conversationsStore.activeConversation;if(!activeConv||this.isChatLoadingInternal(activeConv.id))return;const result=this.getMessageByIdWithRole(messageId,MessageRole.ASSISTANT);if(!result)return;const{message:msg,index:idx}=result;try{if(shouldBranch){const newMessage=await DatabaseService.createMessageBranch({convId:msg.convId,type:msg. -type,timestamp:Date.now(),role:msg.role,content:newContent,toolCalls:msg.toolCalls||"",children:[],model:msg.model},msg.parent);await conversationsStore.updateCurrentNode(newMessage.id)}else await DatabaseService.updateMessage(msg.id,{content:newContent}),conversationsStore.updateMessageAtIndex(idx,{content:newContent});conversationsStore.updateConversationTimestamp(),await conversationsStore.refreshActiveMessages()}catch(error2){console.error("Failed to edit assistant message:",error2)}}async editUserMessagePreserveResponses(messageId,newContent,newExtras){ -const activeConv=conversationsStore.activeConversation;if(!activeConv)return;const result=this.getMessageByIdWithRole(messageId,MessageRole.USER);if(!result)return;const{message:msg,index:idx}=result;try{const updateData={content:newContent};newExtras!==void 0&&(updateData.extra=JSON.parse(JSON.stringify(newExtras))),await DatabaseService.updateMessage(messageId,updateData),conversationsStore.updateMessageAtIndex(idx,updateData);const rootMessage=(await conversationsStore.getConversationMessages( -activeConv.id)).find(m=>m.type==="root"&&m.parent===null);rootMessage&&msg.parent===rootMessage.id&&newContent.trim()&&await conversationsStore.updateConversationTitleWithConfirmation(activeConv.id,generateConversationTitle(newContent,!!config$1().titleGenerationUseFirstLine)),conversationsStore.updateConversationTimestamp()}catch(error2){console.error("Failed to edit user message:",error2)}}async editMessageWithBranching(messageId,newContent,newExtras){const activeConv=conversationsStore.activeConversation; -if(!activeConv||this.isChatLoadingInternal(activeConv.id))return;let result=this.getMessageByIdWithRole(messageId,MessageRole.USER);if(result||(result=this.getMessageByIdWithRole(messageId,MessageRole.SYSTEM)),!result)return;const{message:msg,index:idx}=result;try{const allMessages=await conversationsStore.getConversationMessages(activeConv.id),rootMessage=allMessages.find(m=>m.type==="root"&&m.parent===null),isFirstUserMessage=msg.role===MessageRole.USER&&rootMessage&&msg.parent===rootMessage.id, -extrasToUse=newExtras!==void 0?JSON.parse(JSON.stringify(newExtras)):msg.extra?JSON.parse(JSON.stringify(msg.extra)):void 0;let messageIdForResponse;const dbMsg=findMessageById(allMessages,msg.id);if(dbMsg?dbMsg.children.length>0:msg.children.length>0){const parentId=msg.parent||rootMessage?.id;if(!parentId)return;const newMessage=await DatabaseService.createMessageBranch({convId:msg.convId,type:msg.type,timestamp:Date.now(),role:msg.role,content:newContent,toolCalls:msg.toolCalls||"",children:[], -extra:extrasToUse,model:msg.model},parentId);await conversationsStore.updateCurrentNode(newMessage.id),messageIdForResponse=newMessage.id}else{const updates={content:newContent,timestamp:Date.now(),extra:extrasToUse};await DatabaseService.updateMessage(msg.id,updates),conversationsStore.updateMessageAtIndex(idx,updates),messageIdForResponse=msg.id}conversationsStore.updateConversationTimestamp(),isFirstUserMessage&&newContent.trim()&&await conversationsStore.updateConversationTitleWithConfirmation( -activeConv.id,generateConversationTitle(newContent,!!config$1().titleGenerationUseFirstLine)),await conversationsStore.refreshActiveMessages(),msg.role===MessageRole.USER&&await this.generateResponseForMessage(messageIdForResponse)}catch(error2){console.error("Failed to edit message with branching:",error2)}}async generateResponseForMessage(userMessageId){const activeConv=conversationsStore.activeConversation;if(activeConv){this.showErrorDialog(null),this.setChatLoading(activeConv.id,!0),this.clearChatStreaming( -activeConv.id);try{const allMessages=await conversationsStore.getConversationMessages(activeConv.id),conversationPath=filterByLeafNodeId(allMessages,userMessageId,!1),assistantMessage=await DatabaseService.createMessageBranch({convId:activeConv.id,type:MessageType.TEXT,timestamp:Date.now(),role:MessageRole.ASSISTANT,content:"",toolCalls:"",children:[],model:null},userMessageId);conversationsStore.addMessageToActive(assistantMessage),await this.streamChatCompletion(conversationPath,assistantMessage)}catch(error2){ -console.error("Failed to generate response:",error2),this.setChatLoading(activeConv.id,!1)}}}getContextTotal(){const activeConvId=this.activeConversationId,activeState=activeConvId?this.getProcessingState(activeConvId):null;if(activeState&&typeof activeState.contextTotal=="number"&&activeState.contextTotal>0)return activeState.contextTotal;if(isRouterMode()){const modelContextSize=selectedModelContextSize();if(typeof modelContextSize=="number"&&modelContextSize>0)return modelContextSize}else{const propsContextSize=contextSize(); -if(typeof propsContextSize=="number"&&propsContextSize>0)return propsContextSize}return null}updateProcessingStateFromTimings(timingData,conversationId){const processingState=this.parseTimingData(timingData);if(processingState===null){console.warn("Failed to parse timing data - skipping update");return}const targetId=conversationId||this.activeConversationId;targetId&&this.setProcessingState(targetId,processingState)}parseTimingData(timingData){const promptTokens=timingData.prompt_n||0,promptMs=timingData. -prompt_ms||void 0,predictedTokens=timingData.predicted_n||0,tokensPerSecond=timingData.predicted_per_second||0,cacheTokens=timingData.cache_n||0,promptProgress=timingData.prompt_progress,contextTotal=this.getContextTotal(),currentConfig=config$1(),outputTokensMax=currentConfig.max_tokens||-1,contextUsed=promptTokens+cacheTokens+predictedTokens,outputTokensUsed=predictedTokens,progressCache=promptProgress?.cache||0,progressActualDone=(promptProgress?.processed??0)-progressCache,progressActualTotal=(promptProgress?. -total??0)-progressCache,progressPercent=promptProgress?Math.round(progressActualDone/progressActualTotal*100):void 0;return{status:predictedTokens>0?"generating":promptProgress?"preparing":"idle",tokensDecoded:predictedTokens,tokensRemaining:outputTokensMax-predictedTokens,contextUsed,contextTotal,outputTokensUsed,outputTokensMax,hasNextToken:predictedTokens>0,tokensPerSecond,temperature:currentConfig.temperature??.8,topP:currentConfig.top_p??.95,speculative:!1,progressPercent,promptProgress,promptTokens, -promptMs,cacheTokens}}restoreProcessingStateFromMessages(messages,conversationId){for(let i=messages.length-1;i>=0;i--){const message=messages[i];if(message.role===MessageRole.ASSISTANT&&message.timings){const restoredState=this.parseTimingData({prompt_n:message.timings.prompt_n||0,prompt_ms:message.timings.prompt_ms,predicted_n:message.timings.predicted_n||0,predicted_per_second:message.timings.predicted_n&&message.timings.predicted_ms?message.timings.predicted_n/message.timings.predicted_ms*1e3: -0,cache_n:message.timings.cache_n||0});if(restoredState){this.setProcessingState(conversationId,restoredState);return}}}}getConversationModel(messages){for(let i=messages.length-1;i>=0;i--){const message=messages[i];if(message.role===MessageRole.ASSISTANT&&message.model)return message.model}return null}getApiOptions(){const currentConfig=config$1(),hasValue=value=>value!=null&&value!=="",apiOptions={stream:!0,timings_per_token:!0};if(isRouterMode()){const modelName=selectedModelName();modelName&& -(apiOptions.model=modelName)}return currentConfig.systemMessage&&(apiOptions.systemMessage=currentConfig.systemMessage),currentConfig.disableReasoningParsing&&(apiOptions.disableReasoningParsing=!0),currentConfig.excludeReasoningFromContext&&(apiOptions.excludeReasoningFromContext=!0),hasValue(currentConfig.temperature)&&(apiOptions.temperature=Number(currentConfig.temperature)),hasValue(currentConfig.max_tokens)&&(apiOptions.max_tokens=Number(currentConfig.max_tokens)),hasValue(currentConfig.dynatemp_range)&& -(apiOptions.dynatemp_range=Number(currentConfig.dynatemp_range)),hasValue(currentConfig.dynatemp_exponent)&&(apiOptions.dynatemp_exponent=Number(currentConfig.dynatemp_exponent)),hasValue(currentConfig.top_k)&&(apiOptions.top_k=Number(currentConfig.top_k)),hasValue(currentConfig.top_p)&&(apiOptions.top_p=Number(currentConfig.top_p)),hasValue(currentConfig.min_p)&&(apiOptions.min_p=Number(currentConfig.min_p)),hasValue(currentConfig.xtc_probability)&&(apiOptions.xtc_probability=Number(currentConfig. -xtc_probability)),hasValue(currentConfig.xtc_threshold)&&(apiOptions.xtc_threshold=Number(currentConfig.xtc_threshold)),hasValue(currentConfig.typ_p)&&(apiOptions.typ_p=Number(currentConfig.typ_p)),hasValue(currentConfig.repeat_last_n)&&(apiOptions.repeat_last_n=Number(currentConfig.repeat_last_n)),hasValue(currentConfig.repeat_penalty)&&(apiOptions.repeat_penalty=Number(currentConfig.repeat_penalty)),hasValue(currentConfig.presence_penalty)&&(apiOptions.presence_penalty=Number(currentConfig.presence_penalty)), -hasValue(currentConfig.frequency_penalty)&&(apiOptions.frequency_penalty=Number(currentConfig.frequency_penalty)),hasValue(currentConfig.dry_multiplier)&&(apiOptions.dry_multiplier=Number(currentConfig.dry_multiplier)),hasValue(currentConfig.dry_base)&&(apiOptions.dry_base=Number(currentConfig.dry_base)),hasValue(currentConfig.dry_allowed_length)&&(apiOptions.dry_allowed_length=Number(currentConfig.dry_allowed_length)),hasValue(currentConfig.dry_penalty_last_n)&&(apiOptions.dry_penalty_last_n=Number( -currentConfig.dry_penalty_last_n)),currentConfig.samplers&&(apiOptions.samplers=currentConfig.samplers),apiOptions.backend_sampling=currentConfig.backend_sampling,currentConfig.custom&&(apiOptions.custom=currentConfig.custom),apiOptions}cancelPreEncode(){this.preEncodeAbortController&&(this.preEncodeAbortController.abort(),this.preEncodeAbortController=null)}async triggerPreEncode(allMessages,assistantMessage,assistantContent,model,excludeReasoning){this.cancelPreEncode(),this.preEncodeAbortController= -new AbortController;const signal=this.preEncodeAbortController.signal;try{if(!await ChatService.areAllSlotsIdle(model,signal)||signal.aborted)return;const messagesWithAssistant=[...allMessages,{...assistantMessage,content:assistantContent}];await ChatService.preEncode(messagesWithAssistant,model,excludeReasoning,signal)}catch(err){isAbortError(err)||console.warn("[ChatStore] Pre-encode failed:",err)}}}const chatStore=new ChatStore,activeProcessingState=()=>chatStore.activeProcessingState,errorDialog=()=>chatStore. -errorDialogState,getAddFilesHandler=()=>chatStore.getAddFilesHandler(),getAllLoadingChats=()=>chatStore.getAllLoadingChats(),isChatStreaming=()=>chatStore.isStreaming(),isEditing=()=>chatStore.isEditing(),isLoading=()=>chatStore.isLoading,pendingEditMessageId=()=>chatStore.pendingEditMessageId,chatPendingMessageContent=convId=>chatStore.pendingMessageContent(convId),chatPendingMessageExtras=convId=>chatStore.pendingMessageExtras(convId),chatClearPendingMessage=convId=>chatStore.clearPendingMessage( -convId),chatInjectPendingMessage=(convId,content2,extras)=>chatStore.injectPendingMessage(convId,content2,extras);var root$1u=from_html('
',1);function ChatForm($$anchor,$$props){push$1($$props,!0);let attachments=prop($$props,"attachments",19,()=>[]),className=prop($$props,"cla\ -ss",3,""),disabled=prop($$props,"disabled",3,!1),isLoading2=prop($$props,"isLoading",3,!1),placeholder=prop($$props,"placeholder",3,"Type a message..."),showMcpPromptButton=prop($$props,"showMcpPromptButton",3,!1),showAddButton=prop($$props,"showAddButton",3,!0),showModelSelector=prop($$props,"showModelSelector",3,!0),uploadedFiles=prop($$props,"uploadedFiles",31,()=>proxy([])),value=prop($$props,"value",15,""),audioRecorder,chatFormActionsRef=state$1(void 0),fileInputRef=state$1(void 0),pickersRef=state$1( -void 0),textareaRef=state$1(void 0),isRecording=state$1(!1),recordingSupported=state$1(!1),isPromptPickerOpen=state$1(!1),promptSearchQuery=state$1(""),isInlineResourcePickerOpen=state$1(!1),resourceSearchQuery=state$1(""),isResourceDialogOpen=state$1(!1),preSelectedResourceUri=state$1(void 0),currentConfig=user_derived(config$1),pasteLongTextToFileLength=user_derived(()=>{const n=Number(get$3(currentConfig).pasteLongTextToFileLen);return Number.isNaN(n)?Number(SETTING_CONFIG_DEFAULT.pasteLongTextToFileLen): -n}),isRouter=user_derived(isRouterMode),conversationModel=user_derived(()=>chatStore.getConversationModel(activeMessages())),activeModelId=user_derived(()=>{const options=modelOptions();if(!get$3(isRouter))return options.length>0?options[0].model:null;const selectedId=selectedModelId();if(selectedId){const model=options.find(m=>m.id===selectedId);if(model)return model.model}if(get$3(conversationModel)){const model=options.find(m=>m.model===get$3(conversationModel));if(model)return model.model}return null}), -hasModelSelected=user_derived(()=>!get$3(isRouter)||!!get$3(conversationModel)||!!selectedModelId()),hasLoadingAttachments=user_derived(()=>uploadedFiles().some(f=>f.isLoading)),hasAttachments=user_derived(()=>attachments()&&attachments().length>0||uploadedFiles()&&uploadedFiles().length>0),canSubmit=user_derived(()=>value().trim().length>0||get$3(hasAttachments));onMount$1(()=>{set$1(recordingSupported,isAudioRecordingSupported(),!0),audioRecorder=new AudioRecorder});function focus2(){get$3(textareaRef)?. -focus()}function resetTextareaHeight(){get$3(textareaRef)?.resetHeight()}function openModelSelector(){get$3(chatFormActionsRef)?.openModelSelector()}function checkModelSelected(){return get$3(hasModelSelected)?!0:(get$3(chatFormActionsRef)?.openModelSelector(),!1)}function handleFileSelect(files){$$props.onFilesAdd?.(files)}function handleFileUpload(){get$3(fileInputRef)?.click()}function handleFileRemove(fileId){if(fileId.startsWith("attachment-")){const index2=parseInt(fileId.replace("attachme\ -nt-",""),10);!isNaN(index2)&&index2>=0&&index2item.kind==="file").map(item=>item.getAsFile()).filter(file=>file!==null);if(files.length>0){event2.preventDefault(),$$props.onFilesAdd?.(files);return}const text2=event2.clipboardData.getData(MimeTypeText.PLAIN);if(text2.startsWith(CLIPBOARD_CONTENT_QUOTE_PREFIX)){const parsed=parseClipboardContent(text2);if(parsed.textAttachments.length>0||parsed.mcpPromptAttachments.length> -0){if(event2.preventDefault(),value(parsed.message),$$props.onValueChange?.(parsed.message),parsed.textAttachments.length>0){const attachmentFiles=parsed.textAttachments.map(att=>new File([att.content],att.name,{type:MimeTypeText.PLAIN}));$$props.onFilesAdd?.(attachmentFiles)}if(parsed.mcpPromptAttachments.length>0){const mcpPromptFiles=parsed.mcpPromptAttachments.map(att=>({id:uuid$1(),name:att.name,size:att.content.length,type:SpecialFileType.MCP_PROMPT,file:new File([att.content],`${att.name}${FileExtensionText. -TXT}`,{type:MimeTypeText.PLAIN}),isLoading:!1,textContent:att.content,mcpPrompt:{serverName:att.serverName,promptName:att.promptName,arguments:att.arguments}}));uploadedFiles([...uploadedFiles(),...mcpPromptFiles]),$$props.onUploadedFilesChange?.(uploadedFiles())}setTimeout(()=>{get$3(textareaRef)?.focus()},10);return}}if(text2.length>0&&get$3(pasteLongTextToFileLength)>0&&text2.length>get$3(pasteLongTextToFileLength)){event2.preventDefault();const textFile=new File([text2],"Pasted",{type:MimeTypeText. -PLAIN});$$props.onFilesAdd?.([textFile])}}function handlePromptLoadStart(placeholderId,promptInfo,args){value().startsWith(PROMPT_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isPromptPickerOpen,!1),set$1(promptSearchQuery,"");const promptName=promptInfo.title||promptInfo.name,placeholder2={id:placeholderId,name:promptName,size:INITIAL_FILE_SIZE,type:SpecialFileType.MCP_PROMPT,file:new File([],"loading"),isLoading:!0,mcpPrompt:{serverName:promptInfo.serverName,promptName:promptInfo. -name,arguments:args?{...args}:void 0}};uploadedFiles([...uploadedFiles(),placeholder2]),$$props.onUploadedFilesChange?.(uploadedFiles()),get$3(textareaRef)?.focus()}function handlePromptLoadComplete(placeholderId,result){const promptText=result.messages?.map(msg=>typeof msg.content=="string"?msg.content:msg.content.type===ContentPartType.TEXT?msg.content.text:"").filter(Boolean).join(PROMPT_CONTENT_SEPARATOR);uploadedFiles(uploadedFiles().map(f=>f.id===placeholderId?{...f,isLoading:!1,textContent:promptText, -size:promptText.length,file:new File([promptText],`${f.name}${FileExtensionText.TXT}`,{type:MimeTypeText.PLAIN})}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptLoadError(placeholderId,error2){uploadedFiles(uploadedFiles().map(f=>f.id===placeholderId?{...f,isLoading:!1,loadError:error2}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptPickerClose(){set$1(isPromptPickerOpen,!1),set$1(promptSearchQuery,""),get$3(textareaRef)?.focus()}function handleInlineResourcePickerClose(){ -set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleInlineResourceSelect(){value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleBrowseResources(){set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1( -isResourceDialogOpen,!0)}async function handleMicClick(){if(!audioRecorder||!get$3(recordingSupported)){console.warn("Audio recording not supported");return}if(get$3(isRecording)){set$1(isRecording,!1);try{const audioBlob=await audioRecorder.stopRecording(),wavBlob=await convertToWav(audioBlob),audioFile=createAudioFile(wavBlob);$$props.onFilesAdd?.([audioFile])}catch(error2){console.error("Failed to stop recording:",error2)}}else try{await audioRecorder.startRecording(),set$1(isRecording,!0)}catch(error2){ -console.error("Failed to start recording:",error2)}}var $$exports={focus:focus2,resetTextareaHeight,openModelSelector,checkModelSelected},fragment=root$1u(),node2=first_child(fragment);bind_this(ChatFormFileInputInvisible(node2,{onFileSelect:handleFileSelect}),$$value=>set$1(fileInputRef,$$value,!0),()=>get$3(fileInputRef));var form=sibling(node2,2),node_1=child(form);bind_this(ChatFormPickers(node_1,{get isPromptPickerOpen(){return get$3(isPromptPickerOpen)},get promptSearchQuery(){return get$3( -promptSearchQuery)},get isInlineResourcePickerOpen(){return get$3(isInlineResourcePickerOpen)},get resourceSearchQuery(){return get$3(resourceSearchQuery)},onPromptPickerClose:handlePromptPickerClose,onInlineResourcePickerClose:handleInlineResourcePickerClose,onInlineResourceSelect:handleInlineResourceSelect,onPromptLoadStart:handlePromptLoadStart,onPromptLoadComplete:handlePromptLoadComplete,onPromptLoadError:handlePromptLoadError,onInlineResourceBrowse:handleBrowseResources}),$$value=>set$1(pickersRef, -$$value,!0),()=>get$3(pickersRef));var div=sibling(node_1,2),node_2=child(div);{let $0=user_derived(()=>get$3(activeModelId)??void 0);ChatAttachmentsList(node_2,{get attachments(){return attachments()},onFileRemove:handleFileRemove,limitToSingleRow:!0,class:"py-5",style:"scroll-padding: 1rem;",get activeModelId(){return get$3($0)},get uploadedFiles(){return uploadedFiles()},set uploadedFiles($$value){uploadedFiles($$value)}})}var div_1=sibling(node_2,2),node_3=child(div_1);bind_this(ChatFormTextarea( -node_3,{class:"px-5 py-1.5 md:pt-0",onKeydown:handleKeydown,onInput:()=>{handleInput(),$$props.onValueChange?.(value())},get disabled(){return disabled()},get placeholder(){return placeholder()},get value(){return value()},set value($$value){value($$value)}}),$$value=>set$1(textareaRef,$$value,!0),()=>get$3(textareaRef));var node_4=sibling(node_3,2);{var consequent=$$anchor2=>{ChatFormMcpResourcesList($$anchor2,{class:"mb-3",onResourceClick:uri2=>{set$1(preSelectedResourceUri,uri2,!0),set$1(isResourceDialogOpen, -!0)}})},d2=user_derived(()=>mcpHasResourceAttachments());if_block(node_4,$$render=>{get$3(d2)&&$$render(consequent)})}var node_5=sibling(node_4,2);{let $0=user_derived(()=>showMcpPromptButton()?()=>set$1(isPromptPickerOpen,!0):void 0);bind_this(ChatFormActions(node_5,{class:"px-3",get canSend(){return get$3(canSubmit)},get disabled(){return disabled()},get isLoading(){return isLoading2()},get isRecording(){return get$3(isRecording)},get showAddButton(){return showAddButton()},get showModelSelector(){ -return showModelSelector()},get uploadedFiles(){return uploadedFiles()},onFileUpload:handleFileUpload,onMicClick:handleMicClick,get onStop(){return $$props.onStop},onSystemPromptClick:()=>$$props.onSystemPromptClick?.({message:value(),files:uploadedFiles()}),get onMcpPromptClick(){return get$3($0)},onMcpResourcesClick:()=>set$1(isResourceDialogOpen,!0)}),$$value=>set$1(chatFormActionsRef,$$value,!0),()=>get$3(chatFormActionsRef))}reset(div_1),reset(div),reset(form);var node_6=sibling(form,2);return DialogMcpResourcesBrowser( -node_6,{get preSelectedUri(){return get$3(preSelectedResourceUri)},onAttach:resource=>{mcpStore.attachResource(resource.uri)},onOpenChange:newOpen=>{newOpen||set$1(preSelectedResourceUri,void 0)},get open(){return get$3(isResourceDialogOpen)},set open($$value){set$1(isResourceDialogOpen,$$value,!0)}}),template_effect(()=>{set_class(form,1,`relative ${className()??""}`),set_class(div,1,`${INPUT_CLASSES??""} overflow-hidden rounded-3xl backdrop-blur-md ${disabled()?"cursor-not-allowed opacity-60": -""}`)}),event("submit",form,event2=>{event2.preventDefault(),!(!get$3(canSubmit)||disabled()||get$3(hasLoadingAttachments))&&$$props.onSubmit?.()}),event("paste",div_1,handlePaste),append($$anchor,fragment),pop($$exports)}function Dropdown_menu_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),sideOffset=prop($$props,"sideOffset",3,4),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","sideOffset","portalProps","class"]);var fragment=comment$2(),node2=first_child( -fragment);component(node2,()=>Portal$2,($$anchor2,DropdownMenuPrimitive_Portal)=>{DropdownMenuPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=comment$2(),node_1=first_child(fragment_1);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-po\ -pover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node_1,()=>Dropdown_menu_content$1, -($$anchor4,DropdownMenuPrimitive_Content)=>{DropdownMenuPrimitive_Content($$anchor4,spread_props({"data-slot":"dropdown-menu-content",get sideOffset(){return sideOffset()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_item($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),variant=prop($$props,"variant", -3,"default"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","variant"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("relative flex cursor-pointer items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[variant=destructive]:text-destructive data-[var\ -iant=destructive]:data-highlighted:bg-destructive/10 data-[variant=destructive]:data-highlighted:text-destructive dark:data-[variant=destructive]:data-highlighted:bg-destructive/20 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground data-[variant=destructive]:*:[svg]:!text-destructive",$$props.class));component(node2,()=>Menu_item,($$anchor2,DropdownMenuPrimitive_Item)=>{DropdownMenuPrimitive_Item($$anchor2,spread_props( -{"data-slot":"dropdown-menu-item",get"data-inset"(){return $$props.inset},get"data-variant"(){return variant()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_separator($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived( -()=>cn$1("-mx-1 my-1 h-px bg-border/20",$$props.class));component(node2,()=>Menu_separator,($$anchor2,DropdownMenuPrimitive_Separator)=>{DropdownMenuPrimitive_Separator($$anchor2,spread_props({"data-slot":"dropdown-menu-separator",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props, -["$$slots","$$events","$$legacy","ref"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Menu_trigger,($$anchor2,DropdownMenuPrimitive_Trigger)=>{DropdownMenuPrimitive_Trigger($$anchor2,spread_props({"data-slot":"dropdown-menu-trigger"},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_sub_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props( -$$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-\ -in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node2,()=>Menu_sub_content,($$anchor2,DropdownMenuPrimitive_SubContent)=>{DropdownMenuPrimitive_SubContent($$anchor2,spread_props({"data-slot":"dropdown-menu-sub-content",get class(){ -return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}var root_1$P=from_html(" ",1);function Dropdown_menu_sub_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","children"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("flex cursor-default items-center gap-2 rounded\ --sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[state=open]:bg-accent data-[state=open]:text-accent-foreground [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground",$$props.class));component(node2,()=>Menu_sub_trigger,($$anchor2,DropdownMenuPrimitive_SubTrigger)=>{ -DropdownMenuPrimitive_SubTrigger($$anchor2,spread_props({"data-slot":"dropdown-menu-sub-trigger",get"data-inset"(){return $$props.inset},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$P(),node_1=first_child(fragment_1);snippet(node_1,()=>$$props.children??noop$3);var node_2=sibling(node_1,2);Chevron_right(node_2,{class:"ml-auto size-4"}),append($$anchor3,fragment_1)},$$slots:{default:!0}}))})} -append($$anchor,fragment),pop()}const Sub=Menu_sub,Root$4=Menu;function useAttachmentMenu(getFlags,getCallbacks,close2){const modalityFlags=user_derived(getFlags),callbacks=user_derived(()=>{const cbs=getCallbacks(),wrap2=fn=>()=>{close2(),fn?.()};return{[AttachmentAction.FILE_UPLOAD]:wrap2(cbs.onFileUpload),[AttachmentAction.SYSTEM_PROMPT_CLICK]:wrap2(cbs.onSystemPromptClick),[AttachmentAction.MCP_PROMPT_CLICK]:wrap2(cbs.onMcpPromptClick),[AttachmentAction.MCP_RESOURCES_CLICK]:wrap2(cbs.onMcpResourcesClick)}}); -function isItemEnabled(enabledWhen){return!enabledWhen||enabledWhen==="always"?!0:!!get$3(modalityFlags)[enabledWhen]}function isItemVisible(visibleWhen){return visibleWhen?!!get$3(modalityFlags)[visibleWhen]:!0}function getSystemMessageTooltip(){return page$1.params.id?"Inject custom system message at the beginning of the conversation":"Add custom system message for a new conversation"}return{get callbacks(){return get$3(callbacks)},isItemEnabled,isItemVisible,getSystemMessageTooltip}}var root_6$s=from_html( -" ",1),root_10$f=from_html(" ",1),root_11$f=from_html("

"),root_8$r=from_html(" ",1),root_16$6=from_html(" ",1),root_17$8=from_html("

PDFs will be converted to text. Image-based PDFs may not work properly.

"),root_13$d=from_html(" ",1),root_22$2=from_html(" ",1),root_23$5=from_html("

"),root_20$5=from_html(" ",1),root_26$2=from_html(" ",1),root_3$S=from_html(" \ - ",1),root_1$O=from_html(" ",1),root$1t=from_html("
");function ChatFormActionAddDropdown($$anchor,$$props){push$1($$props,!0);let className=prop($$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),hasAudioModality=prop($$props,"hasAudioModality",3,!1),hasVisionModality=prop($$props,"hasVisionModality",3,!1),hasMcpPromptsSupport=prop($$props,"hasMcpPromptsSupport",3,!1),hasMcpResourcesSupport=prop($$props,"hasMcpResourcesSupport",3,!1),dropdownOpen=state$1(!1); -function handleMcpSettingsClick(){set$1(dropdownOpen,!1),$$props.onMcpSettingsClick?.()}const attachmentMenu=useAttachmentMenu(()=>({hasVisionModality:hasVisionModality(),hasAudioModality:hasAudioModality(),hasMcpPromptsSupport:hasMcpPromptsSupport(),hasMcpResourcesSupport:hasMcpResourcesSupport()}),()=>({onFileUpload:$$props.onFileUpload,onSystemPromptClick:$$props.onSystemPromptClick,onMcpPromptClick:$$props.onMcpPromptClick,onMcpResourcesClick:$$props.onMcpResourcesClick}),()=>{set$1(dropdownOpen, -!1)});var div=root$1t(),node2=child(div);component(node2,()=>Root$4,($$anchor2,DropdownMenu_Root)=>{DropdownMenu_Root($$anchor2,{get open(){return get$3(dropdownOpen)},set open($$value){set$1(dropdownOpen,$$value,!0)},children:($$anchor3,$$slotProps)=>{var fragment=root_1$O(),node_1=first_child(fragment);component(node_1,()=>Dropdown_menu_trigger,($$anchor4,DropdownMenu_Trigger)=>{DropdownMenu_Trigger($$anchor4,{name:"Attach files",get disabled(){return disabled()},children:($$anchor5,$$slotProps2)=>{ -var fragment_1=comment$2(),node_2=first_child(fragment_1);snippet(node_2,()=>$$props.trigger,()=>({disabled:disabled()})),append($$anchor5,fragment_1)},$$slots:{default:!0}})});var node_3=sibling(node_1,2);component(node_3,()=>Dropdown_menu_content,($$anchor4,DropdownMenu_Content)=>{DropdownMenu_Content($$anchor4,{align:"start",class:"w-48",children:($$anchor5,$$slotProps2)=>{var fragment_2=root_3$S(),node_4=first_child(fragment_2);each(node_4,17,()=>ATTACHMENT_FILE_ITEMS,item=>item.id,($$anchor6,item)=>{ -const enabled=user_derived(()=>attachmentMenu.isItemEnabled(get$3(item).enabledWhen));var fragment_3=comment$2(),node_5=first_child(fragment_3);{var consequent=$$anchor7=>{var fragment_4=comment$2(),node_6=first_child(fragment_4);{let $0=user_derived(()=>get$3(item).class??"");component(node_6,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item)=>{DropdownMenu_Item($$anchor8,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},onclick:()=>attachmentMenu.callbacks[get$3(item). -action](),children:($$anchor9,$$slotProps3)=>{var fragment_5=root_6$s(),node_7=first_child(fragment_5);component(node_7,()=>get$3(item).icon,($$anchor10,item_icon)=>{item_icon($$anchor10,{class:"h-4 w-4"})});var span=sibling(node_7,2),text2=child(span,!0);reset(span),template_effect(()=>set_text(text2,get$3(item).label)),append($$anchor9,fragment_5)},$$slots:{default:!0}})})}append($$anchor7,fragment_4)},consequent_1=$$anchor7=>{var fragment_6=comment$2(),node_8=first_child(fragment_6);component( -node_8,()=>Root$5,($$anchor8,Tooltip_Root)=>{Tooltip_Root($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_7=root_8$r(),node_9=first_child(fragment_7);component(node_9,()=>Tooltip_trigger,($$anchor10,Tooltip_Trigger)=>{Tooltip_Trigger($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_8=comment$2(),node_10=first_child(fragment_8);{let $0=user_derived(()=>get$3(item).class??"");component(node_10,()=>Dropdown_menu_item, -($$anchor12,DropdownMenu_Item_1)=>{DropdownMenu_Item_1($$anchor12,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},disabled:!0,children:($$anchor13,$$slotProps5)=>{var fragment_9=root_10$f(),node_11=first_child(fragment_9);component(node_11,()=>get$3(item).icon,($$anchor14,item_icon_1)=>{item_icon_1($$anchor14,{class:"h-4 w-4"})});var span_1=sibling(node_11,2),text_1=child(span_1,!0);reset(span_1),template_effect(()=>set_text(text_1,get$3(item).label)),append($$anchor13, -fragment_9)},$$slots:{default:!0}})})}append($$anchor11,fragment_8)},$$slots:{default:!0}})});var node_12=sibling(node_9,2);component(node_12,()=>Tooltip_content,($$anchor10,Tooltip_Content)=>{Tooltip_Content($$anchor10,{side:"right",children:($$anchor11,$$slotProps4)=>{var p2=root_11$f(),text_2=child(p2,!0);reset(p2),template_effect(()=>set_text(text_2,get$3(item).disabledTooltip)),append($$anchor11,p2)},$$slots:{default:!0}})}),append($$anchor9,fragment_7)},$$slots:{default:!0}})}),append($$anchor7, -fragment_6)};if_block(node_5,$$render=>{get$3(enabled)?$$render(consequent):get$3(item).disabledTooltip&&$$render(consequent_1,1)})}append($$anchor6,fragment_3)});var node_13=sibling(node_4,2);{var consequent_3=$$anchor6=>{var fragment_10=comment$2(),node_14=first_child(fragment_10);component(node_14,()=>Root$5,($$anchor7,Tooltip_Root_1)=>{Tooltip_Root_1($$anchor7,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor8,$$slotProps3)=>{var fragment_11=root_13$d(),node_15=first_child( -fragment_11);component(node_15,()=>Tooltip_trigger,($$anchor9,Tooltip_Trigger_1)=>{Tooltip_Trigger_1($$anchor9,{class:"w-full",children:($$anchor10,$$slotProps4)=>{var fragment_12=comment$2(),node_16=first_child(fragment_12);component(node_16,()=>Dropdown_menu_item,($$anchor11,DropdownMenu_Item_2)=>{DropdownMenu_Item_2($$anchor11,{class:"flex cursor-pointer items-center gap-2",get onclick(){return attachmentMenu.callbacks.onFileUpload},children:($$anchor12,$$slotProps5)=>{const pdfItem=user_derived( -()=>ATTACHMENT_FILE_ITEMS.find(i=>i.id===AttachmentMenuItemId.PDF));var fragment_13=comment$2(),node_17=first_child(fragment_13);{var consequent_2=$$anchor13=>{var fragment_14=root_16$6(),node_18=first_child(fragment_14);component(node_18,()=>get$3(pdfItem).icon,($$anchor14,pdfItem_icon)=>{pdfItem_icon($$anchor14,{class:"h-4 w-4"})});var span_2=sibling(node_18,2),text_3=child(span_2,!0);reset(span_2),template_effect(()=>set_text(text_3,get$3(pdfItem).label)),append($$anchor13,fragment_14)};if_block( -node_17,$$render=>{get$3(pdfItem)&&$$render(consequent_2)})}append($$anchor12,fragment_13)},$$slots:{default:!0}})}),append($$anchor10,fragment_12)},$$slots:{default:!0}})});var node_19=sibling(node_15,2);component(node_19,()=>Tooltip_content,($$anchor9,Tooltip_Content_1)=>{Tooltip_Content_1($$anchor9,{side:"right",children:($$anchor10,$$slotProps4)=>{var p_1=root_17$8();append($$anchor10,p_1)},$$slots:{default:!0}})}),append($$anchor8,fragment_11)},$$slots:{default:!0}})}),append($$anchor6,fragment_10)}, -d2=user_derived(()=>!attachmentMenu.isItemEnabled("hasVisionModality"));if_block(node_13,$$render=>{get$3(d2)&&$$render(consequent_3)})}var node_20=sibling(node_13,2);component(node_20,()=>Dropdown_menu_separator,($$anchor6,DropdownMenu_Separator)=>{DropdownMenu_Separator($$anchor6,{})});var node_21=sibling(node_20,2);each(node_21,17,()=>ATTACHMENT_EXTRA_ITEMS,item=>item.id,($$anchor6,item)=>{var fragment_15=comment$2(),node_22=first_child(fragment_15);{var consequent_4=$$anchor7=>{var fragment_16=comment$2(), -node_23=first_child(fragment_16);component(node_23,()=>Root$5,($$anchor8,Tooltip_Root_2)=>{Tooltip_Root_2($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_17=root_20$5(),node_24=first_child(fragment_17);component(node_24,()=>Tooltip_trigger,($$anchor10,Tooltip_Trigger_2)=>{Tooltip_Trigger_2($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_18=comment$2(),node_25=first_child(fragment_18);component(node_25,()=>Dropdown_menu_item, -($$anchor12,DropdownMenu_Item_3)=>{DropdownMenu_Item_3($$anchor12,{class:"flex cursor-pointer items-center gap-2",onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor13,$$slotProps5)=>{var fragment_19=root_22$2(),node_26=first_child(fragment_19);component(node_26,()=>get$3(item).icon,($$anchor14,item_icon_2)=>{item_icon_2($$anchor14,{class:"h-4 w-4"})});var span_3=sibling(node_26,2),text_4=child(span_3,!0);reset(span_3),template_effect(()=>set_text(text_4,get$3(item).label)), -append($$anchor13,fragment_19)},$$slots:{default:!0}})}),append($$anchor11,fragment_18)},$$slots:{default:!0}})});var node_27=sibling(node_24,2);component(node_27,()=>Tooltip_content,($$anchor10,Tooltip_Content_2)=>{Tooltip_Content_2($$anchor10,{side:"right",children:($$anchor11,$$slotProps4)=>{var p_2=root_23$5(),text_5=child(p_2,!0);reset(p_2),template_effect($0=>set_text(text_5,$0),[()=>attachmentMenu.getSystemMessageTooltip()]),append($$anchor11,p_2)},$$slots:{default:!0}})}),append($$anchor9, -fragment_17)},$$slots:{default:!0}})}),append($$anchor7,fragment_16)};if_block(node_22,$$render=>{get$3(item).id===AttachmentMenuItemId.SYSTEM_MESSAGE&&$$render(consequent_4)})}append($$anchor6,fragment_15)});var node_28=sibling(node_21,2);ChatFormActionAddToolsSubmenu(node_28,{});var node_29=sibling(node_28,2);ChatFormActionAddMcpServersSubmenu(node_29,{onMcpSettingsClick:handleMcpSettingsClick});var node_30=sibling(node_29,2);each(node_30,17,()=>ATTACHMENT_MCP_ITEMS,item=>item.id,($$anchor6,item)=>{ -var fragment_20=comment$2(),node_31=first_child(fragment_20);{var consequent_5=$$anchor7=>{var fragment_21=comment$2(),node_32=first_child(fragment_21);component(node_32,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item_4)=>{DropdownMenu_Item_4($$anchor8,{class:"flex cursor-pointer items-center gap-2",onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor9,$$slotProps3)=>{var fragment_22=root_26$2(),node_33=first_child(fragment_22);component(node_33,()=>get$3(item).icon, -($$anchor10,item_icon_3)=>{item_icon_3($$anchor10,{class:"h-4 w-4"})});var span_4=sibling(node_33,2),text_6=child(span_4,!0);reset(span_4),template_effect(()=>set_text(text_6,get$3(item).label)),append($$anchor9,fragment_22)},$$slots:{default:!0}})}),append($$anchor7,fragment_21)},d_12=user_derived(()=>attachmentMenu.isItemVisible(get$3(item).visibleWhen));if_block(node_31,$$render=>{get$3(d_12)&&$$render(consequent_5)})}append($$anchor6,fragment_20)}),append($$anchor5,fragment_2)},$$slots:{default:!0}})}), -append($$anchor3,fragment)},$$slots:{default:!0}})}),reset(div),template_effect(()=>set_class(div,1,`flex items-center gap-1 ${className()??""}`)),append($$anchor,div),pop()}function Sheet_overlay($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("fixed inset-0 z-50 bg-black/50 data-[state=closed]:animate-out data\ --[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=open]:animate-in data-[state=open]:fade-in-0",$$props.class));component(node2,()=>Dialog_overlay$1,($$anchor2,SheetPrimitive_Overlay)=>{SheetPrimitive_Overlay($$anchor2,spread_props({"data-slot":"sheet-overlay",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}const sheetVariants=tv({base:`border-border/30 dark:border-border/20 da\ -ta-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fill-mode-forwards fixed z-50 flex flex-col gap-4 shadow-sm transition ease-in-out data-[state=closed]:duration-300 data-[state=open]:duration-500 ${PANEL_CLASSES}`,variants:{side:{top:"data-[state=closed]:slide-out-to-top data-[state=open]:slide-in-from-top inset-x-0 top-0 h-auto border-b",bottom:"data-[state=closed]:slide-out-to-bottom data-[state=open]:slide-in-from-bottom inset-x-0 bottom-0 h-auto border-t",left:"\ -data-[state=closed]:slide-out-to-left data-[state=open]:slide-in-from-left inset-y-0 left-0 h-full w-3/4 border-r sm:max-w-sm",right:"data-[state=closed]:slide-out-to-right data-[state=open]:slide-in-from-right inset-y-0 right-0 h-full w-3/4 border-l sm:max-w-sm"}},defaultVariants:{side:"right"}});var root_3$R=from_html(' Close',1),root_2$12=from_html(" ",1),root_1$N=from_html(" ",1);function Sheet_content($$anchor,$$props){push$1($$props,!0);let ref2=prop( -$$props,"ref",15,null),side=prop($$props,"side",3,"right"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","side","portalProps","children"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Portal$2,($$anchor2,SheetPrimitive_Portal)=>{SheetPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$N(),node_1=first_child(fragment_1);Sheet_overlay(node_1,{});var node_2=sibling(node_1, -2);{let $0=user_derived(()=>cn$1(sheetVariants({side:side()}),$$props.class));component(node_2,()=>Dialog_content$1,($$anchor4,SheetPrimitive_Content)=>{SheetPrimitive_Content($$anchor4,spread_props({"data-slot":"sheet-content",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor5,$$slotProps2)=>{var fragment_2=root_2$12(),node_3=first_child(fragment_2);snippet(node_3,()=>$$props.children??noop$3);var node_4=sibling(node_3,2);component( -node_4,()=>Dialog_close,($$anchor6,SheetPrimitive_Close)=>{SheetPrimitive_Close($$anchor6,{class:"absolute top-4 right-4 rounded-xs opacity-70 ring-offset-background transition-opacity hover:opacity-100 focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:outline-hidden disabled:pointer-events-none",children:($$anchor7,$$slotProps3)=>{var fragment_3=root_3$R(),node_5=first_child(fragment_3);X(node_5,{class:"size-4"}),next$1(2),append($$anchor7,fragment_3)},$$slots:{ -default:!0}})}),append($$anchor5,fragment_2)},$$slots:{default:!0}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}var root$1s=from_html("
");function Sheet_header($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","children"]);var div=root$1s();attribute_effect(div,$0=>({"data-slot":"sheet-header",class:$0,...restProps}),[()=>cn$1("flex fle\ -x-col gap-1.5 p-4",$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}function Sheet_title($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("font-semibold text-foreground",$$props.class));component( -node2,()=>Dialog_title$1,($$anchor2,SheetPrimitive_Title)=>{SheetPrimitive_Title($$anchor2,spread_props({"data-slot":"sheet-title",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Sheet_description($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment); -{let $0=user_derived(()=>cn$1("text-sm text-muted-foreground",$$props.class));component(node2,()=>Dialog_description$1,($$anchor2,SheetPrimitive_Description)=>{SheetPrimitive_Description($$anchor2,spread_props({"data-slot":"sheet-description",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}const Root$3=Dialog;var root_3$Q=from_html(" ",1),root_7$r=from_html(''),root_11$e=from_html("

"),root_9$k=from_html(" ",1),root_15$9=from_html(''),root_16$5=from_html("

PDFs will be converted to text. Image-based PDFs may not work properly.

"),root_14$5=from_html(" ",1),root_20$4=from_html(''),root_21$3=from_html("

"),root_19$9=from_html(" ",1),root_23$4=from_html(''),root_2$11=from_html(' ',1),root_1$M=from_html(" ",1),root$1r=from_html("
");function ChatFormActionAddSheet($$anchor,$$props){ +slice(0,idx),{role:MessageRole.ASSISTANT,content:originalContent,reasoning_content:originalReasoning||void 0}];let appendedContent="",appendedReasoning="",hasReceivedContent=!1;const updateStreamingContent=fullContent=>{this.setChatStreaming(msg.convId,fullContent,msg.id),conversationsStore.updateMessageAtIndex(idx,{content:fullContent})},abortController=this.getOrCreateAbortController(msg.convId);await ChatService.sendMessage(contextWithContinue,{...this.getApiOptions(),continueFinalMessage:!0, +onChunk:chunk=>{appendedContent+=chunk,hasReceivedContent=!0,updateStreamingContent(originalContent+appendedContent)},onReasoningChunk:chunk=>{appendedReasoning+=chunk,hasReceivedContent=!0,this.setChatStreaming(msg.convId,originalContent+appendedContent,msg.id),conversationsStore.updateMessageAtIndex(idx,{reasoningContent:originalReasoning+appendedReasoning})},onTimings:(timings,promptProgress)=>{const tokensPerSecond=timings?.predicted_ms&&timings?.predicted_n?timings.predicted_n/timings.predicted_ms* +1e3:0;this.updateProcessingStateFromTimings({prompt_n:timings?.prompt_n||0,prompt_ms:timings?.prompt_ms,predicted_n:timings?.predicted_n||0,predicted_per_second:tokensPerSecond,cache_n:timings?.cache_n||0,prompt_progress:promptProgress},msg.convId)},onComplete:async(finalContent,reasoningContent,timings)=>{const finalAppendedContent=hasReceivedContent?appendedContent:finalContent||"",finalAppendedReasoning=hasReceivedContent?appendedReasoning:reasoningContent||"",fullContent=originalContent+finalAppendedContent, +fullReasoning=originalReasoning+finalAppendedReasoning||void 0;await DatabaseService.updateMessage(msg.id,{content:fullContent,reasoningContent:fullReasoning,timestamp:Date.now(),timings}),conversationsStore.updateMessageAtIndex(idx,{content:fullContent,reasoningContent:fullReasoning,timestamp:Date.now(),timings}),conversationsStore.updateConversationTimestamp(),this.setChatLoading(msg.convId,!1),this.clearChatStreaming(msg.convId),this.setProcessingState(msg.convId,null)},onError:async error2=>{ +if(isAbortError(error2)){hasReceivedContent&&appendedContent&&(await DatabaseService.updateMessage(msg.id,{content:originalContent+appendedContent,reasoningContent:originalReasoning+appendedReasoning||void 0,timestamp:Date.now()}),conversationsStore.updateMessageAtIndex(idx,{content:originalContent+appendedContent,reasoningContent:originalReasoning+appendedReasoning||void 0,timestamp:Date.now()})),this.setChatLoading(msg.convId,!1),this.clearChatStreaming(msg.convId),this.setProcessingState(msg. +convId,null);return}console.error("Continue generation error:",error2),conversationsStore.updateMessageAtIndex(idx,{content:originalContent}),await DatabaseService.updateMessage(msg.id,{content:originalContent}),this.setChatLoading(msg.convId,!1),this.clearChatStreaming(msg.convId),this.setProcessingState(msg.convId,null),this.showErrorDialog({type:error2.name==="TimeoutError"?ErrorDialogType.TIMEOUT:ErrorDialogType.SERVER,message:error2.message})}},msg.convId,abortController.signal)}catch(error2){ +isAbortError(error2)||console.error("Failed to continue message:",error2),activeConv&&this.setChatLoading(activeConv.id,!1)}}async editAssistantMessage(messageId,newContent,shouldBranch){const activeConv=conversationsStore.activeConversation;if(!activeConv||this.isChatLoadingInternal(activeConv.id))return;const result=this.getMessageByIdWithRole(messageId,MessageRole.ASSISTANT);if(!result)return;const{message:msg,index:idx}=result;try{if(shouldBranch){const newMessage=await DatabaseService.createMessageBranch( +{convId:msg.convId,type:msg.type,timestamp:Date.now(),role:msg.role,content:newContent,toolCalls:msg.toolCalls||"",children:[],model:msg.model},msg.parent);await conversationsStore.updateCurrentNode(newMessage.id)}else await DatabaseService.updateMessage(msg.id,{content:newContent}),conversationsStore.updateMessageAtIndex(idx,{content:newContent});conversationsStore.updateConversationTimestamp(),await conversationsStore.refreshActiveMessages()}catch(error2){console.error("Failed to edit assistan\ +t message:",error2)}}async editUserMessagePreserveResponses(messageId,newContent,newExtras){const activeConv=conversationsStore.activeConversation;if(!activeConv)return;const result=this.getMessageByIdWithRole(messageId,MessageRole.USER);if(!result)return;const{message:msg,index:idx}=result;try{const updateData={content:newContent};newExtras!==void 0&&(updateData.extra=JSON.parse(JSON.stringify(newExtras))),await DatabaseService.updateMessage(messageId,updateData),conversationsStore.updateMessageAtIndex( +idx,updateData);const rootMessage=(await conversationsStore.getConversationMessages(activeConv.id)).find(m=>m.type==="root"&&m.parent===null);rootMessage&&msg.parent===rootMessage.id&&newContent.trim()&&await conversationsStore.updateConversationTitleWithConfirmation(activeConv.id,generateConversationTitle(newContent,!!config$1().titleGenerationUseFirstLine)),conversationsStore.updateConversationTimestamp()}catch(error2){console.error("Failed to edit user message:",error2)}}async editMessageWithBranching(messageId,newContent,newExtras){ +const activeConv=conversationsStore.activeConversation;if(!activeConv||this.isChatLoadingInternal(activeConv.id))return;let result=this.getMessageByIdWithRole(messageId,MessageRole.USER);if(result||(result=this.getMessageByIdWithRole(messageId,MessageRole.SYSTEM)),!result)return;const{message:msg,index:idx}=result;try{const allMessages=await conversationsStore.getConversationMessages(activeConv.id),rootMessage=allMessages.find(m=>m.type==="root"&&m.parent===null),isFirstUserMessage=msg.role===MessageRole. +USER&&rootMessage&&msg.parent===rootMessage.id,extrasToUse=newExtras!==void 0?JSON.parse(JSON.stringify(newExtras)):msg.extra?JSON.parse(JSON.stringify(msg.extra)):void 0;let messageIdForResponse;const dbMsg=findMessageById(allMessages,msg.id);if(dbMsg?dbMsg.children.length>0:msg.children.length>0){const parentId=msg.parent||rootMessage?.id;if(!parentId)return;const newMessage=await DatabaseService.createMessageBranch({convId:msg.convId,type:msg.type,timestamp:Date.now(),role:msg.role,content:newContent, +toolCalls:msg.toolCalls||"",children:[],extra:extrasToUse,model:msg.model},parentId);await conversationsStore.updateCurrentNode(newMessage.id),messageIdForResponse=newMessage.id}else{const updates={content:newContent,timestamp:Date.now(),extra:extrasToUse};await DatabaseService.updateMessage(msg.id,updates),conversationsStore.updateMessageAtIndex(idx,updates),messageIdForResponse=msg.id}conversationsStore.updateConversationTimestamp(),isFirstUserMessage&&newContent.trim()&&await conversationsStore. +updateConversationTitleWithConfirmation(activeConv.id,generateConversationTitle(newContent,!!config$1().titleGenerationUseFirstLine)),await conversationsStore.refreshActiveMessages(),msg.role===MessageRole.USER&&await this.generateResponseForMessage(messageIdForResponse)}catch(error2){console.error("Failed to edit message with branching:",error2)}}async generateResponseForMessage(userMessageId){const activeConv=conversationsStore.activeConversation;if(activeConv){this.showErrorDialog(null),this. +setChatLoading(activeConv.id,!0),this.clearChatStreaming(activeConv.id);try{const allMessages=await conversationsStore.getConversationMessages(activeConv.id),conversationPath=filterByLeafNodeId(allMessages,userMessageId,!1),assistantMessage=await DatabaseService.createMessageBranch({convId:activeConv.id,type:MessageType.TEXT,timestamp:Date.now(),role:MessageRole.ASSISTANT,content:"",toolCalls:"",children:[],model:null},userMessageId);conversationsStore.addMessageToActive(assistantMessage),await this. +streamChatCompletion(conversationPath,assistantMessage)}catch(error2){console.error("Failed to generate response:",error2),this.setChatLoading(activeConv.id,!1)}}}getContextTotal(){const activeConvId=this.activeConversationId,activeState=activeConvId?this.getProcessingState(activeConvId):null;if(activeState&&typeof activeState.contextTotal=="number"&&activeState.contextTotal>0)return activeState.contextTotal;if(isRouterMode()){const modelContextSize=selectedModelContextSize();if(typeof modelContextSize== +"number"&&modelContextSize>0)return modelContextSize}else{const propsContextSize=contextSize();if(typeof propsContextSize=="number"&&propsContextSize>0)return propsContextSize}return null}updateProcessingStateFromTimings(timingData,conversationId){const processingState=this.parseTimingData(timingData);if(processingState===null){console.warn("Failed to parse timing data - skipping update");return}const targetId=conversationId||this.activeConversationId;targetId&&this.setProcessingState(targetId,processingState)}parseTimingData(timingData){ +const promptTokens=timingData.prompt_n||0,promptMs=timingData.prompt_ms||void 0,predictedTokens=timingData.predicted_n||0,tokensPerSecond=timingData.predicted_per_second||0,cacheTokens=timingData.cache_n||0,promptProgress=timingData.prompt_progress,contextTotal=this.getContextTotal(),currentConfig=config$1(),outputTokensMax=currentConfig.max_tokens||-1,contextUsed=promptTokens+cacheTokens+predictedTokens,outputTokensUsed=predictedTokens,progressCache=promptProgress?.cache||0,progressActualDone=(promptProgress?. +processed??0)-progressCache,progressActualTotal=(promptProgress?.total??0)-progressCache,progressPercent=promptProgress?Math.round(progressActualDone/progressActualTotal*100):void 0;return{status:predictedTokens>0?"generating":promptProgress?"preparing":"idle",tokensDecoded:predictedTokens,tokensRemaining:outputTokensMax-predictedTokens,contextUsed,contextTotal,outputTokensUsed,outputTokensMax,hasNextToken:predictedTokens>0,tokensPerSecond,temperature:currentConfig.temperature??.8,topP:currentConfig. +top_p??.95,speculative:!1,progressPercent,promptProgress,promptTokens,promptMs,cacheTokens}}restoreProcessingStateFromMessages(messages,conversationId){for(let i=messages.length-1;i>=0;i--){const message=messages[i];if(message.role===MessageRole.ASSISTANT&&message.timings){const restoredState=this.parseTimingData({prompt_n:message.timings.prompt_n||0,prompt_ms:message.timings.prompt_ms,predicted_n:message.timings.predicted_n||0,predicted_per_second:message.timings.predicted_n&&message.timings.predicted_ms? +message.timings.predicted_n/message.timings.predicted_ms*1e3:0,cache_n:message.timings.cache_n||0});if(restoredState){this.setProcessingState(conversationId,restoredState);return}}}}getConversationModel(messages){for(let i=messages.length-1;i>=0;i--){const message=messages[i];if(message.role===MessageRole.ASSISTANT&&message.model)return message.model}return null}getApiOptions(){const currentConfig=config$1(),hasValue=value=>value!=null&&value!=="",apiOptions={stream:!0,timings_per_token:!0};if(isRouterMode()){ +const modelName=selectedModelName();modelName&&(apiOptions.model=modelName)}return currentConfig.systemMessage&&(apiOptions.systemMessage=currentConfig.systemMessage),currentConfig.disableReasoningParsing&&(apiOptions.disableReasoningParsing=!0),currentConfig.excludeReasoningFromContext&&(apiOptions.excludeReasoningFromContext=!0),hasValue(currentConfig.temperature)&&(apiOptions.temperature=Number(currentConfig.temperature)),hasValue(currentConfig.max_tokens)&&(apiOptions.max_tokens=Number(currentConfig. +max_tokens)),hasValue(currentConfig.dynatemp_range)&&(apiOptions.dynatemp_range=Number(currentConfig.dynatemp_range)),hasValue(currentConfig.dynatemp_exponent)&&(apiOptions.dynatemp_exponent=Number(currentConfig.dynatemp_exponent)),hasValue(currentConfig.top_k)&&(apiOptions.top_k=Number(currentConfig.top_k)),hasValue(currentConfig.top_p)&&(apiOptions.top_p=Number(currentConfig.top_p)),hasValue(currentConfig.min_p)&&(apiOptions.min_p=Number(currentConfig.min_p)),hasValue(currentConfig.xtc_probability)&& +(apiOptions.xtc_probability=Number(currentConfig.xtc_probability)),hasValue(currentConfig.xtc_threshold)&&(apiOptions.xtc_threshold=Number(currentConfig.xtc_threshold)),hasValue(currentConfig.typ_p)&&(apiOptions.typ_p=Number(currentConfig.typ_p)),hasValue(currentConfig.repeat_last_n)&&(apiOptions.repeat_last_n=Number(currentConfig.repeat_last_n)),hasValue(currentConfig.repeat_penalty)&&(apiOptions.repeat_penalty=Number(currentConfig.repeat_penalty)),hasValue(currentConfig.presence_penalty)&&(apiOptions. +presence_penalty=Number(currentConfig.presence_penalty)),hasValue(currentConfig.frequency_penalty)&&(apiOptions.frequency_penalty=Number(currentConfig.frequency_penalty)),hasValue(currentConfig.dry_multiplier)&&(apiOptions.dry_multiplier=Number(currentConfig.dry_multiplier)),hasValue(currentConfig.dry_base)&&(apiOptions.dry_base=Number(currentConfig.dry_base)),hasValue(currentConfig.dry_allowed_length)&&(apiOptions.dry_allowed_length=Number(currentConfig.dry_allowed_length)),hasValue(currentConfig. +dry_penalty_last_n)&&(apiOptions.dry_penalty_last_n=Number(currentConfig.dry_penalty_last_n)),currentConfig.samplers&&(apiOptions.samplers=currentConfig.samplers),apiOptions.backend_sampling=currentConfig.backend_sampling,currentConfig.custom&&(apiOptions.custom=currentConfig.custom),apiOptions}cancelPreEncode(){this.preEncodeAbortController&&(this.preEncodeAbortController.abort(),this.preEncodeAbortController=null)}async triggerPreEncode(allMessages,assistantMessage,assistantContent,model,excludeReasoning){ +this.cancelPreEncode(),this.preEncodeAbortController=new AbortController;const signal=this.preEncodeAbortController.signal;try{if(!await ChatService.areAllSlotsIdle(model,signal)||signal.aborted)return;const messagesWithAssistant=[...allMessages,{...assistantMessage,content:assistantContent}];await ChatService.preEncode(messagesWithAssistant,model,excludeReasoning,signal)}catch(err){isAbortError(err)||console.warn("[ChatStore] Pre-encode failed:",err)}}}const chatStore=new ChatStore,activeProcessingState=()=>chatStore. +activeProcessingState,errorDialog=()=>chatStore.errorDialogState,getAddFilesHandler=()=>chatStore.getAddFilesHandler(),getAllLoadingChats=()=>chatStore.getAllLoadingChats(),isChatStreaming=()=>chatStore.isStreaming(),isEditing=()=>chatStore.isEditing(),isLoading=()=>chatStore.isLoading,pendingEditMessageId=()=>chatStore.pendingEditMessageId,chatPendingMessageContent=convId=>chatStore.pendingMessageContent(convId),chatPendingMessageExtras=convId=>chatStore.pendingMessageExtras(convId),chatClearPendingMessage=convId=>chatStore. +clearPendingMessage(convId),chatInjectPendingMessage=(convId,content2,extras)=>chatStore.injectPendingMessage(convId,content2,extras);var root$1u=from_html('
',1);function ChatForm($$anchor,$$props){push$1($$props,!0);let attachments=prop($$props,"attachments",19,()=>[]),className=prop( +$$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),isLoading2=prop($$props,"isLoading",3,!1),placeholder=prop($$props,"placeholder",3,"Type a message..."),showMcpPromptButton=prop($$props,"showMcpPromptButton",3,!1),showAddButton=prop($$props,"showAddButton",3,!0),showModelSelector=prop($$props,"showModelSelector",3,!0),uploadedFiles=prop($$props,"uploadedFiles",31,()=>proxy([])),value=prop($$props,"value",15,""),audioRecorder,chatFormActionsRef=state$1(void 0),fileInputRef=state$1(void 0), +pickersRef=state$1(void 0),textareaRef=state$1(void 0),isRecording=state$1(!1),recordingSupported=state$1(!1),isPromptPickerOpen=state$1(!1),promptSearchQuery=state$1(""),isInlineResourcePickerOpen=state$1(!1),resourceSearchQuery=state$1(""),isResourceDialogOpen=state$1(!1),preSelectedResourceUri=state$1(void 0),currentConfig=user_derived(config$1),pasteLongTextToFileLength=user_derived(()=>{const n=Number(get$3(currentConfig).pasteLongTextToFileLen);return Number.isNaN(n)?Number(SETTING_CONFIG_DEFAULT. +pasteLongTextToFileLen):n}),isRouter=user_derived(isRouterMode),conversationModel=user_derived(()=>chatStore.getConversationModel(activeMessages())),activeModelId=user_derived(()=>{const options=modelOptions();if(!get$3(isRouter))return options.length>0?options[0].model:null;const selectedId=selectedModelId();if(selectedId){const model=options.find(m=>m.id===selectedId);if(model)return model.model}if(get$3(conversationModel)){const model=options.find(m=>m.model===get$3(conversationModel));if(model) +return model.model}return null}),hasModelSelected=user_derived(()=>!get$3(isRouter)||!!get$3(conversationModel)||!!selectedModelId()),hasLoadingAttachments=user_derived(()=>uploadedFiles().some(f=>f.isLoading)),hasAttachments=user_derived(()=>attachments()&&attachments().length>0||uploadedFiles()&&uploadedFiles().length>0),canSubmit=user_derived(()=>value().trim().length>0||get$3(hasAttachments));onMount$1(()=>{set$1(recordingSupported,isAudioRecordingSupported(),!0),audioRecorder=new AudioRecorder}); +function focus2(){get$3(textareaRef)?.focus()}function resetTextareaHeight(){get$3(textareaRef)?.resetHeight()}function openModelSelector(){get$3(chatFormActionsRef)?.openModelSelector()}function checkModelSelected(){return get$3(hasModelSelected)?!0:(get$3(chatFormActionsRef)?.openModelSelector(),!1)}function handleFileSelect(files){$$props.onFilesAdd?.(files)}function handleFileUpload(){get$3(fileInputRef)?.click()}function handleFileRemove(fileId){if(fileId.startsWith("attachment-")){const index2=parseInt( +fileId.replace("attachment-",""),10);!isNaN(index2)&&index2>=0&&index2item.kind==="file").map(item=>item.getAsFile()).filter(file=>file!==null);if(files.length>0){event2.preventDefault(),$$props.onFilesAdd?.(files);return}const text2=event2.clipboardData.getData(MimeTypeText.PLAIN);if(text2.startsWith(CLIPBOARD_CONTENT_QUOTE_PREFIX)){const parsed=parseClipboardContent(text2);if(parsed.textAttachments.length>0||parsed. +mcpPromptAttachments.length>0){if(event2.preventDefault(),value(parsed.message),$$props.onValueChange?.(parsed.message),parsed.textAttachments.length>0){const attachmentFiles=parsed.textAttachments.map(att=>new File([att.content],att.name,{type:MimeTypeText.PLAIN}));$$props.onFilesAdd?.(attachmentFiles)}if(parsed.mcpPromptAttachments.length>0){const mcpPromptFiles=parsed.mcpPromptAttachments.map(att=>({id:uuid$1(),name:att.name,size:att.content.length,type:SpecialFileType.MCP_PROMPT,file:new File( +[att.content],`${att.name}${FileExtensionText.TXT}`,{type:MimeTypeText.PLAIN}),isLoading:!1,textContent:att.content,mcpPrompt:{serverName:att.serverName,promptName:att.promptName,arguments:att.arguments}}));uploadedFiles([...uploadedFiles(),...mcpPromptFiles]),$$props.onUploadedFilesChange?.(uploadedFiles())}setTimeout(()=>{get$3(textareaRef)?.focus()},10);return}}if(text2.length>0&&get$3(pasteLongTextToFileLength)>0&&text2.length>get$3(pasteLongTextToFileLength)){event2.preventDefault();const textFile=new File( +[text2],"Pasted",{type:MimeTypeText.PLAIN});$$props.onFilesAdd?.([textFile])}}function handlePromptLoadStart(placeholderId,promptInfo,args){value().startsWith(PROMPT_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isPromptPickerOpen,!1),set$1(promptSearchQuery,"");const promptName=promptInfo.title||promptInfo.name,placeholder2={id:placeholderId,name:promptName,size:INITIAL_FILE_SIZE,type:SpecialFileType.MCP_PROMPT,file:new File([],"loading"),isLoading:!0,mcpPrompt:{serverName:promptInfo. +serverName,promptName:promptInfo.name,arguments:args?{...args}:void 0}};uploadedFiles([...uploadedFiles(),placeholder2]),$$props.onUploadedFilesChange?.(uploadedFiles()),get$3(textareaRef)?.focus()}function handlePromptLoadComplete(placeholderId,result){const promptText=result.messages?.map(msg=>typeof msg.content=="string"?msg.content:msg.content.type===ContentPartType.TEXT?msg.content.text:"").filter(Boolean).join(PROMPT_CONTENT_SEPARATOR);uploadedFiles(uploadedFiles().map(f=>f.id===placeholderId? +{...f,isLoading:!1,textContent:promptText,size:promptText.length,file:new File([promptText],`${f.name}${FileExtensionText.TXT}`,{type:MimeTypeText.PLAIN})}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptLoadError(placeholderId,error2){uploadedFiles(uploadedFiles().map(f=>f.id===placeholderId?{...f,isLoading:!1,loadError:error2}:f)),$$props.onUploadedFilesChange?.(uploadedFiles())}function handlePromptPickerClose(){set$1(isPromptPickerOpen,!1),set$1(promptSearchQuery,""), +get$3(textareaRef)?.focus()}function handleInlineResourcePickerClose(){set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleInlineResourceSelect(){value().startsWith(RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),get$3(textareaRef)?.focus()}function handleBrowseResources(){set$1(isInlineResourcePickerOpen,!1),set$1(resourceSearchQuery,""),value().startsWith( +RESOURCE_TRIGGER_PREFIX)&&(value(""),$$props.onValueChange?.("")),set$1(isResourceDialogOpen,!0)}async function handleMicClick(){if(!audioRecorder||!get$3(recordingSupported)){console.warn("Audio recording not supported");return}if(get$3(isRecording)){set$1(isRecording,!1);try{const audioBlob=await audioRecorder.stopRecording(),wavBlob=await convertToWav(audioBlob),audioFile=createAudioFile(wavBlob);$$props.onFilesAdd?.([audioFile])}catch(error2){console.error("Failed to stop recording:",error2)}}else +try{await audioRecorder.startRecording(),set$1(isRecording,!0)}catch(error2){console.error("Failed to start recording:",error2)}}var $$exports={focus:focus2,resetTextareaHeight,openModelSelector,checkModelSelected},fragment=root$1u(),node2=first_child(fragment);bind_this(ChatFormFileInputInvisible(node2,{onFileSelect:handleFileSelect}),$$value=>set$1(fileInputRef,$$value,!0),()=>get$3(fileInputRef));var form=sibling(node2,2),node_1=child(form);bind_this(ChatFormPickers(node_1,{get isPromptPickerOpen(){ +return get$3(isPromptPickerOpen)},get promptSearchQuery(){return get$3(promptSearchQuery)},get isInlineResourcePickerOpen(){return get$3(isInlineResourcePickerOpen)},get resourceSearchQuery(){return get$3(resourceSearchQuery)},onPromptPickerClose:handlePromptPickerClose,onInlineResourcePickerClose:handleInlineResourcePickerClose,onInlineResourceSelect:handleInlineResourceSelect,onPromptLoadStart:handlePromptLoadStart,onPromptLoadComplete:handlePromptLoadComplete,onPromptLoadError:handlePromptLoadError, +onInlineResourceBrowse:handleBrowseResources}),$$value=>set$1(pickersRef,$$value,!0),()=>get$3(pickersRef));var div=sibling(node_1,2),node_2=child(div);{let $0=user_derived(()=>get$3(activeModelId)??void 0);ChatAttachmentsList(node_2,{get attachments(){return attachments()},onFileRemove:handleFileRemove,limitToSingleRow:!0,class:"py-5",style:"scroll-padding: 1rem;",get activeModelId(){return get$3($0)},get uploadedFiles(){return uploadedFiles()},set uploadedFiles($$value){uploadedFiles($$value)}})} +var div_1=sibling(node_2,2),node_3=child(div_1);bind_this(ChatFormTextarea(node_3,{class:"px-5 py-1.5 md:pt-0",onKeydown:handleKeydown,onInput:()=>{handleInput(),$$props.onValueChange?.(value())},get disabled(){return disabled()},get placeholder(){return placeholder()},get value(){return value()},set value($$value){value($$value)}}),$$value=>set$1(textareaRef,$$value,!0),()=>get$3(textareaRef));var node_4=sibling(node_3,2);{var consequent=$$anchor2=>{ChatFormMcpResourcesList($$anchor2,{class:"mb\ +-3",onResourceClick:uri2=>{set$1(preSelectedResourceUri,uri2,!0),set$1(isResourceDialogOpen,!0)}})},d2=user_derived(()=>mcpHasResourceAttachments());if_block(node_4,$$render=>{get$3(d2)&&$$render(consequent)})}var node_5=sibling(node_4,2);{let $0=user_derived(()=>showMcpPromptButton()?()=>set$1(isPromptPickerOpen,!0):void 0);bind_this(ChatFormActions(node_5,{class:"px-3",get canSend(){return get$3(canSubmit)},get disabled(){return disabled()},get isLoading(){return isLoading2()},get isRecording(){ +return get$3(isRecording)},get showAddButton(){return showAddButton()},get showModelSelector(){return showModelSelector()},get uploadedFiles(){return uploadedFiles()},onFileUpload:handleFileUpload,onMicClick:handleMicClick,get onStop(){return $$props.onStop},onSystemPromptClick:()=>$$props.onSystemPromptClick?.({message:value(),files:uploadedFiles()}),get onMcpPromptClick(){return get$3($0)},onMcpResourcesClick:()=>set$1(isResourceDialogOpen,!0)}),$$value=>set$1(chatFormActionsRef,$$value,!0),()=>get$3( +chatFormActionsRef))}reset(div_1),reset(div),reset(form);var node_6=sibling(form,2);return DialogMcpResourcesBrowser(node_6,{get preSelectedUri(){return get$3(preSelectedResourceUri)},onAttach:resource=>{mcpStore.attachResource(resource.uri)},onOpenChange:newOpen=>{newOpen||set$1(preSelectedResourceUri,void 0)},get open(){return get$3(isResourceDialogOpen)},set open($$value){set$1(isResourceDialogOpen,$$value,!0)}}),template_effect(()=>{set_class(form,1,`relative ${className()??""}`),set_class(div, +1,`${INPUT_CLASSES??""} overflow-hidden rounded-3xl backdrop-blur-md ${disabled()?"cursor-not-allowed opacity-60":""}`)}),event("submit",form,event2=>{event2.preventDefault(),!(!get$3(canSubmit)||disabled()||get$3(hasLoadingAttachments))&&$$props.onSubmit?.()}),event("paste",div_1,handlePaste),append($$anchor,fragment),pop($$exports)}function Dropdown_menu_content($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),sideOffset=prop($$props,"sideOffset",3,4),restProps=rest_props( +$$props,["$$slots","$$events","$$legacy","ref","sideOffset","portalProps","class"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Portal$2,($$anchor2,DropdownMenuPrimitive_Portal)=>{DropdownMenuPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=comment$2(),node_1=first_child(fragment_1);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dro\ +pdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=op\ +en]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node_1,()=>Dropdown_menu_content$1,($$anchor4,DropdownMenuPrimitive_Content)=>{DropdownMenuPrimitive_Content($$anchor4,spread_props({"data-slot":"dropdown-menu-content",get sideOffset(){return sideOffset()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_item($$anchor,$$props){ +push$1($$props,!0);let ref2=prop($$props,"ref",15,null),variant=prop($$props,"variant",3,"default"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","variant"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("relative flex cursor-pointer items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disab\ +led]:opacity-50 data-[inset]:pl-8 data-[variant=destructive]:text-destructive data-[variant=destructive]:data-highlighted:bg-destructive/10 data-[variant=destructive]:data-highlighted:text-destructive dark:data-[variant=destructive]:data-highlighted:bg-destructive/20 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground data-[variant=destructive]:*:[svg]:!text-destructive",$$props.class));component(node2,()=>Menu_item, +($$anchor2,DropdownMenuPrimitive_Item)=>{DropdownMenuPrimitive_Item($$anchor2,spread_props({"data-slot":"dropdown-menu-item",get"data-inset"(){return $$props.inset},get"data-variant"(){return variant()},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_separator($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events", +"$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("-mx-1 my-1 h-px bg-border/20",$$props.class));component(node2,()=>Menu_separator,($$anchor2,DropdownMenuPrimitive_Separator)=>{DropdownMenuPrimitive_Separator($$anchor2,spread_props({"data-slot":"dropdown-menu-separator",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Dropdown_menu_trigger($$anchor,$$props){ +push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Menu_trigger,($$anchor2,DropdownMenuPrimitive_Trigger)=>{DropdownMenuPrimitive_Trigger($$anchor2,spread_props({"data-slot":"dropdown-menu-trigger"},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))}),append($$anchor,fragment),pop()}function Dropdown_menu_sub_content($$anchor,$$props){ +push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("z-50 max-h-(--bits-dropdown-menu-content-available-height) min-w-[8rem] origin-(--bits-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border border-border bg-popover p-1.5 text-popover-foreground shadow-md outline-none data-[side=bottom]:slide-\ +in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 dark:border-border/20",$$props.class));component(node2,()=>Menu_sub_content,($$anchor2,DropdownMenuPrimitive_SubContent)=>{DropdownMenuPrimitive_SubContent( +$$anchor2,spread_props({"data-slot":"dropdown-menu-sub-content",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}var root_1$P=from_html(" ",1);function Dropdown_menu_sub_trigger($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","inset","children"]);var fragment=comment$2(),node2=first_child(fragment);{ +let $0=user_derived(()=>cn$1("flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-highlighted:bg-accent data-highlighted:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 data-[state=open]:bg-accent data-[state=open]:text-accent-foreground [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&_svg:not([class*='text-'])]:text-muted-foreground",$$props.class));component( +node2,()=>Menu_sub_trigger,($$anchor2,DropdownMenuPrimitive_SubTrigger)=>{DropdownMenuPrimitive_SubTrigger($$anchor2,spread_props({"data-slot":"dropdown-menu-sub-trigger",get"data-inset"(){return $$props.inset},get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$P(),node_1=first_child(fragment_1);snippet(node_1,()=>$$props.children??noop$3);var node_2=sibling(node_1,2);Chevron_right(node_2, +{class:"ml-auto size-4"}),append($$anchor3,fragment_1)},$$slots:{default:!0}}))})}append($$anchor,fragment),pop()}const Sub=Menu_sub,Root$4=Menu;function useAttachmentMenu(getFlags,getCallbacks,close2){const modalityFlags=user_derived(getFlags),callbacks=user_derived(()=>{const cbs=getCallbacks(),wrap2=fn=>()=>{close2(),fn?.()};return{[AttachmentAction.FILE_UPLOAD]:wrap2(cbs.onFileUpload),[AttachmentAction.SYSTEM_PROMPT_CLICK]:wrap2(cbs.onSystemPromptClick),[AttachmentAction.MCP_PROMPT_CLICK]:wrap2( +cbs.onMcpPromptClick),[AttachmentAction.MCP_RESOURCES_CLICK]:wrap2(cbs.onMcpResourcesClick)}});function isItemEnabled(enabledWhen){return!enabledWhen||enabledWhen==="always"?!0:!!get$3(modalityFlags)[enabledWhen]}function isItemVisible(visibleWhen){return visibleWhen?!!get$3(modalityFlags)[visibleWhen]:!0}function getSystemMessageTooltip(){return page$1.params.id?"Inject custom system message at the beginning of the conversation":"Add custom system message for a new conversation"}return{get callbacks(){ +return get$3(callbacks)},isItemEnabled,isItemVisible,getSystemMessageTooltip}}var root_6$s=from_html(" ",1),root_10$f=from_html(" ",1),root_11$f=from_html("

"),root_8$r=from_html(" ",1),root_16$6=from_html(" ",1),root_17$8=from_html("

PDFs will be converted to text. Image-based PDFs may not work properly.

"),root_13$d=from_html(" ",1),root_22$2=from_html(" ",1),root_23$5=from_html("

"),root_20$5=from_html( +" ",1),root_26$2=from_html(" ",1),root_3$S=from_html(" ",1),root_1$O=from_html(" ",1),root$1t=from_html("
");function ChatFormActionAddDropdown($$anchor,$$props){push$1($$props,!0);let className=prop($$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),hasAudioModality=prop($$props,"hasAudioModality",3,!1),hasVisionModality=prop($$props,"hasVisionModality",3,!1),hasMcpPromptsSupport=prop($$props,"hasMcpPromptsSupport",3, +!1),hasMcpResourcesSupport=prop($$props,"hasMcpResourcesSupport",3,!1),dropdownOpen=state$1(!1);function handleMcpSettingsClick(){set$1(dropdownOpen,!1),$$props.onMcpSettingsClick?.()}const attachmentMenu=useAttachmentMenu(()=>({hasVisionModality:hasVisionModality(),hasAudioModality:hasAudioModality(),hasMcpPromptsSupport:hasMcpPromptsSupport(),hasMcpResourcesSupport:hasMcpResourcesSupport()}),()=>({onFileUpload:$$props.onFileUpload,onSystemPromptClick:$$props.onSystemPromptClick,onMcpPromptClick:$$props. +onMcpPromptClick,onMcpResourcesClick:$$props.onMcpResourcesClick}),()=>{set$1(dropdownOpen,!1)});var div=root$1t(),node2=child(div);component(node2,()=>Root$4,($$anchor2,DropdownMenu_Root)=>{DropdownMenu_Root($$anchor2,{get open(){return get$3(dropdownOpen)},set open($$value){set$1(dropdownOpen,$$value,!0)},children:($$anchor3,$$slotProps)=>{var fragment=root_1$O(),node_1=first_child(fragment);component(node_1,()=>Dropdown_menu_trigger,($$anchor4,DropdownMenu_Trigger)=>{DropdownMenu_Trigger($$anchor4, +{name:"Attach files",get disabled(){return disabled()},children:($$anchor5,$$slotProps2)=>{var fragment_1=comment$2(),node_2=first_child(fragment_1);snippet(node_2,()=>$$props.trigger,()=>({disabled:disabled()})),append($$anchor5,fragment_1)},$$slots:{default:!0}})});var node_3=sibling(node_1,2);component(node_3,()=>Dropdown_menu_content,($$anchor4,DropdownMenu_Content)=>{DropdownMenu_Content($$anchor4,{align:"start",class:"w-48",children:($$anchor5,$$slotProps2)=>{var fragment_2=root_3$S(),node_4=first_child( +fragment_2);each(node_4,17,()=>ATTACHMENT_FILE_ITEMS,item=>item.id,($$anchor6,item)=>{const enabled=user_derived(()=>attachmentMenu.isItemEnabled(get$3(item).enabledWhen));var fragment_3=comment$2(),node_5=first_child(fragment_3);{var consequent=$$anchor7=>{var fragment_4=comment$2(),node_6=first_child(fragment_4);{let $0=user_derived(()=>get$3(item).class??"");component(node_6,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item)=>{DropdownMenu_Item($$anchor8,{get class(){return`${get$3($0)??""}\ + flex cursor-pointer items-center gap-2`},onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor9,$$slotProps3)=>{var fragment_5=root_6$s(),node_7=first_child(fragment_5);component(node_7,()=>get$3(item).icon,($$anchor10,item_icon)=>{item_icon($$anchor10,{class:"h-4 w-4"})});var span=sibling(node_7,2),text2=child(span,!0);reset(span),template_effect(()=>set_text(text2,get$3(item).label)),append($$anchor9,fragment_5)},$$slots:{default:!0}})})}append($$anchor7,fragment_4)},consequent_1=$$anchor7=>{ +var fragment_6=comment$2(),node_8=first_child(fragment_6);component(node_8,()=>Root$5,($$anchor8,Tooltip_Root)=>{Tooltip_Root($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_7=root_8$r(),node_9=first_child(fragment_7);component(node_9,()=>Tooltip_trigger,($$anchor10,Tooltip_Trigger)=>{Tooltip_Trigger($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_8=comment$2(),node_10=first_child(fragment_8);{let $0=user_derived( +()=>get$3(item).class??"");component(node_10,()=>Dropdown_menu_item,($$anchor12,DropdownMenu_Item_1)=>{DropdownMenu_Item_1($$anchor12,{get class(){return`${get$3($0)??""} flex cursor-pointer items-center gap-2`},disabled:!0,children:($$anchor13,$$slotProps5)=>{var fragment_9=root_10$f(),node_11=first_child(fragment_9);component(node_11,()=>get$3(item).icon,($$anchor14,item_icon_1)=>{item_icon_1($$anchor14,{class:"h-4 w-4"})});var span_1=sibling(node_11,2),text_1=child(span_1,!0);reset(span_1),template_effect( +()=>set_text(text_1,get$3(item).label)),append($$anchor13,fragment_9)},$$slots:{default:!0}})})}append($$anchor11,fragment_8)},$$slots:{default:!0}})});var node_12=sibling(node_9,2);component(node_12,()=>Tooltip_content,($$anchor10,Tooltip_Content)=>{Tooltip_Content($$anchor10,{side:"right",children:($$anchor11,$$slotProps4)=>{var p2=root_11$f(),text_2=child(p2,!0);reset(p2),template_effect(()=>set_text(text_2,get$3(item).disabledTooltip)),append($$anchor11,p2)},$$slots:{default:!0}})}),append($$anchor9, +fragment_7)},$$slots:{default:!0}})}),append($$anchor7,fragment_6)};if_block(node_5,$$render=>{get$3(enabled)?$$render(consequent):get$3(item).disabledTooltip&&$$render(consequent_1,1)})}append($$anchor6,fragment_3)});var node_13=sibling(node_4,2);{var consequent_3=$$anchor6=>{var fragment_10=comment$2(),node_14=first_child(fragment_10);component(node_14,()=>Root$5,($$anchor7,Tooltip_Root_1)=>{Tooltip_Root_1($$anchor7,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor8,$$slotProps3)=>{ +var fragment_11=root_13$d(),node_15=first_child(fragment_11);component(node_15,()=>Tooltip_trigger,($$anchor9,Tooltip_Trigger_1)=>{Tooltip_Trigger_1($$anchor9,{class:"w-full",children:($$anchor10,$$slotProps4)=>{var fragment_12=comment$2(),node_16=first_child(fragment_12);component(node_16,()=>Dropdown_menu_item,($$anchor11,DropdownMenu_Item_2)=>{DropdownMenu_Item_2($$anchor11,{class:"flex cursor-pointer items-center gap-2",get onclick(){return attachmentMenu.callbacks.onFileUpload},children:($$anchor12,$$slotProps5)=>{ +const pdfItem=user_derived(()=>ATTACHMENT_FILE_ITEMS.find(i=>i.id===AttachmentMenuItemId.PDF));var fragment_13=comment$2(),node_17=first_child(fragment_13);{var consequent_2=$$anchor13=>{var fragment_14=root_16$6(),node_18=first_child(fragment_14);component(node_18,()=>get$3(pdfItem).icon,($$anchor14,pdfItem_icon)=>{pdfItem_icon($$anchor14,{class:"h-4 w-4"})});var span_2=sibling(node_18,2),text_3=child(span_2,!0);reset(span_2),template_effect(()=>set_text(text_3,get$3(pdfItem).label)),append($$anchor13, +fragment_14)};if_block(node_17,$$render=>{get$3(pdfItem)&&$$render(consequent_2)})}append($$anchor12,fragment_13)},$$slots:{default:!0}})}),append($$anchor10,fragment_12)},$$slots:{default:!0}})});var node_19=sibling(node_15,2);component(node_19,()=>Tooltip_content,($$anchor9,Tooltip_Content_1)=>{Tooltip_Content_1($$anchor9,{side:"right",children:($$anchor10,$$slotProps4)=>{var p_1=root_17$8();append($$anchor10,p_1)},$$slots:{default:!0}})}),append($$anchor8,fragment_11)},$$slots:{default:!0}})}), +append($$anchor6,fragment_10)},d2=user_derived(()=>!attachmentMenu.isItemEnabled("hasVisionModality"));if_block(node_13,$$render=>{get$3(d2)&&$$render(consequent_3)})}var node_20=sibling(node_13,2);component(node_20,()=>Dropdown_menu_separator,($$anchor6,DropdownMenu_Separator)=>{DropdownMenu_Separator($$anchor6,{})});var node_21=sibling(node_20,2);each(node_21,17,()=>ATTACHMENT_EXTRA_ITEMS,item=>item.id,($$anchor6,item)=>{var fragment_15=comment$2(),node_22=first_child(fragment_15);{var consequent_4=$$anchor7=>{ +var fragment_16=comment$2(),node_23=first_child(fragment_16);component(node_23,()=>Root$5,($$anchor8,Tooltip_Root_2)=>{Tooltip_Root_2($$anchor8,{get delayDuration(){return TOOLTIP_DELAY_DURATION},children:($$anchor9,$$slotProps3)=>{var fragment_17=root_20$5(),node_24=first_child(fragment_17);component(node_24,()=>Tooltip_trigger,($$anchor10,Tooltip_Trigger_2)=>{Tooltip_Trigger_2($$anchor10,{class:"w-full",children:($$anchor11,$$slotProps4)=>{var fragment_18=comment$2(),node_25=first_child(fragment_18); +component(node_25,()=>Dropdown_menu_item,($$anchor12,DropdownMenu_Item_3)=>{DropdownMenu_Item_3($$anchor12,{class:"flex cursor-pointer items-center gap-2",onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor13,$$slotProps5)=>{var fragment_19=root_22$2(),node_26=first_child(fragment_19);component(node_26,()=>get$3(item).icon,($$anchor14,item_icon_2)=>{item_icon_2($$anchor14,{class:"h-4 w-4"})});var span_3=sibling(node_26,2),text_4=child(span_3,!0);reset(span_3),template_effect( +()=>set_text(text_4,get$3(item).label)),append($$anchor13,fragment_19)},$$slots:{default:!0}})}),append($$anchor11,fragment_18)},$$slots:{default:!0}})});var node_27=sibling(node_24,2);component(node_27,()=>Tooltip_content,($$anchor10,Tooltip_Content_2)=>{Tooltip_Content_2($$anchor10,{side:"right",children:($$anchor11,$$slotProps4)=>{var p_2=root_23$5(),text_5=child(p_2,!0);reset(p_2),template_effect($0=>set_text(text_5,$0),[()=>attachmentMenu.getSystemMessageTooltip()]),append($$anchor11,p_2)}, +$$slots:{default:!0}})}),append($$anchor9,fragment_17)},$$slots:{default:!0}})}),append($$anchor7,fragment_16)};if_block(node_22,$$render=>{get$3(item).id===AttachmentMenuItemId.SYSTEM_MESSAGE&&$$render(consequent_4)})}append($$anchor6,fragment_15)});var node_28=sibling(node_21,2);ChatFormActionAddToolsSubmenu(node_28,{});var node_29=sibling(node_28,2);ChatFormActionAddMcpServersSubmenu(node_29,{onMcpSettingsClick:handleMcpSettingsClick});var node_30=sibling(node_29,2);each(node_30,17,()=>ATTACHMENT_MCP_ITEMS, +item=>item.id,($$anchor6,item)=>{var fragment_20=comment$2(),node_31=first_child(fragment_20);{var consequent_5=$$anchor7=>{var fragment_21=comment$2(),node_32=first_child(fragment_21);component(node_32,()=>Dropdown_menu_item,($$anchor8,DropdownMenu_Item_4)=>{DropdownMenu_Item_4($$anchor8,{class:"flex cursor-pointer items-center gap-2",onclick:()=>attachmentMenu.callbacks[get$3(item).action](),children:($$anchor9,$$slotProps3)=>{var fragment_22=root_26$2(),node_33=first_child(fragment_22);component( +node_33,()=>get$3(item).icon,($$anchor10,item_icon_3)=>{item_icon_3($$anchor10,{class:"h-4 w-4"})});var span_4=sibling(node_33,2),text_6=child(span_4,!0);reset(span_4),template_effect(()=>set_text(text_6,get$3(item).label)),append($$anchor9,fragment_22)},$$slots:{default:!0}})}),append($$anchor7,fragment_21)},d_12=user_derived(()=>attachmentMenu.isItemVisible(get$3(item).visibleWhen));if_block(node_31,$$render=>{get$3(d_12)&&$$render(consequent_5)})}append($$anchor6,fragment_20)}),append($$anchor5, +fragment_2)},$$slots:{default:!0}})}),append($$anchor3,fragment)},$$slots:{default:!0}})}),reset(div),template_effect(()=>set_class(div,1,`flex items-center gap-1 ${className()??""}`)),append($$anchor,div),pop()}function Sheet_overlay($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("fixed inset-0 z-50 bg-black/5\ +0 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:fill-mode-forwards data-[state=open]:animate-in data-[state=open]:fade-in-0",$$props.class));component(node2,()=>Dialog_overlay$1,($$anchor2,SheetPrimitive_Overlay)=>{SheetPrimitive_Overlay($$anchor2,spread_props({"data-slot":"sheet-overlay",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}const sheetVariants=tv({base:`bor\ +der-border/30 dark:border-border/20 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fill-mode-forwards fixed z-50 flex flex-col gap-4 shadow-sm transition ease-in-out data-[state=closed]:duration-300 data-[state=open]:duration-500 ${PANEL_CLASSES}`,variants:{side:{top:"data-[state=closed]:slide-out-to-top data-[state=open]:slide-in-from-top inset-x-0 top-0 h-auto border-b",bottom:"data-[state=closed]:slide-out-to-bottom data-[state=open]:slide-in-from-bottom inse\ +t-x-0 bottom-0 h-auto border-t",left:"data-[state=closed]:slide-out-to-left data-[state=open]:slide-in-from-left inset-y-0 left-0 h-full w-3/4 border-r sm:max-w-sm",right:"data-[state=closed]:slide-out-to-right data-[state=open]:slide-in-from-right inset-y-0 right-0 h-full w-3/4 border-l sm:max-w-sm"}},defaultVariants:{side:"right"}});var root_3$R=from_html(' Close',1),root_2$12=from_html(" ",1),root_1$N=from_html(" ",1);function Sheet_content($$anchor,$$props){ +push$1($$props,!0);let ref2=prop($$props,"ref",15,null),side=prop($$props,"side",3,"right"),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","side","portalProps","children"]);var fragment=comment$2(),node2=first_child(fragment);component(node2,()=>Portal$2,($$anchor2,SheetPrimitive_Portal)=>{SheetPrimitive_Portal($$anchor2,spread_props(()=>$$props.portalProps,{children:($$anchor3,$$slotProps)=>{var fragment_1=root_1$N(),node_1=first_child(fragment_1);Sheet_overlay(node_1, +{});var node_2=sibling(node_1,2);{let $0=user_derived(()=>cn$1(sheetVariants({side:side()}),$$props.class));component(node_2,()=>Dialog_content$1,($$anchor4,SheetPrimitive_Content)=>{SheetPrimitive_Content($$anchor4,spread_props({"data-slot":"sheet-content",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)},children:($$anchor5,$$slotProps2)=>{var fragment_2=root_2$12(),node_3=first_child(fragment_2);snippet(node_3,()=>$$props.children??noop$3); +var node_4=sibling(node_3,2);component(node_4,()=>Dialog_close,($$anchor6,SheetPrimitive_Close)=>{SheetPrimitive_Close($$anchor6,{class:"absolute top-4 right-4 rounded-xs opacity-70 ring-offset-background transition-opacity hover:opacity-100 focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:outline-hidden disabled:pointer-events-none",children:($$anchor7,$$slotProps3)=>{var fragment_3=root_3$R(),node_5=first_child(fragment_3);X(node_5,{class:"size-4"}),next$1( +2),append($$anchor7,fragment_3)},$$slots:{default:!0}})}),append($$anchor5,fragment_2)},$$slots:{default:!0}}))})}append($$anchor3,fragment_1)},$$slots:{default:!0}}))}),append($$anchor,fragment),pop()}var root$1s=from_html("
");function Sheet_header($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class","children"]);var div=root$1s();attribute_effect(div,$0=>({"data-slot":"sheet-header",class:$0, +...restProps}),[()=>cn$1("flex flex-col gap-1.5 p-4",$$props.class)]);var node2=child(div);snippet(node2,()=>$$props.children??noop$3),reset(div),bind_this(div,$$value=>ref2($$value),()=>ref2()),append($$anchor,div),pop()}function Sheet_title($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(),node2=first_child(fragment);{let $0=user_derived(()=>cn$1("font-semibold text-fo\ +reground",$$props.class));component(node2,()=>Dialog_title$1,($$anchor2,SheetPrimitive_Title)=>{SheetPrimitive_Title($$anchor2,spread_props({"data-slot":"sheet-title",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}function Sheet_description($$anchor,$$props){push$1($$props,!0);let ref2=prop($$props,"ref",15,null),restProps=rest_props($$props,["$$slots","$$events","$$legacy","ref","class"]);var fragment=comment$2(), +node2=first_child(fragment);{let $0=user_derived(()=>cn$1("text-sm text-muted-foreground",$$props.class));component(node2,()=>Dialog_description$1,($$anchor2,SheetPrimitive_Description)=>{SheetPrimitive_Description($$anchor2,spread_props({"data-slot":"sheet-description",get class(){return get$3($0)}},()=>restProps,{get ref(){return ref2()},set ref($$value){ref2($$value)}}))})}append($$anchor,fragment),pop()}const Root$3=Dialog;var root_3$Q=from_html(" ",1),root_7$r=from_html(''),root_10$e=from_html(''),root_11$e=from_html("

"),root_9$k=from_html(" ",1),root_15$9=from_html(''),root_16$5=from_html("

PDFs will be converted to text. Image-based PDFs may not work properly.

"),root_14$5=from_html(" ",1),root_20$4=from_html(''),root_21$3=from_html("

"), +root_19$9=from_html(" ",1),root_23$4=from_html(''),root_2$11=from_html(' ',1),root_1$M=from_html(" ",1),root$1r=from_html("
");function ChatFormActionAddSheet($$anchor,$$props){ push$1($$props,!0);let className=prop($$props,"class",3,""),disabled=prop($$props,"disabled",3,!1),hasAudioModality=prop($$props,"hasAudioModality",3,!1),hasVisionModality=prop($$props,"hasVisionModality",3,!1),hasMcpPromptsSupport=prop($$props,"hasMcpPromptsSupport",3,!1),hasMcpResourcesSupport=prop($$props,"hasMcpResourcesSupport",3,!1),sheetOpen=state$1(!1);const attachmentMenu=useAttachmentMenu(()=>({hasVisionModality:hasVisionModality(),hasAudioModality:hasAudioModality(),hasMcpPromptsSupport:hasMcpPromptsSupport(), hasMcpResourcesSupport:hasMcpResourcesSupport()}),()=>({onFileUpload:$$props.onFileUpload,onSystemPromptClick:$$props.onSystemPromptClick,onMcpPromptClick:$$props.onMcpPromptClick,onMcpResourcesClick:$$props.onMcpResourcesClick}),()=>{set$1(sheetOpen,!1)}),sheetItemClass="flex w-full items-center gap-3 rounded-md px-3 py-2.5 text-left text-sm transition-colors hover:bg-accent active:bg-accent disabled:cursor-not-allowed disabled:opacity-50";var div=root$1r(),node2=child(div);component(node2,()=>Root$3, ($$anchor2,Sheet_Root)=>{Sheet_Root($$anchor2,{get open(){return get$3(sheetOpen)},set open($$value){set$1(sheetOpen,$$value,!0)},children:($$anchor3,$$slotProps)=>{var fragment=root_1$M(),node_1=first_child(fragment);snippet(node_1,()=>$$props.trigger,()=>({disabled:disabled(),onclick:()=>set$1(sheetOpen,!0)}));var node_2=sibling(node_1,2);component(node_2,()=>Sheet_content,($$anchor4,Sheet_Content)=>{Sheet_Content($$anchor4,{side:"bottom",class:"max-h-[85vh] gap-0 overflow-y-auto",children:($$anchor5,$$slotProps2)=>{ diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index 0675ce31d06..73de0d3bba1 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1040,6 +1040,10 @@ json oaicompat_chat_params_parse( inputs.use_jinja = opt.use_jinja; inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", caps["supports_parallel_tool_calls"]); inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true); + const bool continue_final_message = json_value(body, "continue_final_message", false); + if (continue_final_message && inputs.add_generation_prompt) { + throw std::invalid_argument("Cannot set both add_generation_prompt and continue_final_message to true."); + } inputs.reasoning_format = opt.reasoning_format; if (body.contains("reasoning_format")) { inputs.reasoning_format = common_reasoning_format_from_name(body.at("reasoning_format").get()); @@ -1071,7 +1075,10 @@ json oaicompat_chat_params_parse( // if the assistant message appears at the end of list, we do not add end-of-turn token // for ex. this can be useful to modify the reasoning process in reasoning models - bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" && opt.prefill_assistant; + // continue_final_message is the explicit opt in alias from the vLLM/transformers API, + // equivalent to the prefill_assistant heuristic + bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" + && (continue_final_message || opt.prefill_assistant); common_chat_msg last_message; if (prefill_assistant_message) { last_message = inputs.messages.back(); diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py index edef0a93b49..243e4160578 100644 --- a/tools/server/tests/unit/test_chat_completion.py +++ b/tools/server/tests/unit/test_chat_completion.py @@ -178,6 +178,45 @@ def test_chat_template_assistant_prefill(prefill, re_prefill): assert res.body["__verbose"]["prompt"] == f" <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{re_prefill}" +def test_chat_template_continue_final_message_vllm_compat(): + """continue_final_message is the vLLM/transformers explicit alias for the prefill_assistant heuristic. + Both must produce the same prompt.""" + global server + server.chat_template = "llama3" + server.debug = True + server.start() + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": 8, + "add_generation_prompt": False, + "continue_final_message": True, + "messages": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + {"role": "assistant", "content": "Whill"}, + ] + }) + assert res.status_code == 200 + assert "__verbose" in res.body + assert res.body["__verbose"]["prompt"] == " <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nWhill" + + +def test_chat_template_continue_final_message_mutual_exclusion(): + """add_generation_prompt and continue_final_message both set to true must be rejected""" + global server + server.chat_template = "llama3" + server.start() + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": 8, + "add_generation_prompt": True, + "continue_final_message": True, + "messages": [ + {"role": "user", "content": "Hi"}, + {"role": "assistant", "content": "Hello"}, + ] + }) + assert res.status_code == 400 + + def test_apply_chat_template(): global server server.chat_template = "command-r" diff --git a/tools/server/webui/src/lib/services/chat.service.ts b/tools/server/webui/src/lib/services/chat.service.ts index a26de0d5d6e..587b48b6f19 100644 --- a/tools/server/webui/src/lib/services/chat.service.ts +++ b/tools/server/webui/src/lib/services/chat.service.ts @@ -130,7 +130,8 @@ export class ChatService { timings_per_token, // Config options disableReasoningParsing, - excludeReasoningFromContext + excludeReasoningFromContext, + continueFinalMessage } = options; const normalizedMessages: ApiChatMessageData[] = messages @@ -209,6 +210,11 @@ export class ChatService { ? ReasoningFormat.NONE : ReasoningFormat.AUTO; + if (continueFinalMessage) { + requestBody.continue_final_message = true; + requestBody.add_generation_prompt = false; + } + if (temperature !== undefined) requestBody.temperature = temperature; if (max_tokens !== undefined) { // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 7b4a4e04293..7c34579ca56 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -1301,6 +1301,7 @@ class ChatStore { contextWithContinue, { ...this.getApiOptions(), + continueFinalMessage: true, onChunk: (chunk: string) => { appendedContent += chunk; hasReceivedContent = true; diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index c1a02342357..63a464cf194 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -239,6 +239,9 @@ export interface ApiChatCompletionRequest { // Custom parameters (JSON string) custom?: Record; timings_per_token?: boolean; + // Continuation control (vLLM compat) + add_generation_prompt?: boolean; + continue_final_message?: boolean; } export interface ApiChatCompletionToolCallFunctionDelta { diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index 8f5f164bd72..1ab7a7e5d54 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -92,6 +92,8 @@ export interface SettingsChatServiceOptions { // Custom parameters custom?: string; timings_per_token?: boolean; + // Continuation control (vLLM compat), opt in to the explicit continue final message flag + continueFinalMessage?: boolean; // Callbacks onChunk?: (chunk: string) => void; onReasoningChunk?: (chunk: string) => void;