Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions ggml/src/ggml-sycl/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4999,11 +4999,10 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_ACC:
return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]);
case GGML_OP_PAD:
// TODO: add circular padding support for SYCL, see https://github.com/ggml-org/llama.cpp/pull/16985
if (ggml_get_op_params_i32(op, 8) != 0) {
return false;
}
return ggml_is_contiguous(op->src[0]);
return true;
case GGML_OP_LEAKY_RELU:
case GGML_OP_TIMESTEP_EMBEDDING:
case GGML_OP_RWKV_WKV6:
Expand Down
54 changes: 27 additions & 27 deletions ggml/src/ggml-sycl/pad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
//#include "common.hpp"
#include "pad.hpp"

static void pad_f32(const float * src, float * dst,
static void pad_f32(const float * src, size_t s00, size_t s01, size_t s02, size_t s03,
float * dst,
const int lp0, const int rp0, const int lp1, const int rp1,
const int lp2, const int rp2, const int lp3, const int rp3,
const int ne0, const int ne1, const int ne2, const int ne3,
Expand All @@ -27,7 +28,6 @@ static void pad_f32(const float * src, float * dst,
return;
}

// operation
const int64_t dst_idx = i3*(ne0*ne1*ne2) + i2*(ne0*ne1) + i1*ne0 + i0;
if ((i0 >= lp0 && i0 < ne0 - rp0) &&
(i1 >= lp1 && i1 < ne1 - rp1) &&
Expand All @@ -37,56 +37,56 @@ static void pad_f32(const float * src, float * dst,
const int64_t i01 = i1 - lp1;
const int64_t i02 = i2 - lp2;
const int64_t i03 = i3 - lp3;
const int64_t ne02 = ne2 - lp2 - rp2;
const int64_t ne01 = ne1 - lp1 - rp1;
const int64_t ne00 = ne0 - lp0 - rp0;

const int64_t src_idx = i03 * (ne00 * ne01 * ne02) +
i02 * (ne00 * ne01) + i01 * ne00 + i00;
const int64_t src_idx = i03 * s03 + i02 * s02 + i01 * s01 + i00 * s00;

dst[dst_idx] = src[src_idx];
} else {
dst[dst_idx] = 0.0f;
}
}

static void pad_f32_sycl(const float *src, float *dst, const int lp0,
const int rp0, const int lp1, const int rp1,
const int lp2, const int rp2, const int lp3,
const int rp3, const int ne0, const int ne1,
const int ne2, const int ne3,
static void pad_f32_sycl(const float * src, size_t s00, size_t s01, size_t s02, size_t s03,
float * dst, const int lp0, const int rp0, const int lp1, const int rp1,
const int lp2, const int rp2, const int lp3, const int rp3,
const int ne0, const int ne1, const int ne2, const int ne3,
dpct::queue_ptr stream) {
int num_blocks = (ne0 + SYCL_PAD_BLOCK_SIZE - 1) / SYCL_PAD_BLOCK_SIZE;
dpct::dim3 gridDim(num_blocks, ne1, ne2 * ne3);
sycl::range<3> grid(ne2 * ne3, ne1, num_blocks);
stream->parallel_for(
sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE),
sycl::nd_range<3>(grid * sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE),
sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE)),
[=](sycl::nd_item<3> item_ct1) {
pad_f32(src, dst, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3, ne0, ne1,
ne2, ne3, item_ct1);
pad_f32(src, s00, s01, s02, s03, dst, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3,
ne0, ne1, ne2, ne3, item_ct1);
});
}

// GGML_OP_PAD for the SYCL backend.
//
// Copies dst->src[0] into dst, offset by the left padding of each dimension;
// every destination element outside the copied region is written as 0.0f by
// the kernel. Both tensors must be F32.
//
// The source is addressed through its per-dimension strides (nb[] converted
// from bytes to elements), so it is not required to be contiguous.
// Padding amounts are read from dst->op_params[0..7] as
// {lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3}.
void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0   = dst->src[0];
    const float *       src0_d = (const float *) src0->data;
    float *             dst_d  = (float *) dst->data;
    dpct::queue_ptr     stream = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_F32);

    // Source strides in elements: nb[] is in bytes, the kernel indexes a float pointer.
    const size_t ts  = ggml_type_size(src0->type);
    const size_t s00 = src0->nb[0] / ts;
    const size_t s01 = src0->nb[1] / ts;
    const size_t s02 = src0->nb[2] / ts;
    const size_t s03 = src0->nb[3] / ts;

    // Left/right padding per dimension.
    const int32_t * params = (const int32_t *) (dst->op_params);
    const int32_t   lp0    = params[0];
    const int32_t   rp0    = params[1];
    const int32_t   lp1    = params[2];
    const int32_t   rp1    = params[3];
    const int32_t   lp2    = params[4];
    const int32_t   rp2    = params[5];
    const int32_t   lp3    = params[6];
    const int32_t   rp3    = params[7];

    pad_f32_sycl(src0_d, s00, s01, s02, s03, dst_d,
                 lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3,
                 dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], stream);
}
Expand Down