未验证 提交 ab30ccc2 编写于 作者: X xiebaiyuan 提交者: GitHub

[mobile][opencl] suite model male2fe ,support a type element_mul ,test=mobile (#2705)

上级 ec18a843
......@@ -110,4 +110,22 @@ __kernel void channel_mul_d2(__global image2d_t input, __global image2d_t bias,
half4 in = read_imageh(input, sampler, coords);
half4 output = mad(in, biase, 0);
write_imageh(outputImage, coords, output);
}
__kernel void channel_mul_d4(__global image2d_t input, __global image2d_t bias,
__write_only image2d_t outputImage, int w) {
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int2 coords;
coords.x = x;
coords.y = y;
int2 coords_bias;
coords_bias.x = x / w;
coords_bias.y = 0;
half4 in = read_imageh(input, sampler, coords);
half4 biase = read_imageh(bias, sampler, coords_bias);
half4 output = in * biase;
write_imageh(outputImage, coords, output);
}
\ No newline at end of file
......@@ -30,16 +30,23 @@ bool ElementwiseMulKernel<GPU_CL, float>::Init(
if (bias->dims() == param->InputX()->dims()) {
DLOG << "init element wise mul";
this->cl_helper_.AddKernel("elementwise_mul", "elementwise_mul_kernel.cl");
} else if (bias->dims().size() == 1) {
DLOG << "init channel_mul";
this->cl_helper_.AddKernel("channel_mul", "elementwise_mul_kernel.cl");
} else if (bias->dims().size() == 2) {
// etc. input 1 72 28 28
// filter 1 72
DLOG << "init channel_mul_d2";
this->cl_helper_.AddKernel("channel_mul_d2", "elementwise_mul_kernel.cl");
} else {
PADDLE_MOBILE_ENFORCE(false, "element mul not supported yet");
const int bias_dim_size = bias->dims().size();
if (bias_dim_size == 1) {
DLOG << "init channel_mul";
this->cl_helper_.AddKernel("channel_mul", "elementwise_mul_kernel.cl");
} else if (bias_dim_size == 2) {
// etc. input 1 72 28 28
// filter 1 72
DLOG << "init channel_mul_d2";
this->cl_helper_.AddKernel("channel_mul_d2", "elementwise_mul_kernel.cl");
} else if (bias_dim_size == 4) {
DLOG << "init channel_mul_d4";
this->cl_helper_.AddKernel("channel_mul_d4", "elementwise_mul_kernel.cl");
} else {
PADDLE_MOBILE_ENFORCE(false,
"element mul not supported this situation yet");
}
}
return true;
}
......@@ -71,68 +78,103 @@ void ElementwiseMulKernel<GPU_CL, float>::Compute(
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else if (bias->dims().size() == 1) {
DLOG << "channel mul";
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
int tensor_w = input->dims()[input->dims().size() - 1];
status = clSetKernelArg(kernel, 0, sizeof(cl_mem),
reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem),
reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem),
reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int),
reinterpret_cast<void *>(&tensor_w));
CL_CHECK_ERRORS(status);
auto width = input->ImageWidth();
auto height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else if (bias->dims().size() == 2) {
DLOG << "channel mul d2";
} else {
const int bias_dim_size = bias->dims().size();
if (bias_dim_size == 1) {
DLOG << "channel mul";
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
int tensor_w = input->dims()[input->dims().size() - 1];
status = clSetKernelArg(kernel, 0, sizeof(cl_mem),
reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem),
reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem),
reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int),
reinterpret_cast<void *>(&tensor_w));
CL_CHECK_ERRORS(status);
auto width = input->ImageWidth();
auto height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else if (bias_dim_size == 2) {
DLOG << "channel mul d2";
// etc. input 1 72 28 28
// filter 1 72 --> 1 1 1 72
DLOG << "input->ImageDims(): " << input->ImageDims();
DLOG << "bias->ImageDims(): " << bias->ImageDims();
DLOG << "out->ImageDims(): " << output->ImageDims();
// etc. input 1 72 28 28
// filter 1 72 --> 1 1 1 72
DLOG << "input->ImageDims(): " << input->ImageDims();
DLOG << "bias->ImageDims(): " << bias->ImageDims();
DLOG << "out->ImageDims(): " << output->ImageDims();
DLOG << "channel mul d2";
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
int tensor_w = input->dims()[input->dims().size() - 1];
status = clSetKernelArg(kernel, 0, sizeof(cl_mem),
reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem),
reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem),
reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int),
reinterpret_cast<void *>(&tensor_w));
CL_CHECK_ERRORS(status);
auto width = input->ImageWidth();
auto height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
DLOG << "channel mul d2";
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
int tensor_w = input->dims()[input->dims().size() - 1];
status = clSetKernelArg(kernel, 0, sizeof(cl_mem),
reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem),
reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem),
reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int),
reinterpret_cast<void *>(&tensor_w));
CL_CHECK_ERRORS(status);
auto width = input->ImageWidth();
auto height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
// bias->PrintTensor(*bias);
} else {
PADDLE_MOBILE_ENFORCE(false, "element mul not support this situation yet")
// bias->PrintTensor(*bias);
} else if (bias_dim_size == 4) {
DLOG << "channel_mul_d4";
// etc. input 1 72 28 28
// filter 1 72 --> 1 1 1 72
DLOG << "input->ImageDims(): " << input->ImageDims();
DLOG << "bias->ImageDims(): " << bias->ImageDims();
DLOG << "out->ImageDims(): " << output->ImageDims();
DLOG << "channel mul d2";
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
int tensor_w = input->dims()[input->dims().size() - 1];
status = clSetKernelArg(kernel, 0, sizeof(cl_mem),
reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem),
reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem),
reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int),
reinterpret_cast<void *>(&tensor_w));
CL_CHECK_ERRORS(status);
auto width = input->ImageWidth();
auto height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else {
PADDLE_MOBILE_ENFORCE(false, "element mul not support this situation yet")
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册