From ab30ccc27bbcaf52ee08fd42b1bdef605e797266 Mon Sep 17 00:00:00 2001 From: xiebaiyuan Date: Tue, 31 Dec 2019 22:42:43 +0800 Subject: [PATCH] [mobile][opencl] suite model male2fe ,support a type element_mul ,test=mobile (#2705) --- .../cl/cl_kernel/elementwise_mul_kernel.cl | 18 ++ .../kernel/cl/elementwise_mul_kernel.cpp | 178 +++++++++++------- 2 files changed, 128 insertions(+), 68 deletions(-) diff --git a/mobile/src/operators/kernel/cl/cl_kernel/elementwise_mul_kernel.cl b/mobile/src/operators/kernel/cl/cl_kernel/elementwise_mul_kernel.cl index 4895c07d20..b7f4d16c3b 100644 --- a/mobile/src/operators/kernel/cl/cl_kernel/elementwise_mul_kernel.cl +++ b/mobile/src/operators/kernel/cl/cl_kernel/elementwise_mul_kernel.cl @@ -110,4 +110,22 @@ __kernel void channel_mul_d2(__global image2d_t input, __global image2d_t bias, half4 in = read_imageh(input, sampler, coords); half4 output = mad(in, biase, 0); write_imageh(outputImage, coords, output); +} + +__kernel void channel_mul_d4(__global image2d_t input, __global image2d_t bias, + __write_only image2d_t outputImage, int w) { + int x = get_global_id(0); + int y = get_global_id(1); + const sampler_t sampler = + CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + int2 coords; + coords.x = x; + coords.y = y; + int2 coords_bias; + coords_bias.x = x / w; + coords_bias.y = 0; + half4 in = read_imageh(input, sampler, coords); + half4 biase = read_imageh(bias, sampler, coords_bias); + half4 output = in * biase; + write_imageh(outputImage, coords, output); } \ No newline at end of file diff --git a/mobile/src/operators/kernel/cl/elementwise_mul_kernel.cpp b/mobile/src/operators/kernel/cl/elementwise_mul_kernel.cpp index fd5b9e6bc3..37034a0189 100644 --- a/mobile/src/operators/kernel/cl/elementwise_mul_kernel.cpp +++ b/mobile/src/operators/kernel/cl/elementwise_mul_kernel.cpp @@ -30,16 +30,23 @@ bool ElementwiseMulKernel::Init( if (bias->dims() == param->InputX()->dims()) { DLOG << "init element wise mul"; this->cl_helper_.AddKernel("elementwise_mul", "elementwise_mul_kernel.cl"); - } else if (bias->dims().size() == 1) { - DLOG << "init channel_mul"; - this->cl_helper_.AddKernel("channel_mul", "elementwise_mul_kernel.cl"); - } else if (bias->dims().size() == 2) { - // etc. input 1 72 28 28 - // filter 1 72 - DLOG << "init channel_mul_d2"; - this->cl_helper_.AddKernel("channel_mul_d2", "elementwise_mul_kernel.cl"); } else { - PADDLE_MOBILE_ENFORCE(false, "element mul not supported yet"); + const int bias_dim_size = bias->dims().size(); + if (bias_dim_size == 1) { + DLOG << "init channel_mul"; + this->cl_helper_.AddKernel("channel_mul", "elementwise_mul_kernel.cl"); + } else if (bias_dim_size == 2) { + // etc. input 1 72 28 28 + // filter 1 72 + DLOG << "init channel_mul_d2"; + this->cl_helper_.AddKernel("channel_mul_d2", "elementwise_mul_kernel.cl"); + } else if (bias_dim_size == 4) { + DLOG << "init channel_mul_d4"; + this->cl_helper_.AddKernel("channel_mul_d4", "elementwise_mul_kernel.cl"); + } else { + PADDLE_MOBILE_ENFORCE(false, + "element mul not supported this situation yet"); + } } return true; } @@ -71,68 +78,103 @@ void ElementwiseMulKernel::Compute( clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); - } else if (bias->dims().size() == 1) { - DLOG << "channel mul"; - cl_mem input_image = input->GetCLImage(); - cl_mem bias_image = bias->GetCLImage(); - cl_mem output_image = output->GetCLImage(); - int tensor_w = input->dims()[input->dims().size() - 1]; - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), - reinterpret_cast(&input_image)); - CL_CHECK_ERRORS(status); - status = clSetKernelArg(kernel, 1, sizeof(cl_mem), - reinterpret_cast(&bias_image)); - CL_CHECK_ERRORS(status); - status = clSetKernelArg(kernel, 2, sizeof(cl_mem), - reinterpret_cast(&output_image)); - CL_CHECK_ERRORS(status); - status = clSetKernelArg(kernel, 3, sizeof(cl_int), - reinterpret_cast(&tensor_w)); - CL_CHECK_ERRORS(status); - auto width = input->ImageWidth(); - auto height = input->ImageHeight(); - size_t global_work_size[2] = {width, height}; - status = - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, - NULL, global_work_size, NULL, 0, NULL, NULL); - CL_CHECK_ERRORS(status); - } else if (bias->dims().size() == 2) { - DLOG << "channel mul d2"; + } else { + const int bias_dim_size = bias->dims().size(); + if (bias_dim_size == 1) { + DLOG << "channel mul"; + cl_mem input_image = input->GetCLImage(); + cl_mem bias_image = bias->GetCLImage(); + cl_mem output_image = output->GetCLImage(); + int tensor_w = input->dims()[input->dims().size() - 1]; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), + reinterpret_cast(&input_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), + reinterpret_cast(&bias_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(cl_mem), + reinterpret_cast(&output_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(cl_int), + reinterpret_cast(&tensor_w)); + CL_CHECK_ERRORS(status); + auto width = input->ImageWidth(); + auto height = input->ImageHeight(); + size_t global_work_size[2] = {width, height}; + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, + NULL, global_work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); + } else if (bias_dim_size == 2) { + DLOG << "channel mul d2"; - // etc. input 1 72 28 28 - // filter 1 72 --> 1 1 1 72 - DLOG << "input->ImageDims(): " << input->ImageDims(); - DLOG << "bias->ImageDims(): " << bias->ImageDims(); - DLOG << "out->ImageDims(): " << output->ImageDims(); + // etc. input 1 72 28 28 + // filter 1 72 --> 1 1 1 72 + DLOG << "input->ImageDims(): " << input->ImageDims(); + DLOG << "bias->ImageDims(): " << bias->ImageDims(); + DLOG << "out->ImageDims(): " << output->ImageDims(); - DLOG << "channel mul d2"; - cl_mem input_image = input->GetCLImage(); - cl_mem bias_image = bias->GetCLImage(); - cl_mem output_image = output->GetCLImage(); - int tensor_w = input->dims()[input->dims().size() - 1]; - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), - reinterpret_cast(&input_image)); - CL_CHECK_ERRORS(status); - status = clSetKernelArg(kernel, 1, sizeof(cl_mem), - reinterpret_cast(&bias_image)); - CL_CHECK_ERRORS(status); - status = clSetKernelArg(kernel, 2, sizeof(cl_mem), - reinterpret_cast(&output_image)); - CL_CHECK_ERRORS(status); - status = clSetKernelArg(kernel, 3, sizeof(cl_int), - reinterpret_cast(&tensor_w)); - CL_CHECK_ERRORS(status); - auto width = input->ImageWidth(); - auto height = input->ImageHeight(); - size_t global_work_size[2] = {width, height}; - status = - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, - NULL, global_work_size, NULL, 0, NULL, NULL); - CL_CHECK_ERRORS(status); + DLOG << "channel mul d2"; + cl_mem input_image = input->GetCLImage(); + cl_mem bias_image = bias->GetCLImage(); + cl_mem output_image = output->GetCLImage(); + int tensor_w = input->dims()[input->dims().size() - 1]; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), + reinterpret_cast(&input_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), + reinterpret_cast(&bias_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(cl_mem), + reinterpret_cast(&output_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(cl_int), + reinterpret_cast(&tensor_w)); + CL_CHECK_ERRORS(status); + auto width = input->ImageWidth(); + auto height = input->ImageHeight(); + size_t global_work_size[2] = {width, height}; + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, + NULL, global_work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); - // bias->PrintTensor(*bias); - } else { - PADDLE_MOBILE_ENFORCE(false, "element mul not support this situation yet") + // bias->PrintTensor(*bias); + } else if (bias_dim_size == 4) { + DLOG << "channel_mul_d4"; + // etc. input 1 72 28 28 + // filter 1 72 --> 1 1 1 72 + DLOG << "input->ImageDims(): " << input->ImageDims(); + DLOG << "bias->ImageDims(): " << bias->ImageDims(); + DLOG << "out->ImageDims(): " << output->ImageDims(); + + DLOG << "channel mul d2"; + cl_mem input_image = input->GetCLImage(); + cl_mem bias_image = bias->GetCLImage(); + cl_mem output_image = output->GetCLImage(); + int tensor_w = input->dims()[input->dims().size() - 1]; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), + reinterpret_cast(&input_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), + reinterpret_cast(&bias_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(cl_mem), + reinterpret_cast(&output_image)); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(cl_int), + reinterpret_cast(&tensor_w)); + CL_CHECK_ERRORS(status); + auto width = input->ImageWidth(); + auto height = input->ImageHeight(); + size_t global_work_size[2] = {width, height}; + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, + NULL, global_work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); + } else { + PADDLE_MOBILE_ENFORCE(false, "element mul not support this situation yet") + } } } -- GitLab