From 27c4c43cc7b45191701d4f9d11ac12168ebb05f6 Mon Sep 17 00:00:00 2001 From: yangfei Date: Tue, 16 Oct 2018 20:39:37 +0800 Subject: [PATCH] repair bug of softmax op kernel --- src/framework/cl/cl_helper.h | 11 ++++++++++ src/framework/executor.cpp | 2 +- src/operators/kernel/cl/relu_kernel.cpp | 24 +++++++++++----------- src/operators/kernel/cl/softmax_kernel.cpp | 13 +++++++----- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/framework/cl/cl_helper.h b/src/framework/cl/cl_helper.h index 8ebbcc911a..e1fdd54d6a 100644 --- a/src/framework/cl/cl_helper.h +++ b/src/framework/cl/cl_helper.h @@ -64,6 +64,17 @@ class CLHelper { auto work_size_2 = n * h; + return {work_size_0, work_size_1, work_size_2}; + }else if(image_dim.size()==2){ + + auto image_width = image.ImageWidth(); + + auto work_size_0 = image_width / image_dim[1]; + + auto work_size_1 = image_dim[1]; + + auto work_size_2 = image_dim[0]; + return {work_size_0, work_size_1, work_size_2}; } PADDLE_MOBILE_THROW_EXCEPTION("not support this dim, need imp"); diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp index 8b1a2cd8ec..43725f6b2c 100644 --- a/src/framework/executor.cpp +++ b/src/framework/executor.cpp @@ -37,7 +37,7 @@ limitations under the License. 
*/ #include "framework/cl/cl_image.h" #endif -int debug_to = 4; +int debug_to = 115; namespace paddle_mobile { namespace framework { diff --git a/src/operators/kernel/cl/relu_kernel.cpp b/src/operators/kernel/cl/relu_kernel.cpp index 71304b9c30..ae40e8ae6a 100644 --- a/src/operators/kernel/cl/relu_kernel.cpp +++ b/src/operators/kernel/cl/relu_kernel.cpp @@ -20,23 +20,23 @@ namespace operators { template <> bool ReluKernel::Init(ReluParam* param) { - this->cl_helper_.AddKernel("relu", "relu.cl"); +// this->cl_helper_.AddKernel("relu", "relu.cl"); return true; } template <> void ReluKernel::Compute(const ReluParam& param) { - auto kernel = this->cl_helper_.KernelAt(0); - const auto* input = param.InputX(); - auto* output = param.Out(); - auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); - auto inputImage = input->GetCLImage(); - auto outputImage = output->GetCLImage(); - clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); - clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); - const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - work_size, NULL, 0, NULL, NULL); +// auto kernel = this->cl_helper_.KernelAt(0); +// const auto* input = param.InputX(); +// auto* output = param.Out(); +// auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); +// auto inputImage = input->GetCLImage(); +// auto outputImage = output->GetCLImage(); +// clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); +// clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); +// const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; +// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, +// work_size, NULL, 0, NULL, NULL); } template class ReluKernel; diff --git a/src/operators/kernel/cl/softmax_kernel.cpp b/src/operators/kernel/cl/softmax_kernel.cpp index 1404ea40c7..304c716c6e 100644 --- 
a/src/operators/kernel/cl/softmax_kernel.cpp +++ b/src/operators/kernel/cl/softmax_kernel.cpp @@ -36,11 +36,14 @@ void SoftmaxKernel::Compute(const SoftmaxParam &param) { clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); const auto &inputDim = input->dims(); - int dims[4] = {inputDim[0], inputDim[1], inputDim[2], inputDim[3]}; - clSetKernelArg(kernel, 2, sizeof(int), dims); - clSetKernelArg(kernel, 3, sizeof(int), dims + 1); - clSetKernelArg(kernel, 4, sizeof(int), dims + 2); - clSetKernelArg(kernel, 5, sizeof(int), dims + 3); + int dims[4] = {1, 1, 1, 1}; + for (int i = 0; i < inputDim.size(); i++) { + dims[4-inputDim.size()+i] = inputDim[i]; + } + clSetKernelArg(kernel, 2, sizeof(int), &dims); + clSetKernelArg(kernel, 3, sizeof(int), &dims[1]); + clSetKernelArg(kernel, 4, sizeof(int), &dims[2]); + clSetKernelArg(kernel, 5, sizeof(int), &dims[3]); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, default_work_size.data(), NULL, 0, NULL, NULL); -- GitLab