diff --git a/src/framework/cl/cl_helper.h b/src/framework/cl/cl_helper.h index 8ebbcc911a558e3acbc5f54914300ca8226f0b0a..e1fdd54d6a1a506b817a6f6e464e951d082c6f9a 100644 --- a/src/framework/cl/cl_helper.h +++ b/src/framework/cl/cl_helper.h @@ -64,6 +64,17 @@ class CLHelper { auto work_size_2 = n * h; + return {work_size_0, work_size_1, work_size_2}; + }else if(image_dim.size()==2){ + + auto image_width = image.ImageWidth(); + + auto work_size_0 = image_width / image_dim[1]; + + auto work_size_1 = image_dim[1]; + + auto work_size_2 = image_dim[0]; + return {work_size_0, work_size_1, work_size_2}; } PADDLE_MOBILE_THROW_EXCEPTION("not support this dim, need imp"); diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp index 8b1a2cd8ec05c98321dfdc0366e01d1abca9e206..43725f6b2cb3f055844f3d2d9521f838cf9a36d4 100644 --- a/src/framework/executor.cpp +++ b/src/framework/executor.cpp @@ -37,7 +37,7 @@ limitations under the License. */ #include "framework/cl/cl_image.h" #endif -int debug_to = 4; +int debug_to = 115; namespace paddle_mobile { namespace framework { diff --git a/src/operators/kernel/cl/relu_kernel.cpp b/src/operators/kernel/cl/relu_kernel.cpp index 71304b9c307f36f7a3db754a7a41958e206f77cd..ae40e8ae6adc307c405e3f5f65febbff174232c6 100644 --- a/src/operators/kernel/cl/relu_kernel.cpp +++ b/src/operators/kernel/cl/relu_kernel.cpp @@ -20,23 +20,23 @@ namespace operators { template <> bool ReluKernel::Init(ReluParam* param) { - this->cl_helper_.AddKernel("relu", "relu.cl"); +// this->cl_helper_.AddKernel("relu", "relu.cl"); return true; } template <> void ReluKernel::Compute(const ReluParam& param) { - auto kernel = this->cl_helper_.KernelAt(0); - const auto* input = param.InputX(); - auto* output = param.Out(); - auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); - auto inputImage = input->GetCLImage(); - auto outputImage = output->GetCLImage(); - clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); - clSetKernelArg(kernel, 1, 
sizeof(cl_mem), &outputImage); - const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - work_size, NULL, 0, NULL, NULL); +// auto kernel = this->cl_helper_.KernelAt(0); +// const auto* input = param.InputX(); +// auto* output = param.Out(); +// auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); +// auto inputImage = input->GetCLImage(); +// auto outputImage = output->GetCLImage(); +// clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); +// clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); +// const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; +// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, +// work_size, NULL, 0, NULL, NULL); } template class ReluKernel; diff --git a/src/operators/kernel/cl/softmax_kernel.cpp b/src/operators/kernel/cl/softmax_kernel.cpp index 1404ea40c703c8da2db09551fc6da440771f7366..304c716c6e5c2e3a5cfaef328d7df1535b335ff2 100644 --- a/src/operators/kernel/cl/softmax_kernel.cpp +++ b/src/operators/kernel/cl/softmax_kernel.cpp @@ -36,11 +36,14 @@ void SoftmaxKernel::Compute(const SoftmaxParam &param) { clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); const auto &inputDim = input->dims(); - int dims[4] = {inputDim[0], inputDim[1], inputDim[2], inputDim[3]}; - clSetKernelArg(kernel, 2, sizeof(int), dims); - clSetKernelArg(kernel, 3, sizeof(int), dims + 1); - clSetKernelArg(kernel, 4, sizeof(int), dims + 2); - clSetKernelArg(kernel, 5, sizeof(int), dims + 3); + int dims[4] = {1, 1, 1, 1}; + for (int i = 0; i < inputDim.size(); i++) { + dims[4-inputDim.size()+i] = inputDim[i]; + } + clSetKernelArg(kernel, 2, sizeof(int), &dims); + clSetKernelArg(kernel, 3, sizeof(int), &dims[1]); + clSetKernelArg(kernel, 4, sizeof(int), &dims[2]); + clSetKernelArg(kernel, 5, sizeof(int), &dims[3]); 
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, default_work_size.data(), NULL, 0, NULL, NULL);