diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h
index b8645a42989765656c43c73c04da500c9606cae1..f653256d389a0636b3f10abefcd6f7a7d2cf6e3a 100644
--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -101,7 +101,7 @@ class CLImage {
   T *data() const {
     if (initialized_) {
       PADDLE_MOBILE_THROW_EXCEPTION(
-          " cl image has initialized, tensor data has been deleted ");
+          " cl image has initialized, tensor data has been deleted, can't use tensor data");
     }
     return reinterpret_cast<T *>(tensor_data_);
   }
@@ -118,6 +118,7 @@ class CLImage {
  private:
  void InitCLImage(cl_context context, float *tensor_data, const DDim &dim) {
+    DLOG << " tensor dim: " << dim;
    cl_image_format cf = {.image_channel_order = CL_RGBA,
                          .image_channel_data_type = CL_HALF_FLOAT};
    // NCHW -> [W * (C+3)/4, H * N]
@@ -135,29 +136,23 @@ class CLImage {
        tensor_data_[i] = 0;
      }
    }
-    size_t N, C, H, W;
-    if (tensor_dims_.size() == 4) {
-      N = tensor_dims_[0];
-      if (N < 0) {
-        N = 1;
-      }
-      C = tensor_dims_[1];
-      H = tensor_dims_[2];
-      W = tensor_dims_[3];
-      width_of_one_block_ = W;
-      height_of_one_block_ = H;
+    size_t new_dims[] = {1, 1, 1, 1};
-    } else if (tensor_dims_.size() == 1) {
-      N = 1;
-      C = tensor_dims_[0];
-      H = 1;
-      W = 1;
-
-      width_of_one_block_ = W;
-      height_of_one_block_ = H;
+    for (int j = 0; j < dim.size(); ++j) {
+      new_dims[4 - dim.size() + j] = dim[j];
    }
+    size_t N, C, H, W;
+
+    N = new_dims[0];
+    C = new_dims[1];
+    H = new_dims[2];
+    W = new_dims[3];
+
+    width_of_one_block_ = W;
+    height_of_one_block_ = H;
+
    size_t width = W * ((C + 3) / 4);
    size_t height = H * N;
@@ -196,6 +191,8 @@ class CLImage {
      }
    }
    cl_int err;
+    DLOG << " image width: " << width;
+    DLOG << " image height: " << height;
    cl_image_ = clCreateImage2D(
        context,  // cl_context context
        CL_MEM_READ_WRITE | (imageData ? CL_MEM_COPY_HOST_PTR : 0),  // cl_mem_flags flags
diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp
index 941bc0bba96ada36595b265594f6c463bd2e3ab0..2116c9b1192a9dfa2e64d72040af5d4c3dd6c81c 100644
--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -60,6 +60,7 @@ void OperatorBase<Dtype>::Run() {
  DLOG << " begin run " << type_;
  RunImpl();
  DLOG << " end run " << type_;
+
 #ifdef PADDLE_MOBILE_DEBUG
  DLOG << "-------------" << type_ << "----------------------------";
  vector<string> input_keys = GetInputKeys();
diff --git a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
index 2a5c823295c7562361433414cf35be81d2fbf00c..fa718a7326d8fcdd5fff614f8c67632c9badec3e 100644
--- a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
+++ b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
@@ -12,4 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "conv_kernel.inc.cl"
+//#include "conv_kernel.inc.cl"
+
+__kernel void conv_3x3() {}
\ No newline at end of file
diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp
index 63c1776f832843ad36f7f1e209dd380d93c7440b..a9d72d23212a46ae6cadb0c0927fd205aa937c27 100644
--- a/src/operators/kernel/cl/conv_kernel.cpp
+++ b/src/operators/kernel/cl/conv_kernel.cpp
@@ -42,18 +42,18 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
      param->Filter()->HeightOfOneBlock() == 1) {
    DLOG << " here1 ";
-    this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
+    this->cl_helper_.AddKernel("conv_1x1", "conv_kernel.cl");

  } else if (param->Filter()->dims()[1] == 1) {
    DLOG << " here2 ";
-    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
+    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_kernel.cl");

  } else if (param->Filter()->WidthOfOneBlock() == 3 &&
             param->Filter()->HeightOfOneBlock() == 3) {
    DLOG << " here3 ";
-    this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
+    this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");

  } else {
    PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
@@ -64,64 +64,64 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {

 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
-  DLOG << " Compute helper: " << &cl_helper_;
-  DLOG << " begin compute ";
-  auto kernel = this->cl_helper_.KernelAt(0);
-  DLOG << " get work size ";
-  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
-  DLOG << " end work size ";
-  int c_block = default_work_size[0];
-  int w = default_work_size[1];
-  int nh = default_work_size[2];
-  auto input = param.Input()->GetCLImage();
-
-  DLOG << " get Input ";
-
-  auto filter = param.Filter()->GetCLImage();
-
-  DLOG << " get Filter ";
-
-  auto output = param.Output();
-
-  DLOG << " get Output ";
-
-  int stride = param.Strides()[0];
-  int offset = param.Offset();
-  int input_c = param.Input()->CBlock();
-  int dilation = param.Dilations()[0];
-  int input_width = param.Input()->WidthOfOneBlock();
-  int input_height = param.Input()->HeightOfOneBlock();
-
-  cl_int status;
-
-  DLOG << " begin set kernel arg ";
-
-  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
-  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
-  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
-  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
-  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
-  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
-  status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
-  status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
-  status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
-  status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
-  status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
-  status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
-
-  DLOG << " end set kernel arg ";
-
-  CL_CHECK_ERRORS(status);
-
-  DLOG << " begin enqueue ";
-
-  status =
-      clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                             default_work_size.data(), NULL, 0, NULL, NULL);
-
-  DLOG << " end enqueue ";
-
-  CL_CHECK_ERRORS(status);
+//  DLOG << " Compute helper: " << &cl_helper_;
+//  DLOG << " begin compute ";
+//  auto kernel = this->cl_helper_.KernelAt(0);
+//  DLOG << " get work size ";
+//  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
+//  DLOG << " end work size ";
+//  int c_block = default_work_size[0];
+//  int w = default_work_size[1];
+//  int nh = default_work_size[2];
+//  auto input = param.Input()->GetCLImage();
+//
+//  DLOG << " get Input ";
+//
+//  auto filter = param.Filter()->GetCLImage();
+//
+//  DLOG << " get Filter ";
+//
+//  auto output = param.Output();
+//
+//  DLOG << " get Output ";
+//
+//  int stride = param.Strides()[0];
+//  int offset = param.Offset();
+//  int input_c = param.Input()->CBlock();
+//  int dilation = param.Dilations()[0];
+//  int input_width = param.Input()->WidthOfOneBlock();
+//  int input_height = param.Input()->HeightOfOneBlock();
+//
+//  cl_int status;
+//
+//  DLOG << " begin set kernel arg ";
+//
+//  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
+//  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
+//  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
+//  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
+//  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
+//  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
+//  status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
+//  status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
+//  status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
+//  status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
+//  status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
+//  status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
+//
+//  DLOG << " end set kernel arg ";
+//
+//  CL_CHECK_ERRORS(status);
+//
+//  DLOG << " begin enqueue ";
+//
+//  status =
+//      clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
+//                             default_work_size.data(), NULL, 0, NULL, NULL);
+//
+//  DLOG << " end enqueue ";
+//
+//  CL_CHECK_ERRORS(status);
 }

 template class ConvKernel<GPU_CL, float>;
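
Note on the layout change in cl_image.h: the new InitCLImage code right-aligns shapes of rank 1..4 into an NCHW quadruple of ones and then sizes the backing 2D image with the NCHW -> [W * (C+3)/4, H * N] mapping noted in the header. Below is a minimal standalone C++ sketch of that computation for reference only; the helper name PrintImageExtent and the main() driver are illustrative and are not part of the patch.

#include <cstddef>
#include <iostream>
#include <vector>

// Right-align a rank 1..4 shape into NCHW (missing leading dims become 1),
// then compute the image extent: width = W * ((C + 3) / 4), height = H * N.
// Four consecutive channels are packed into one RGBA texel, hence (C + 3) / 4.
// Assumes dim.size() <= 4, as the patched code does.
static void PrintImageExtent(const std::vector<size_t> &dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < dim.size(); ++j) {
    new_dims[4 - dim.size() + j] = dim[j];
  }
  const size_t N = new_dims[0], C = new_dims[1], H = new_dims[2],
               W = new_dims[3];
  const size_t width = W * ((C + 3) / 4);
  const size_t height = H * N;
  std::cout << "image extent: " << width << " x " << height << "\n";
}

int main() {
  PrintImageExtent({1, 3, 224, 224});  // NCHW input   -> 224 x 224
  PrintImageExtent({32, 3, 3, 3});     // 3x3 filters  -> 3 x 96
  PrintImageExtent({64});              // rank-1 shape -> 64 x 1
  return 0;
}

One behavioral consequence worth noting: under the old code a rank-1 tensor was treated as C = dim[0] (giving a width of (C+3)/4 texels), while the right-alignment above maps it to W = dim[0], so the resulting image extent for 1-D tensors differs between the two versions.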