From 88c0ed3bd04b925ced80f83ede248e749dbc2ea1 Mon Sep 17 00:00:00 2001
From: liuruilong <liuruilong@baidu.com>
Date: Mon, 15 Oct 2018 15:52:38 +0800
Subject: [PATCH] fix cl image error: map tensor dims of any rank up to 4 onto NCHW

---
 src/framework/cl/cl_image.h                   |  37 +++---
 src/framework/operator.cpp                    |   1 +
 .../kernel/cl/cl_kernel/conv_kernel.cl        |   4 +-
 src/operators/kernel/cl/conv_kernel.cpp       | 122 +++++++++---------
 4 files changed, 82 insertions(+), 82 deletions(-)
diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h
index b8645a4298..f653256d38 100644
--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -101,7 +101,7 @@ class CLImage {
   T *data() const {
     if (initialized_) {
       PADDLE_MOBILE_THROW_EXCEPTION(
-          " cl image has initialized, tensor data has been deleted ");
+          " cl image has initialized, tensor data has been deleted, can't use tensor data");
     }
     return reinterpret_cast<T *>(tensor_data_);
   }
@@ -118,6 +118,7 @@ class CLImage {
 
  private:
   void InitCLImage(cl_context context, float *tensor_data, const DDim &dim) {
+    DLOG << " tensor dim: " << dim;
     cl_image_format cf = {.image_channel_order = CL_RGBA,
                           .image_channel_data_type = CL_HALF_FLOAT};
     // NCHW -> [W * (C+3)/4, H * N]
@@ -135,29 +136,32 @@ class CLImage {
         tensor_data_[i] = 0;
       }
     }
-    size_t N, C, H, W;
-    if (tensor_dims_.size() == 4) {
-      N = tensor_dims_[0];
-      if (N < 0) {
-        N = 1;
-      }
-      C = tensor_dims_[1];
-      H = tensor_dims_[2];
-      W = tensor_dims_[3];
 
-      width_of_one_block_ = W;
-      height_of_one_block_ = H;
+    // new_dims has exactly four slots (N, C, H, W). With more than four dims
+    // the index computed in the loop below would start out negative and the
+    // write would land outside the array, so reject such tensors up front.
+    if (dim.size() > 4) {
+      PADDLE_MOBILE_THROW_EXCEPTION(
+          " cl image only supports tensors with at most 4 dims ");
+    }
 
-    } else if (tensor_dims_.size() == 1) {
-      N = 1;
-      C = tensor_dims_[0];
-      H = 1;
-      W = 1;
-
-      width_of_one_block_ = W;
-      height_of_one_block_ = H;
+    // Right-align the tensor dims into NCHW order; leading dims that the
+    // tensor does not have keep their default extent of 1.
+    size_t new_dims[] = {1, 1, 1, 1};
+    for (int j = 0; j < dim.size(); ++j) {
+      new_dims[4 - dim.size() + j] = dim[j];
     }
 
+    size_t N, C, H, W;
+
+    N = new_dims[0];
+    C = new_dims[1];
+    H = new_dims[2];
+    W = new_dims[3];
+
+    width_of_one_block_ = W;
+    height_of_one_block_ = H;
+
     size_t width = W * ((C + 3) / 4);
     size_t height = H * N;
 
@@ -196,6 +200,8 @@ class CLImage {
       }
     }
     cl_int err;
+    DLOG << " image width: " << width;
+    DLOG << " image height: " << height;
     cl_image_ = clCreateImage2D(
         context,                                   // cl_context context
         CL_MEM_READ_WRITE | (imageData ? CL_MEM_COPY_HOST_PTR : 0),  // cl_mem_flags flags
diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp
index 941bc0bba9..2116c9b119 100644
--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -60,6 +60,7 @@ void OperatorBase<Dtype>::Run() {
   DLOG << " begin run " << type_;
   RunImpl();
   DLOG << " end run " << type_;
+
 #ifdef PADDLE_MOBILE_DEBUG
   DLOG << "-------------" << type_ << "----------------------------";
   vector<string> input_keys = GetInputKeys();
diff --git a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
index 2a5c823295..fa718a7326 100644
--- a/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
+++ b/src/operators/kernel/cl/cl_kernel/conv_kernel.cl
@@ -12,4 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "conv_kernel.inc.cl"
+// The shared implementation in conv_kernel.inc.cl is disabled for now.
+// ConvKernel<GPU_CL, float>::Init passes "conv_1x1", "depth_conv_3x3" or
+// "conv_3x3" together with this file name to AddKernel, so every one of
+// those entry points needs at least a placeholder definition here.
+//#include "conv_kernel.inc.cl"
+
+__kernel void conv_1x1() {}
+__kernel void depth_conv_3x3() {}
+__kernel void conv_3x3() {}
diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp
index 63c1776f83..a9d72d2321 100644
--- a/src/operators/kernel/cl/conv_kernel.cpp
+++ b/src/operators/kernel/cl/conv_kernel.cpp
@@ -42,18 +42,18 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
       param->Filter()->HeightOfOneBlock() == 1) {
 
     DLOG << " here1 ";
-    this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
+    this->cl_helper_.AddKernel("conv_1x1", "conv_kernel.cl");
 
   } else if (param->Filter()->dims()[1] == 1) {
 
     DLOG << " here2 ";
-    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
+    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_kernel.cl");
 
   } else if (param->Filter()->WidthOfOneBlock() == 3 &&
              param->Filter()->HeightOfOneBlock() == 3) {
 
     DLOG << " here3 ";
-    this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
+    this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
 
   } else {
     PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
@@ -64,64 +64,72 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
 
 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
-  DLOG << " Compute helper: " << &cl_helper_;
-  DLOG << " begin compute ";
-  auto kernel = this->cl_helper_.KernelAt(0);
-  DLOG << " get work size ";
-  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
-  DLOG << " end work size ";
-  int c_block = default_work_size[0];
-  int w = default_work_size[1];
-  int nh = default_work_size[2];
-  auto input = param.Input()->GetCLImage();
-
-  DLOG << " get Input ";
-
-  auto filter = param.Filter()->GetCLImage();
-
-  DLOG << " get Filter ";
-
-  auto output = param.Output();
-
-  DLOG << " get Output ";
-
-  int stride = param.Strides()[0];
-  int offset = param.Offset();
-  int input_c = param.Input()->CBlock();
-  int dilation = param.Dilations()[0];
-  int input_width = param.Input()->WidthOfOneBlock();
-  int input_height = param.Input()->HeightOfOneBlock();
-
-  cl_int status;
-
-  DLOG << " begin set kernel arg ";
-
-  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
-  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
-  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
-  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
-  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
-  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
-  status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
-  status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
-  status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
-  status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
-  status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
-  status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
-
-  DLOG << " end set kernel arg ";
-
-  CL_CHECK_ERRORS(status);
-
-  DLOG << " begin enqueue ";
-
-  status =
-      clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                             default_work_size.data(), NULL, 0, NULL, NULL);
-
-  DLOG << " end enqueue ";
-
-  CL_CHECK_ERRORS(status);
+  // NOTE: the OpenCL dispatch below is commented out, so Compute currently
+  // does nothing.
+  // TODO before re-enabling:
+  //  - check the status of every clSetKernelArg call; as written each call
+  //    overwrites `status`, so only the last one reaches CL_CHECK_ERRORS.
+  //  - argument 5 passes the address of the param.Output() result, whereas
+  //    input and filter pass a handle obtained through GetCLImage(); confirm
+  //    which one the kernel expects.
+//  DLOG << " Compute helper: " << &cl_helper_;
+//  DLOG << " begin compute ";
+//  auto kernel = this->cl_helper_.KernelAt(0);
+//  DLOG << " get work size ";
+//  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
+//  DLOG << " end work size ";
+//  int c_block = default_work_size[0];
+//  int w = default_work_size[1];
+//  int nh = default_work_size[2];
+//  auto input = param.Input()->GetCLImage();
+//
+//  DLOG << " get Input ";
+//
+//  auto filter = param.Filter()->GetCLImage();
+//
+//  DLOG << " get Filter ";
+//
+//  auto output = param.Output();
+//
+//  DLOG << " get Output ";
+//
+//  int stride = param.Strides()[0];
+//  int offset = param.Offset();
+//  int input_c = param.Input()->CBlock();
+//  int dilation = param.Dilations()[0];
+//  int input_width = param.Input()->WidthOfOneBlock();
+//  int input_height = param.Input()->HeightOfOneBlock();
+//
+//  cl_int status;
+//
+//  DLOG << " begin set kernel arg ";
+//
+//  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
+//  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
+//  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
+//  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
+//  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
+//  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
+//  status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
+//  status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
+//  status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
+//  status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
+//  status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
+//  status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
+//
+//  DLOG << " end set kernel arg ";
+//
+//  CL_CHECK_ERRORS(status);
+//
+//  DLOG << " begin enqueue ";
+//
+//  status =
+//      clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
+//                             default_work_size.data(), NULL, 0, NULL, NULL);
+//
+//  DLOG << " end enqueue ";
+//
+//  CL_CHECK_ERRORS(status);
 }
 
 template class ConvKernel<GPU_CL, float>;
-- 
GitLab