Merge pull request #1134 from codeWorm2015/opencl

fix image memory copy bug

Merge pull request #1134 from codeWorm2015/opencl
fix image memory copy bug
e309c772 · Ray Liu · GitHub · ea8138bc · 4af8f0f4 · e309c772
4 changed files
--- a/src/framework/cl/cl_image.cpp
+++ b/src/framework/cl/cl_image.cpp
@@ -126,7 +126,8 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
  DDim ddim = cl_image.dims();
  size_t N, C, H, W, width, height;
-  if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 || cl_image.dims().size() == 4) {
+  if (cl_image.GetImageType() == Normal || cl_image.dims().size() == 3 ||
+      cl_image.dims().size() == 4) {
    if (ddim.size() == 4) {
      N = ddim[0];
      if (N < 0) {
@@ -185,8 +186,6 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
    delete (imageData);
    CL_CHECK_ERRORS(err);
  } else {
    if (ddim.size() == 2) {
      width = (ddim[1] + 3) / 4;
@@ -220,7 +219,7 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
  for (int i = 0; i < cl_image.numel(); i += stride) {
    printer << data[i] << " ";
  }
-  delete(data);
+  delete (data);
  return printer;
 }
 #endif

--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -26,11 +26,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
-enum ImageType {
+enum ImageType { Invalid = -1, Normal = 0, Folder = 1 };
-  Invalid = -1,
-  Normal = 0,
-  Folder = 1
-};
 class CLImage {
 public:
@@ -43,9 +39,10 @@ class CLImage {
    int numel = product(dim);
    if (tensor_data_ != nullptr) {
      delete[](tensor_data_);
+      tensor_data_ = nullptr;
    }
    tensor_data_ = new float[numel];
-    memcpy(tensor_data_, tensorData, numel);
+    memcpy(tensor_data_, tensorData, numel * sizeof(float));
    tensor_dims_ = dim;
  }

--- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
@@ -40,6 +40,25 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
  const framework::CLImage *scale = param->InputScale();
  const framework::CLImage *bias = param->InputBias();
  const float epsilon = param->Epsilon();
+  const int C = mean->numel();
+  for (int j = 0; j < C; ++j) {
+    DLOG << " mean - " << j << mean->data<float>()[j];
+  }
+  for (int j = 0; j < C; ++j) {
+    DLOG << " variance - " << j << variance->data<float>()[j];
+  }
+  for (int j = 0; j < C; ++j) {
+    DLOG << " scale - " << j << scale->data<float>()[j];
+  }
+  for (int j = 0; j < C; ++j) {
+    DLOG << " bias - " << j << bias->data<float>()[j];
+  }
  //
  //  DLOG << " climage mean: " << *mean;
  //  DLOG << " climage variance: " << *variance;
@@ -51,8 +70,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
  auto scale_ptr = scale->data<float>();
  auto bias_ptr = bias->data<float>();
-  const int C = mean->numel();
  float inv_std_ptr[C];
  for (int i = 0; i < C; i++) {
    inv_std_ptr[i] =
@@ -68,6 +85,14 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
  framework::CLImage *new_scale = new framework::CLImage();
+  for (int j = 0; j < C; ++j) {
+    DLOG << " new scale - " << j << new_scale_ptr[j];
+  }
+  for (int j = 0; j < C; ++j) {
+    DLOG << " new bias - " << j << new_bias_ptr[j];
+  }
  new_scale->SetTensorData(new_scale_ptr, variance->dims());
  new_scale->InitCLImage(this->cl_helper_.CLContext(),
                         cl_helper_.CLCommandQueue());

--- a/src/operators/kernel/cl/relu_kernel.cpp
+++ b/src/operators/kernel/cl/relu_kernel.cpp
@@ -23,8 +23,10 @@ bool ReluKernel<GPU_CL, float>::Init(ReluParam<GPU_CL>* param) {
  this->cl_helper_.AddKernel("relu", "relu.cl");
  this->cl_helper_.AddKernel("relu_p0", "relu.cl");
  this->cl_helper_.AddKernel("relu_p1", "relu.cl");
-  const auto dim = const_cast<framework::CLImage*>(param->InputX())->ImageDims();
+  const auto dim =
-  param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue(), dim);
+      const_cast<framework::CLImage*>(param->InputX())->ImageDims();
+  param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(),
+                                      this->cl_helper_.CLCommandQueue(), dim);
  return true;
 }
@@ -37,17 +39,20 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
  auto inputImage = input->GetCLImage();
  auto outputImage = output->GetCLImage();
-  auto tImage = const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage();
+  auto tImage =
+      const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage();
  clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage);
  clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage);
  clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage);
  clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage);
  const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
-//  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3, NULL,
+  //  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p0, 3,
-//                         work_size, NULL, 0, NULL, NULL);
+  //  NULL,
-//  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, NULL,
+  //                         work_size, NULL, 0, NULL, NULL);
-//                         work_size, NULL, 0, NULL, NULL);
+  //  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3,
+  //  NULL,
+  //                         work_size, NULL, 0, NULL, NULL);
 }
 template class ReluKernel<GPU_CL, float>;