Merge pull request #1090 from codeWorm2015/opencl

update batch norm

Merge pull request #1090 from codeWorm2015/opencl
update batch norm
408407f4 · Ray Liu · GitHub · 60c7e84b · 94e70a44 · 408407f4
4 changed files
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -37,7 +37,7 @@ limitations under the License. */
 #include "framework/cl/cl_image.h"
 #endif
-int debug_to = 2;
+int debug_to = 4;
 namespace paddle_mobile {
 namespace framework {

--- a/src/operators/kernel/cl/batchnorm_kernel.cpp
+++ b/src/operators/kernel/cl/batchnorm_kernel.cpp
@@ -47,15 +47,20 @@ bool BatchNormKernel<GPU_CL, float>::Init(BatchNormParam<GPU_CL> *param) {
    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
  }
-  delete[](new_scale_ptr);
-  delete[](new_bias_ptr);
  framework::CLImage *new_scale = new framework::CLImage();
+  new_scale->SetTensorData(new_scale_ptr, variance->dims());
+  new_scale->InitCLImage(this->cl_helper_.CLContext());
  framework::CLImage *new_bias = new framework::CLImage();
+  new_bias->SetTensorData(new_bias_ptr, variance->dims());
+  new_bias->InitCLImage(this->cl_helper_.CLContext());
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);
+  delete[](new_scale_ptr);
+  delete[](new_bias_ptr);
  return true;
 }

--- a/src/operators/kernel/cl/cl_kernel/batchnorm_kernel.cl
+++ b/src/operators/kernel/cl/cl_kernel/batchnorm_kernel.cl
@@ -3,8 +3,8 @@
 __kernel void batchnorm(__private const int out_height,
                        __private const int out_width,
                        __read_only image2d_t input,
-                        __read_only image2d_t new_scale,
+                        __read_only image2d_t new_scale_image,
-                        __read_only image2d_t new_bias,
+                        __read_only image2d_t new_bias_image,
                        __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
@@ -13,12 +13,12 @@ __kernel void batchnorm(__private const int out_height,
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
-  half4 new_scale = read_imageh(bn_scale, sampler, (int2)(out_c, 0));
+  half4 new_scale = read_imageh(new_scale_image, sampler, (int2)(out_c, 0));
-  half4 new_bias = read_imageh(bn_bias, sampler, (int2)(out_c, 0));
+  half4 new_bias = read_imageh(new_bias_image, sampler, (int2)(out_c, 0));
  int pos_x = mad24(out_c, out_width, out_w);
  half4 in = read_imageh(input, sampler, (int2)(pos_x, out_nh));
  half4 out = mad(in, new_scale, new_bias);
-  write_imageh(output, (int2)(pos_x, nh), out);
+  write_imageh(output, (int2)(pos_x, out_nh), out);
 }
--- a/src/operators/kernel/cl/feed_kernel.cpp
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -61,7 +61,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
  size_t region[3] = {height, width, 1};
  clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out,
                     0, NULL, NULL);
-  for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]);
+//  for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]);
 }
 template class FeedKernel<GPU_CL, float>;