Merge branch 'opencl' of https://github.com/PaddlePaddle/paddle-mobile into opencl

b629d3a5 · dolphin8 · 992aebe9 · c8ba9fcc · b629d3a5 · b629d3a5
5 changed files
--- a/src/framework/cl/cl_half.cpp
+++ b/src/framework/cl/cl_half.cpp
@@ -487,13 +487,13 @@ static const uint8_t shifttable[512] = {
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
-half_t float2half(float f) {
+half_t Float2Half(float f) {
  uint32_t v = *reinterpret_cast<uint32_t *>(&f);
  return basetable[(v >> 23) & 0x1ff] +
         ((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]);
 }
-float half2float(half_t h) {
+float Half2Float(half_t h) {
  uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] +
               exponenttable[h >> 10];
  return *reinterpret_cast<float *>(&v);
@@ -501,12 +501,12 @@ float half2float(half_t h) {
 void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) {
  for (int i = 0; i < count; ++i) {
-    h_array[i] = float2half(f_array[i]);
+    h_array[i] = Float2Half(f_array[i]);
  }
 }
 void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
  for (int i = 0; i < count; ++i) {
-    f_array[i] = float2half(h_array[i]);
+    f_array[i] = Half2Float(h_array[i]);
  }
 }
--- a/src/framework/cl/cl_half.h
+++ b/src/framework/cl/cl_half.h
@@ -17,9 +17,9 @@ limitations under the License. */
 typedef uint16_t half_t;
-half_t float2half(float f);
+half_t Float2Half(float f);
-float half2float(half_t h);
+float Half2Float(half_t h);
 void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);

--- a/src/framework/cl/cl_image.cpp
+++ b/src/framework/cl/cl_image.cpp
@@ -52,7 +52,7 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor,
      for (int h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (int w = 0; w < W; w++) {
-          *p = half2float(imageData[i2]);
+          *p = Half2Float(imageData[i2]);
          i2 += 4;
          p++;
        }
@@ -106,7 +106,7 @@ void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
      for (int h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (int w = 0; w < W; w++) {
-          imageData[i2] = float2half(*p);
+          imageData[i2] = Float2Half(*p);
          i2 += 4;
          p++;
        }

--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -177,7 +177,7 @@ class CLImage {
              }
              assert(i2 < width * height * 4);
-              imageData[i2] = float2half(*p);
+              imageData[i2] = Float2Half(*p);
              i2 += 4;
              p++;
              //              count++;
@@ -219,7 +219,6 @@ class CLImage {
        &err);
    if (err != CL_SUCCESS) {
-      // TODO(HaiPeng): error handling
      CL_CHECK_ERRORS(err);
      PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
    }

--- a/src/operators/kernel/cl/feed_kernel.cpp
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -29,23 +29,39 @@ template <>
 void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  cl_int status;
+  DLOG << " feed 0";
  auto output = param.Out();
+  DLOG << " feed 1";
  const Tensor *input = param.InputX();
+  DLOG << " feed 2";
  const float *input_data = nullptr;
+  DLOG << " feed 3";
  input_data = input->data<float>();
+  DLOG << " feed 4";
  cl_mem cl_image = output->GetCLImage();
+  DLOG << " feed 5";
  int height = output->dims()[2];
  int width = output->dims()[3];
  DLOG << output->dims();
  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data);
+  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_image);
+  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &width);
+  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &height);
+  CL_CHECK_ERRORS(status);
  size_t global_work_size[2] = {height, width};
-  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
+  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
-                         global_work_size, NULL, 0, NULL, NULL);
+                                  NULL, global_work_size, NULL, 0, NULL, NULL);
+  CL_CHECK_ERRORS(status);
 }
 template class FeedKernel<GPU_CL, float>;