update feed

d9754c28 · liuruilong · a3e4c8de · d9754c28 · d9754c28 · d9754c28
5 changed files
--- a/src/framework/cl/cl_half.cpp
+++ b/src/framework/cl/cl_half.cpp
@@ -487,13 +487,13 @@ static const uint8_t shifttable[512] = {
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};

-half_t float2half(float f) {
+half_t Float2Half(float f) {
  uint32_t v = *reinterpret_cast<uint32_t *>(&f);
  return basetable[(v >> 23) & 0x1ff] +
         ((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]);
 }

-float half2float(half_t h) {
+float Half2Float(half_t h) {
  uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] +
               exponenttable[h >> 10];
  return *reinterpret_cast<float *>(&v);
@@ -501,12 +501,12 @@ float half2float(half_t h) {

 void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) {
  for (int i = 0; i < count; ++i) {
-    h_array[i] = float2half(f_array[i]);
+    h_array[i] = Float2Half(f_array[i]);
  }
 }

 void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
  for (int i = 0; i < count; ++i) {
-    f_array[i] = float2half(h_array[i]);
+    f_array[i] = Half2Float(h_array[i]);
  }
 }
--- a/src/framework/cl/cl_half.h
+++ b/src/framework/cl/cl_half.h
@@ -17,9 +17,9 @@ limitations under the License. */

 typedef uint16_t half_t;

-half_t float2half(float f);
+half_t Float2Half(float f);

-float half2float(half_t h);
+float Half2Float(half_t h);

 void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);


--- a/src/framework/cl/cl_image.cpp
+++ b/src/framework/cl/cl_image.cpp
@@ -52,7 +52,7 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor,
      for (int h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (int w = 0; w < W; w++) {
-          *p = half2float(imageData[i2]);
+          *p = Half2Float(imageData[i2]);
          i2 += 4;
          p++;
        }
@@ -106,7 +106,7 @@ void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
      for (int h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (int w = 0; w < W; w++) {
-          imageData[i2] = float2half(*p);
+          imageData[i2] = Float2Half(*p);
          i2 += 4;
          p++;
        }

--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -179,7 +179,7 @@ class CLImage {
              }
              assert(i2 < width * height * 4);

-              imageData[i2] = float2half(*p);
+              imageData[i2] = Float2Half(*p);
              i2 += 4;
              p++;
              //              count++;
@@ -206,7 +206,6 @@ class CLImage {
        &err);

    if (err != CL_SUCCESS) {
-      // TODO(HaiPeng): error handling
      CL_CHECK_ERRORS(err);
      PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
    }

--- a/src/operators/kernel/cl/feed_kernel.cpp
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -29,23 +29,40 @@ template <>
 void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  cl_int status;
+  DLOG << " feed 0";
  auto output = param.Out();
+  DLOG << " feed 1";
  const Tensor *input = param.InputX();
+  DLOG << " feed 2";
  const float *input_data = nullptr;
+  DLOG << " feed 3";
  input_data = input->data<float>();
+  DLOG << " feed 4";

  cl_mem cl_image = output->GetCLImage();
+  DLOG << " feed 5";
+
  int height = output->dims()[2];
  int width = output->dims()[3];
+  // NOTE(review): input_data is a host pointer, not a cl_mem; confirm arg 0.
  DLOG << output->dims();
  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data);
+  CL_CHECK_ERRORS(status);
+  // Each argument needs its own index (all were 0); order assumed - confirm.
-  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_image);
+  status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_image);
+  CL_CHECK_ERRORS(status);
+  // width/height are ints: pass their own size, not sizeof(cl_mem).
-  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &width);
+  status = clSetKernelArg(kernel, 2, sizeof(width), &width);
+  CL_CHECK_ERRORS(status);
+
-  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &height);
+  status = clSetKernelArg(kernel, 3, sizeof(height), &height);
+  CL_CHECK_ERRORS(status);

  size_t global_work_size[2] = {height, width};
-  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
+  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
                         global_work_size, NULL, 0, NULL, NULL);
+  CL_CHECK_ERRORS(status);
+
 }

 template class FeedKernel<GPU_CL, float>;