imp CLImage printer function

cec396b3 · yangfei · e9c54cbb · cec396b3 · cec396b3 · cec396b3
5 changed file
--- a/src/framework/cl/cl_image.cpp
+++ b/src/framework/cl/cl_image.cpp
@@ -124,51 +124,88 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
  stride = stride > 0 ? stride : 1;
  float *data = new float[cl_image.numel()];
  DDim ddim = cl_image.dims();
-  size_t N, C, H, W;
-  if (ddim.size() == 4) {
-    N = ddim[0];
-    if (N < 0) {
+  size_t N, C, H, W, width, height;
+  if (cl_image.GetImageType() == Normal) {
+    if (ddim.size() == 4) {
+      N = ddim[0];
+      if (N < 0) {
+        N = 1;
+      }
+      C = ddim[1];
+      H = ddim[2];
+      W = ddim[3];
+      width = W * ((C + 3) / 4);
+      height = N * H;
+    } else if (ddim.size() == 2) {
+      width = ddim[1];
+      height = ddim[0];
+      N = 1;
+      C = 1;
+      H = ddim[0];
+      W = ddim[1];
+    } else if (ddim.size() == 1) {
+      width = ddim[0];
+      height = 1;
      N = 1;
+      C = 1;
+      H = 1;
+      W = ddim[0];
+    }
+    float *p = data;
+    half imageData[width * height * 4];
+    cl_int err;
+    cl_mem image = cl_image.GetCLImage();
+    size_t origin[3] = {0, 0, 0};
+    size_t region[3] = {width, height, 1};
+    err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin,
+                             region, 0, 0, imageData, 0, NULL, NULL);
+    size_t i0 = 0;
+    for (int n = 0; n < N; n++) {
+      for (int c = 0; c < C; c++) {
+        size_t i1 = i0 + (c / 4) * W;
+        for (int h = 0; h < H; h++) {
+          size_t i2 = (i1 << 2) + c % 4;
+          for (int w = 0; w < W; w++) {
+            *p = Half2Float(imageData[i2]);
+            i2 += 4;
+            p++;
+          }
+          i1 += width;
+        }
+      }
+      i0 += width * H;
    }
-    C = ddim[1];
-    H = ddim[2];
-    W = ddim[3];
-  } else if (ddim.size() == 1) {
-    N = 1;
-    C = ddim[0];
-    H = 1;
-    W = 1;
-  }

-  size_t width = W * ((C + 3) / 4);
-  size_t height = H * N;
+    CL_CHECK_ERRORS(err);
+  } else {
+    if (ddim.size() == 2) {
+      width = (ddim[1] + 3) / 4;
+      height = ddim[0];
+      H = ddim[0];
+      W = ddim[1];

-  float *p = data;
-  half imageData[width * height * 4];
-  cl_int err;
-  cl_mem image = cl_image.GetCLImage();
-  size_t origin[3] = {0, 0, 0};
-  size_t region[3] = {width, height, 1};
-  err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin,
-                           region, 0, 0, imageData, 0, NULL, NULL);
-  size_t i0 = 0;
-  for (int n = 0; n < N; n++) {
-    for (int c = 0; c < C; c++) {
-      size_t i1 = i0;
-      for (int h = 0; h < H; h++) {
-        size_t i2 = (i1 << 2) + c % 4;
-        for (int w = 0; w < W; w++) {
-          *p = Half2Float(imageData[i2]);
-          i2 += 4;
-          p++;
-        }
-        i1 += width;
+    } else if (ddim.size() == 1) {
+      width = (ddim[0] + 3) / 4;
+      height = 1;
+      H = 1;
+      W = ddim[0];
+    }
+    float *p = data;
+    half imageData[width * height * 4];
+    cl_int err;
+    cl_mem image = cl_image.GetCLImage();
+    size_t origin[3] = {0, 0, 0};
+    size_t region[3] = {width, height, 1};
+    err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin,
+                             region, 0, 0, imageData, 0, NULL, NULL);
+    for (int h = 0; h < H; h++) {
+      for (int w = 0; w < W; w++) {
+        p[h * W + w] = Half2Float(imageData[(h * width + w / 4) * 4 + (w % 4)]);
      }
    }
-    i0 += width * H;
-  }

-  CL_CHECK_ERRORS(err);
+    CL_CHECK_ERRORS(err);
+  }

  for (int i = 0; i < cl_image.numel(); i += stride) {
    printer << data[i] << " ";

--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -26,6 +26,8 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {

+enum ImageType { Normal, Folder };
+
 class CLImage {
 public:
  CLImage() = default;
@@ -60,6 +62,19 @@ class CLImage {
    initialized_ = true;
  }

+  /*
+   * need call SetTensorData first
+   * */
+  void InitCLImageNormal(cl_context context, cl_command_queue command_queue) {
+    if (tensor_data_ == nullptr) {
+      PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
+    }
+    InitCLImage(context, command_queue, tensor_data_, tensor_dims_);
+    delete[](tensor_data_);
+    tensor_data_ = nullptr;
+    initialized_ = true;
+  }
+
  void InitEmptyImage(cl_context context, cl_command_queue command_queue,
                      const DDim &dim) {
    if (tensor_data_ != nullptr) {
@@ -124,9 +139,13 @@ class CLImage {
   * */
  const DDim &dims() const { return tensor_dims_; }

+  const ImageType GetImageType() const { type; }
+
 private:
+  ImageType type;
  void InitCLImage2C(cl_context context, cl_command_queue command_queue,
                     float *tensor_data, const DDim &dim) {
+    type = Folder;
    command_queue_ = command_queue;
    assert(dim.size() <= 2);
    int tdim[2] = {1, 1};
@@ -136,7 +155,7 @@ class CLImage {
      tdim[0] = dim[0];
      tdim[1] = dim[1];
    }
-    int width = tdim[1] + 3 / 4;
+    int width = (tdim[1] + 3) / 4;
    int height = tdim[0];
    std::unique_ptr<half_t[]> imageData{};
    if (tensor_data) {
@@ -181,6 +200,7 @@ class CLImage {
  }
  void InitCLImage(cl_context context, cl_command_queue command_queue,
                   float *tensor_data, const DDim &dim) {
+    type = Normal;
    DLOG << " tensor dim: " << dim;
    // NCHW -> [W * (C+3)/4, H * N]
    tensor_dims_ = dim;
@@ -224,7 +244,8 @@ class CLImage {
          for (int h = 0; h < H; h++) {
            size_t i2 = (i1 << 2) + c % 4;
            for (int w = 0; w < W; w++) {
-              // int x = (n * width * H + h * width + (c / 4) * W + w) * 4 + (c % 4);
+              // int x = (n * width * H + h * width + (c / 4) * W + w) * 4 + (c
+              // % 4);
              imageData[i2] = Float2Half(*p);
              i2 += 4;
              p++;

--- a/src/operators/kernel/cl/cl_kernel/channel_add_kernel.cl
+++ b/src/operators/kernel/cl/cl_kernel/channel_add_kernel.cl
@@ -21,7 +21,7 @@ __kernel void channel_add(__global image2d_t input, __global image2d_t bias,__wr
     coords.y = y;
     int2 coords_bias;
     coords_bias.x = x/w;
-     coords_bias.y = 1;
+     coords_bias.y = 0;
     half4 in = read_imageh(input, sampler, coords);
     half4 biase = read_imageh(bias, sampler, coords_bias);
     half4 output = in + biase;

--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1235,7 +1235,7 @@ class ReluParamBase : public OpParam {

 public:
  ReluParamBase(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, const Scope &scope) {
+                const AttributeMap &attrs, const Scope &scope) {
    input_x_ = InputXFrom<GType>(inputs, scope);
    out_ = OutFrom<GType>(outputs, scope);
  }
@@ -1251,18 +1251,17 @@ class ReluParamBase : public OpParam {

 template <typename Dtype>
 class ReluParam : public ReluParamBase<Dtype> {
-public:
+ public:
  using ReluParamBase<Dtype>::ReluParamBase;
 };

 template <>
 class ReluParam<GPU_CL> : public ReluParamBase<GPU_CL> {
-public:
+ public:
  using ReluParamBase<GPU_CL>::ReluParamBase;
-  framework::CLImage& getMidImage() {
-    return midImage;
-  }
-private:
+  framework::CLImage &getMidImage() { return midImage; }
+
+ private:
  framework::CLImage midImage;
 };


--- a/test/net/test_mobilenet_GPU.cpp
+++ b/test/net/test_mobilenet_GPU.cpp
@@ -23,7 +23,7 @@ int main() {
  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
  //                     std::string(g_mobilenet_detect) + "/params", true);

-  auto isok = paddle_mobile.Load(g_mobilenet, true);
+  auto isok = paddle_mobile.Load(g_mobilenet, false);
  if (isok) {
    auto time2 = paddle_mobile::time();
    std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms"