From 5d5d06c2dbae27cc6a9e5db20ffcedfe82e96abf Mon Sep 17 00:00:00 2001
From: liuqi <liuqi10@xiaomi.com>
Date: Sat, 2 Dec 2017 11:18:20 +0800
Subject: [PATCH] Fix buffer to image bug for half type and refactor some cl
 apis.

---
 mace/kernels/opencl/addn.cc                   |  2 +-
 mace/kernels/opencl/batch_norm_opencl.cc      |  2 +-
 mace/kernels/opencl/buffer_to_image.cc        |  9 +++-
 mace/kernels/opencl/conv_2d_opencl_1x1.cc     |  4 +-
 mace/kernels/opencl/conv_2d_opencl_3x3.cc     |  4 +-
 .../opencl/depthwise_conv_opencl_3x3.cc       |  2 +-
 mace/kernels/opencl/helper.cc                 | 44 +++++++++++--------
 mace/kernels/opencl/helper.h                  |  7 ++-
 mace/kernels/opencl/pooling_opencl.cc         |  4 +-
 mace/kernels/opencl/relu_opencl.cc            |  2 +-
 mace/kernels/opencl/resize_bilinear_opencl.cc |  4 +-
 mace/kernels/opencl/space_to_batch_opecl.cc   |  2 +-
 mace/ops/buffer_to_image_test.cc              | 36 ++++++++++++++-
 13 files changed, 86 insertions(+), 36 deletions(-)
diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc
index 6c5106db..9f1ed60a 100644
--- a/mace/kernels/opencl/addn.cc
+++ b/mace/kernels/opencl/addn.cc
@@ -17,7 +17,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) {
 
   auto runtime = OpenCLRuntime::Get();
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(output->dtype()));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(output->dtype()));
   auto addn_kernel = runtime->BuildKernel("addn", "add2", built_options);
 
   const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(addn_kernel);
diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc
index c7cd37e3..8b6804dc 100644
--- a/mace/kernels/opencl/batch_norm_opencl.cc
+++ b/mace/kernels/opencl/batch_norm_opencl.cc
@@ -30,7 +30,7 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
 
   auto runtime = OpenCLRuntime::Get();
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype()));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
   auto bm_kernel = runtime->BuildKernel("batch_norm", "batch_norm", built_options);
 
   const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(bm_kernel);
diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc
index 61faa995..f3af3d22 100644
--- a/mace/kernels/opencl/buffer_to_image.cc
+++ b/mace/kernels/opencl/buffer_to_image.cc
@@ -24,8 +24,13 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
   }
 
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(DataTypeToEnum<T>::value));
-  built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(DataTypeToEnum<T>::value));
+  if (buffer->dtype() == image->dtype()) {
+    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum<T>::value));
+    built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum<T>::value));
+  } else {
+    built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum<T>::value));
+    built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum<T>::value));
+  }
   auto runtime = OpenCLRuntime::Get();
   string kernel_name;
   switch (type) {
diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
index c3a17c7b..d759689c 100644
--- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
@@ -34,8 +34,8 @@ void Conv1x1(const Tensor *input,
   MACE_CHECK(input_batch == batch);
 
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt));
-  built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
+  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
   built_options.emplace("-DSTRIDE=" + ToString(stride));
   if (bias != nullptr) {
     built_options.emplace("-DBIAS");
diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc
index e29c4d92..24bf90a1 100644
--- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc
@@ -26,8 +26,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
   const index_t width_blocks = RoundUpDiv<index_t, 5>(width);
 
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt));
-  built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
+  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
   built_options.emplace(bias != nullptr ? "-DBIAS" : "");
   built_options.emplace("-DSTRIDE=" + ToString(stride));
   if (fused_relu) {
diff --git a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
index 60ce2a82..1402131d 100644
--- a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
+++ b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
@@ -32,7 +32,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input,
 
   auto runtime = OpenCLRuntime::Get();
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype()));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
   built_options.emplace(stride == 1 ? "-DSTRIDE_1" : "");
   built_options.emplace(bias != nullptr ? "-DBIAS" : "");
   auto conv_kernel  = runtime->BuildKernel("depthwise_conv_3x3", "depthwise_conv_3x3", built_options);
diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc
index 4f4d1c56..2c1dc264 100644
--- a/mace/kernels/opencl/helper.cc
+++ b/mace/kernels/opencl/helper.cc
@@ -54,34 +54,42 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
 }
 
 
-std::string DataTypeToCLType(const DataType dt) {
+std::string DtToCLDt(const DataType dt) {
+  switch (dt) {
+    case DT_FLOAT:
+      return "float";
+    case DT_HALF:
+      return "half";
+    default:
+      LOG(FATAL) << "Unsupported data type";
+      return "";
+  }
+}
+
+std::string DtToCLCMDDt(const DataType dt) {
+  switch (dt) {
+    case DT_FLOAT:
+      return "f";
+    case DT_HALF:
+      return "h";
+    default:
+      LOG(FATAL) << "Not supported data type for opencl cmd data type";
+      return "";
+  }
+}
+
+std::string DtToUpstreamCLDt(const DataType dt) {
   switch (dt) {
     case DT_FLOAT:
     case DT_HALF:
       return "float";
-    case DT_UINT8:
-      return "uchar";
-    case DT_INT8:
-      return "char";
-    case DT_DOUBLE:
-      return "double";
-    case DT_INT32:
-      return "int";
-    case DT_UINT32:
-      return "int";
-    case DT_UINT16:
-      return "ushort";
-    case DT_INT16:
-      return "short";
-    case DT_INT64:
-      return "long";
     default:
       LOG(FATAL) << "Unsupported data type";
       return "";
   }
 }
 
-std::string DataTypeToOPENCLCMDDataType(const DataType dt) {
+std::string DtToUpstreamCLCMDDt(const DataType dt) {
   switch (dt) {
     case DT_FLOAT:
     case DT_HALF:
diff --git a/mace/kernels/opencl/helper.h b/mace/kernels/opencl/helper.h
index 1ad94aa5..70d74e58 100644
--- a/mace/kernels/opencl/helper.h
+++ b/mace/kernels/opencl/helper.h
@@ -19,10 +19,13 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
                      const BufferType type,
                      std::vector<size_t> &image_shape);
 
-std::string DataTypeToOPENCLCMDDataType(const DataType dt);
+std::string DtToCLCMDDt(const DataType dt);
 
-std::string DataTypeToCLType(const DataType dt);
+std::string DtToUpstreamCLCMDDt(const DataType dt);
 
+std::string DtToCLDt(const DataType dt);
+
+std::string DtToUpstreamCLDt(const DataType dt);
 
 }  // namespace kernels
 } //  namespace mace
diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc
index 0aaa89ae..fb9216f7 100644
--- a/mace/kernels/opencl/pooling_opencl.cc
+++ b/mace/kernels/opencl/pooling_opencl.cc
@@ -32,7 +32,7 @@ static void Pooling3(const Tensor *input,
 
   auto runtime = OpenCLRuntime::Get();
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype()));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
   built_options.emplace(stride[0] == 1 ? "-DSTRIDE_1" : "");
   auto pooling_kernel  = runtime->BuildKernel("pooling", "pooling3", built_options);
 
@@ -80,7 +80,7 @@ static void PoolingN(const Tensor *input,
 
   auto runtime = OpenCLRuntime::Get();
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype()));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
   auto pooling_kernel  = runtime->BuildKernel("pooling", "poolingn", built_options);
 
   const uint32_t lws[3] = {1, 8, 128};
diff --git a/mace/kernels/opencl/relu_opencl.cc b/mace/kernels/opencl/relu_opencl.cc
index 1149b965..e7f527a5 100644
--- a/mace/kernels/opencl/relu_opencl.cc
+++ b/mace/kernels/opencl/relu_opencl.cc
@@ -23,7 +23,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
   auto program = runtime->program();
 
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype()));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
   if (max_limit_ < 0) {
     auto relu_kernel  = runtime->BuildKernel("relu", "relu", built_options);
     const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(relu_kernel);
diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc
index 67d74ed3..27dd8e62 100644
--- a/mace/kernels/opencl/resize_bilinear_opencl.cc
+++ b/mace/kernels/opencl/resize_bilinear_opencl.cc
@@ -41,8 +41,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
   auto runtime = OpenCLRuntime::Get();
   std::set<std::string> built_options;
   auto dt = DataTypeToEnum<T>::value;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt));
-  built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
+  built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
   auto rb_kernel  = runtime->BuildKernel("resize_bilinear", "resize_bilinear_nocache", built_options);
 
   const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(rb_kernel);
diff --git a/mace/kernels/opencl/space_to_batch_opecl.cc b/mace/kernels/opencl/space_to_batch_opecl.cc
index 2716501c..72590be5 100644
--- a/mace/kernels/opencl/space_to_batch_opecl.cc
+++ b/mace/kernels/opencl/space_to_batch_opecl.cc
@@ -20,7 +20,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, float>::operator()(Tensor *space_te
                                                                 Tensor *batch_tensor) {
   auto runtime = OpenCLRuntime::Get();
   std::set<std::string> built_options;
-  built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(space_tensor->dtype()));
+  built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(space_tensor->dtype()));
   auto s2b_kernel = runtime->BuildKernel("space_to_batch", "space_to_batch", built_options);
 
   uint32_t idx = 0;
diff --git a/mace/ops/buffer_to_image_test.cc b/mace/ops/buffer_to_image_test.cc
index 7bd667ca..3836a7ae 100644
--- a/mace/ops/buffer_to_image_test.cc
+++ b/mace/ops/buffer_to_image_test.cc
@@ -43,7 +43,7 @@ TEST(BufferToImageTest, ArgSmall) {
 }
 
 TEST(BufferToImageTest, ArgHalfSmall) {
-  TestBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {1});
+  TestBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {11});
 }
 
 TEST(BufferToImageTest, ArgMedia) {
@@ -97,3 +97,37 @@ TEST(BufferToImageTest, Filter3x3Meida) {
 TEST(BufferToImageTest, Filter3x3Large) {
   TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::FILTER, {3, 3, 128, 256});
 }
+
+template<DeviceType D, typename T>
+void TestDiffTypeBidirectionTransform(const int type, const std::vector<index_t> &input_shape) {
+  OpsTestNet net;
+  OpDefBuilder("BufferToImage", "BufferToImageTest")
+      .Input("Input")
+      .Output("B2IOutput")
+      .AddIntArg("buffer_type", type)
+      .AddIntArg("T", DataTypeToEnum<T>::value)
+      .Finalize(net.NewOperatorDef());
+
+  // Add input data
+  net.AddRandomInput<D, float>("Input", input_shape);
+
+  // Run
+  net.RunOp(D);
+
+  OpDefBuilder("ImageToBuffer", "ImageToBufferTest")
+      .Input("B2IOutput")
+      .Output("I2BOutput")
+      .AddIntArg("buffer_type", type)
+      .AddIntArg("T", DataTypeToEnum<T>::value)
+      .Finalize(net.NewOperatorDef());
+
+  // Run
+  net.RunOp(D);
+
+  // Check
+  ExpectTensorNear<float, T>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-2);
+}
+
+TEST(BufferToImageTest, ArgFloatToHalfSmall) {
+  TestDiffTypeBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {11});
+}
-- 
GitLab