提交 5d5d06c2 编写于 作者: L liuqi

Fix buffer to image bug for half type and refactor some cl apis.

上级 3272a989
...@@ -17,7 +17,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) { ...@@ -17,7 +17,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) {
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(output->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(output->dtype()));
auto addn_kernel = runtime->BuildKernel("addn", "add2", built_options); auto addn_kernel = runtime->BuildKernel("addn", "add2", built_options);
const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(addn_kernel); const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(addn_kernel);
......
...@@ -30,7 +30,7 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()( ...@@ -30,7 +30,7 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
auto bm_kernel = runtime->BuildKernel("batch_norm", "batch_norm", built_options); auto bm_kernel = runtime->BuildKernel("batch_norm", "batch_norm", built_options);
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(bm_kernel); const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(bm_kernel);
......
...@@ -24,8 +24,13 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer, ...@@ -24,8 +24,13 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
} }
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(DataTypeToEnum<T>::value)); if (buffer->dtype() == image->dtype()) {
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(DataTypeToEnum<T>::value)); built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum<T>::value));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum<T>::value));
} else {
built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum<T>::value));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum<T>::value));
}
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
string kernel_name; string kernel_name;
switch (type) { switch (type) {
......
...@@ -34,8 +34,8 @@ void Conv1x1(const Tensor *input, ...@@ -34,8 +34,8 @@ void Conv1x1(const Tensor *input,
MACE_CHECK(input_batch == batch); MACE_CHECK(input_batch == batch);
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
built_options.emplace("-DSTRIDE=" + ToString(stride)); built_options.emplace("-DSTRIDE=" + ToString(stride));
if (bias != nullptr) { if (bias != nullptr) {
built_options.emplace("-DBIAS"); built_options.emplace("-DBIAS");
......
...@@ -26,8 +26,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, ...@@ -26,8 +26,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
const index_t width_blocks = RoundUpDiv<index_t, 5>(width); const index_t width_blocks = RoundUpDiv<index_t, 5>(width);
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
built_options.emplace(bias != nullptr ? "-DBIAS" : ""); built_options.emplace(bias != nullptr ? "-DBIAS" : "");
built_options.emplace("-DSTRIDE=" + ToString(stride)); built_options.emplace("-DSTRIDE=" + ToString(stride));
if (fused_relu) { if (fused_relu) {
......
...@@ -32,7 +32,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input, ...@@ -32,7 +32,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
built_options.emplace(stride == 1 ? "-DSTRIDE_1" : ""); built_options.emplace(stride == 1 ? "-DSTRIDE_1" : "");
built_options.emplace(bias != nullptr ? "-DBIAS" : ""); built_options.emplace(bias != nullptr ? "-DBIAS" : "");
auto conv_kernel = runtime->BuildKernel("depthwise_conv_3x3", "depthwise_conv_3x3", built_options); auto conv_kernel = runtime->BuildKernel("depthwise_conv_3x3", "depthwise_conv_3x3", built_options);
......
...@@ -54,34 +54,42 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */ ...@@ -54,34 +54,42 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
} }
std::string DataTypeToCLType(const DataType dt) { std::string DtToCLDt(const DataType dt) {
// Returns the OpenCL C scalar type name that exactly matches the mace
// DataType (DT_FLOAT -> "float", DT_HALF -> "half"); used to build the
// -DDATA_TYPE kernel compile option when no widening to float is wanted.
// Any other DataType is a hard failure (LOG(FATAL)).
switch (dt) {
case DT_FLOAT:
return "float";
case DT_HALF:
return "half";
default:
LOG(FATAL) << "Unsupported data type";
return "";
}
}
// Returns the one-letter suffix used by OpenCL typed built-ins
// (e.g. read_imagef vs read_imageh, vloadN/vstoreN variants) for the
// given mace DataType: DT_FLOAT -> "f", DT_HALF -> "h".
// Any other DataType is a hard failure (LOG(FATAL)).
std::string DtToCLCMDDt(const DataType dt) {
  if (dt == DT_FLOAT) {
    return "f";
  }
  if (dt == DT_HALF) {
    return "h";
  }
  LOG(FATAL) << "Not supported data type for opencl cmd data type";
  return "";
}
std::string DtToUpstreamCLDt(const DataType dt) {
switch (dt) { switch (dt) {
case DT_FLOAT: case DT_FLOAT:
case DT_HALF: case DT_HALF:
return "float"; return "float";
case DT_UINT8:
return "uchar";
case DT_INT8:
return "char";
case DT_DOUBLE:
return "double";
case DT_INT32:
return "int";
case DT_UINT32:
return "int";
case DT_UINT16:
return "ushort";
case DT_INT16:
return "short";
case DT_INT64:
return "long";
default: default:
LOG(FATAL) << "Unsupported data type"; LOG(FATAL) << "Unsupported data type";
return ""; return "";
} }
} }
std::string DataTypeToOPENCLCMDDataType(const DataType dt) { std::string DtToUpstreamCLCMDDt(const DataType dt) {
switch (dt) { switch (dt) {
case DT_FLOAT: case DT_FLOAT:
case DT_HALF: case DT_HALF:
......
...@@ -19,10 +19,13 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */ ...@@ -19,10 +19,13 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
const BufferType type, const BufferType type,
std::vector<size_t> &image_shape); std::vector<size_t> &image_shape);
std::string DataTypeToOPENCLCMDDataType(const DataType dt); std::string DtToCLCMDDt(const DataType dt);
std::string DataTypeToCLType(const DataType dt); std::string DtToUpstreamCLCMDDt(const DataType dt);
std::string DtToCLDt(const DataType dt);
std::string DtToUpstreamCLDt(const DataType dt);
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -32,7 +32,7 @@ static void Pooling3(const Tensor *input, ...@@ -32,7 +32,7 @@ static void Pooling3(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
built_options.emplace(stride[0] == 1 ? "-DSTRIDE_1" : ""); built_options.emplace(stride[0] == 1 ? "-DSTRIDE_1" : "");
auto pooling_kernel = runtime->BuildKernel("pooling", "pooling3", built_options); auto pooling_kernel = runtime->BuildKernel("pooling", "pooling3", built_options);
...@@ -80,7 +80,7 @@ static void PoolingN(const Tensor *input, ...@@ -80,7 +80,7 @@ static void PoolingN(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
auto pooling_kernel = runtime->BuildKernel("pooling", "poolingn", built_options); auto pooling_kernel = runtime->BuildKernel("pooling", "poolingn", built_options);
const uint32_t lws[3] = {1, 8, 128}; const uint32_t lws[3] = {1, 8, 128};
......
...@@ -23,7 +23,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input, ...@@ -23,7 +23,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
auto program = runtime->program(); auto program = runtime->program();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
if (max_limit_ < 0) { if (max_limit_ < 0) {
auto relu_kernel = runtime->BuildKernel("relu", "relu", built_options); auto relu_kernel = runtime->BuildKernel("relu", "relu", built_options);
const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(relu_kernel); const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(relu_kernel);
......
...@@ -41,8 +41,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -41,8 +41,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
auto dt = DataTypeToEnum<T>::value; auto dt = DataTypeToEnum<T>::value;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
auto rb_kernel = runtime->BuildKernel("resize_bilinear", "resize_bilinear_nocache", built_options); auto rb_kernel = runtime->BuildKernel("resize_bilinear", "resize_bilinear_nocache", built_options);
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(rb_kernel); const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(rb_kernel);
......
...@@ -20,7 +20,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, float>::operator()(Tensor *space_te ...@@ -20,7 +20,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, float>::operator()(Tensor *space_te
Tensor *batch_tensor) { Tensor *batch_tensor) {
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(space_tensor->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(space_tensor->dtype()));
auto s2b_kernel = runtime->BuildKernel("space_to_batch", "space_to_batch", built_options); auto s2b_kernel = runtime->BuildKernel("space_to_batch", "space_to_batch", built_options);
uint32_t idx = 0; uint32_t idx = 0;
......
...@@ -43,7 +43,7 @@ TEST(BufferToImageTest, ArgSmall) { ...@@ -43,7 +43,7 @@ TEST(BufferToImageTest, ArgSmall) {
} }
TEST(BufferToImageTest, ArgHalfSmall) { TEST(BufferToImageTest, ArgHalfSmall) {
TestBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {1}); TestBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {11});
} }
TEST(BufferToImageTest, ArgMedia) { TEST(BufferToImageTest, ArgMedia) {
...@@ -97,3 +97,37 @@ TEST(BufferToImageTest, Filter3x3Meida) { ...@@ -97,3 +97,37 @@ TEST(BufferToImageTest, Filter3x3Meida) {
TEST(BufferToImageTest, Filter3x3Large) { TEST(BufferToImageTest, Filter3x3Large) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::FILTER, {3, 3, 128, 256}); TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::FILTER, {3, 3, 128, 256});
} }
// Round-trips random float data through an image whose element type is T:
// BufferToImage ("Input" -> "B2IOutput") followed by ImageToBuffer
// ("B2IOutput" -> "I2BOutput"), then checks the result is close to the
// original input. With T = half this exercises the mixed-precision
// (float buffer -> half image) conversion path.
// `type` is a kernels::BufferType value selecting the image layout;
// `input_shape` is the NHWC-style shape of the source buffer.
template<DeviceType D, typename T>
void TestDiffTypeBidirectionTransform(const int type, const std::vector<index_t> &input_shape) {
OpsTestNet net;
// Buffer -> image; the "T" int arg sets the image storage data type,
// which here may differ from the float input buffer's dtype.
OpDefBuilder("BufferToImage", "BufferToImageTest")
.Input("Input")
.Output("B2IOutput")
.AddIntArg("buffer_type", type)
.AddIntArg("T", DataTypeToEnum<T>::value)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, float>("Input", input_shape);
// Run
net.RunOp(D);
// Image -> buffer, inverting the transform above.
OpDefBuilder("ImageToBuffer", "ImageToBufferTest")
.Input("B2IOutput")
.Output("I2BOutput")
.AddIntArg("buffer_type", type)
.AddIntArg("T", DataTypeToEnum<T>::value)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
// Check: loose 1e-2 tolerance, since a float -> half -> float round trip
// loses precision.
ExpectTensorNear<float, T>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-2);
}
// Regression test for the half-type buffer-to-image fix: round-trips an
// 11-element ARGUMENT buffer of floats through a half-typed image.
TEST(BufferToImageTest, ArgFloatToHalfSmall) {
TestDiffTypeBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {11});
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册