Merge branch 'fix_half_fault_in_batch_norm' into 'master'

Fix half type error in batch_norm OpenCL op; add half type test in batch_norm. See merge request !151

Merge branch 'fix_half_fault_in_batch_norm' into 'master'
Fix half type error in batch_norm OpenCL op; add half type test in batch_norm. See merge request !151
1635046e · 刘琦 · 509d29ec · 982aba93 · 1635046e · 1635046e
Showing 3 changed files with 135 additions and 3 deletions

mace/kernels/batch_norm.h mace/kernels/batch_norm.h +2 -2

mace/kernels/opencl/cl/batch_norm.cl mace/kernels/opencl/cl/batch_norm.cl +1 -1

mace/ops/batch_norm_test.cc mace/ops/batch_norm_test.cc +132 -0

未找到文件。
--- a/mace/kernels/batch_norm.h
+++ b/mace/kernels/batch_norm.h
@@ -13,7 +13,7 @@ namespace kernels {

 template <DeviceType D, typename T>
 struct BatchNormFunctor {
-  T epsilon_;
+  float epsilon_;

  void operator()(const Tensor *input,
                  const Tensor *scale,
@@ -84,7 +84,7 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(

 template <typename T>
 struct BatchNormFunctor<DeviceType::OPENCL, T> {
-  T epsilon_;
+  float epsilon_;

  void operator()(const Tensor *input,
                  const Tensor *scale,

--- a/mace/kernels/opencl/cl/batch_norm.cl
+++ b/mace/kernels/opencl/cl/batch_norm.cl
@@ -5,7 +5,7 @@ __kernel void batch_norm(__read_only image2d_t input,
                         __read_only image2d_t offset,
                         __read_only image2d_t mean,
                         __read_only image2d_t var,
-                         __private const DATA_TYPE epsilon,
+                         __private const float epsilon,
                         __write_only image2d_t output) {
  const int ch_blk = get_global_id(0);
  const int w = get_global_id(1);

--- a/mace/ops/batch_norm_test.cc
+++ b/mace/ops/batch_norm_test.cc
@@ -227,6 +227,72 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
  ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
 }

+TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
+  srand(time(NULL));
+
+  // Random batch in [1, 10] and channels in [3, 52] (seeded from time(NULL)); height and width fixed at 64
+  index_t batch = 1 + rand() % 10;
+  index_t channels = 3 + rand() % 50;
+  index_t height = 64;
+  index_t width = 64;
+
+  // Define the reference BatchNorm op on the plain "Input".."Var" tensors with epsilon = 1e-3
+  auto &net = test_net();
+  OpDefBuilder("BatchNorm", "BatchNormTest")
+      .Input("Input")
+      .Input("Scale")
+      .Input("Offset")
+      .Input("Mean")
+      .Input("Var")
+      .AddFloatArg("epsilon", 1e-3)
+      .Output("Output")
+      .Finalize(net.NewOperatorDef());
+
+  // Random float inputs: Input is {batch, height, width, channels}; the other four are {channels}
+  net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Var", {channels}, true);
+
+  // Run the reference op via RunOp() with no device argument (the CPU run, per the original comment)
+  net.RunOp();
+
+  // Copy the reference result into `expected` for the final comparison
+  Tensor expected;
+  expected.Copy(*net.GetOutput("Output"));
+
+  // Convert every input buffer to an image, passing half as the BufferToImage type argument
+  BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
+
+  OpDefBuilder("BatchNorm", "BatchNormTest")
+      .Input("InputImage")
+      .Input("ScaleImage")
+      .Input("OffsetImage")
+      .Input("MeanImage")
+      .Input("VarImage")
+      .AddFloatArg("epsilon", 1e-3)
+      .Output("OutputImage")
+      .AddIntArg("T", static_cast<int>(DataType::DT_HALF))
+      .Finalize(net.NewOperatorDef());
+
+  // First OpenCL run happens with MACE_TUNING=1 in the environment; the variable is unset right after
+  setenv("MACE_TUNING", "1", 1);
+  net.RunOp(DeviceType::OPENCL);
+  unsetenv("MACE_TUNING");
+
+  // Run again without MACE_TUNING and Sync(); the comparison below passes 0.5 where the float tests pass 1e-2
+  net.RunOp(DeviceType::OPENCL);
+  net.Sync();
+
+  ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
+  ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
+}
+
 TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
  srand(time(NULL));

@@ -293,4 +359,70 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
  ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
 }

+TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
+  srand(time(NULL));
+
+  // Random batch in [1, 10] and channels in [3, 52] (seeded from time(NULL)); height 103 and width 113 are fixed
+  index_t batch = 1 + rand() % 10;
+  index_t channels = 3 + rand() % 50;
+  index_t height = 103;
+  index_t width = 113;
+
+  // Define the reference BatchNorm op on the plain "Input".."Var" tensors with epsilon = 1e-3
+  auto &net = test_net();
+  OpDefBuilder("BatchNorm", "BatchNormTest")
+      .Input("Input")
+      .Input("Scale")
+      .Input("Offset")
+      .Input("Mean")
+      .Input("Var")
+      .AddFloatArg("epsilon", 1e-3)
+      .Output("Output")
+      .Finalize(net.NewOperatorDef());
+
+  // Random float inputs: Input is {batch, height, width, channels}; the other four are {channels}
+  net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, height, width, channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Scale", {channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Offset", {channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Mean", {channels});
+  net.AddRandomInput<DeviceType::OPENCL, float>("Var", {channels}, true);
+
+  // Run the reference op via RunOp() with no device argument (the CPU run, per the original comment)
+  net.RunOp();
+
+  // Copy the reference result into `expected` for the final comparison
+  Tensor expected;
+  expected.Copy(*net.GetOutput("Output"));
+
+
+  // Convert every input buffer to an image, passing half as the BufferToImage type argument
+  BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT);
+  BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", kernels::BufferType::ARGUMENT);
+
+  OpDefBuilder("BatchNorm", "BatchNormTest")
+      .Input("InputImage")
+      .Input("ScaleImage")
+      .Input("OffsetImage")
+      .Input("MeanImage")
+      .Input("VarImage")
+      .AddFloatArg("epsilon", 1e-3)
+      .Output("OutputImage")
+      .AddIntArg("T", static_cast<int>(DataType::DT_HALF))
+      .Finalize(net.NewOperatorDef());
+
+  // First OpenCL run happens with MACE_TUNING=1 in the environment; the variable is unset right after
+  setenv("MACE_TUNING", "1", 1);
+  net.RunOp(DeviceType::OPENCL);
+  unsetenv("MACE_TUNING");
+
+  // Run again without MACE_TUNING and Sync(); the comparison below passes 0.5 where the float tests pass 1e-2
+  net.RunOp(DeviceType::OPENCL);
+  net.Sync();
+
+  ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT);
+  ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
+}
 }