diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index f2084be73274b0a3463ee1f2c900f2c0b3a271f6..e56302e461af155ed5dbd82451e2d35dc195ed8c 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -13,7 +13,7 @@ namespace kernels { template struct BatchNormFunctor { - T epsilon_; + float epsilon_; void operator()(const Tensor *input, const Tensor *scale, @@ -84,7 +84,7 @@ void BatchNormFunctor::operator()( template struct BatchNormFunctor { - T epsilon_; + float epsilon_; void operator()(const Tensor *input, const Tensor *scale, diff --git a/mace/kernels/opencl/cl/batch_norm.cl b/mace/kernels/opencl/cl/batch_norm.cl index f8b2f8a271837be04516e942098d6ac7d17a0526..93d249ea25a113c0bb1437b43631e1c9e3372488 100644 --- a/mace/kernels/opencl/cl/batch_norm.cl +++ b/mace/kernels/opencl/cl/batch_norm.cl @@ -5,7 +5,7 @@ __kernel void batch_norm(__read_only image2d_t input, __read_only image2d_t offset, __read_only image2d_t mean, __read_only image2d_t var, - __private const DATA_TYPE epsilon, + __private const float epsilon, __write_only image2d_t output) { const int ch_blk = get_global_id(0); const int w = get_global_id(1); diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index dc807ffbbf753fb07a55bab06066b8b8eed94e2d..1093cbb9a45bde6057c9429cc038016e56073865 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -227,6 +227,72 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } +TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { + srand(time(NULL)); + + // generate random input + index_t batch = 1 + rand() % 10; + index_t channels = 3 + rand() % 50; + index_t height = 64; + index_t width = 64; + + // Construct graph + auto &net = test_net(); + OpDefBuilder("BatchNorm", "BatchNormTest") + .Input("Input") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .AddFloatArg("epsilon", 1e-3) + .Output("Output") + .Finalize(net.NewOperatorDef()); + + // Add input data + net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput("Scale", {channels}); + net.AddRandomInput("Offset", {channels}); + net.AddRandomInput("Mean", {channels}); + net.AddRandomInput("Var", {channels}, true); + + // run cpu + net.RunOp(); + + // Check + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + // Run on opencl + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + + OpDefBuilder("BatchNorm", "BatchNormTest") + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Input("MeanImage") + .Input("VarImage") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputImage") + .AddIntArg("T", static_cast(DataType::DT_HALF)) + .Finalize(net.NewOperatorDef()); + + // Tuning + setenv("MACE_TUNING", "1", 1); + net.RunOp(DeviceType::OPENCL); + unsetenv("MACE_TUNING"); + + // Run on opencl + net.RunOp(DeviceType::OPENCL); + net.Sync(); + + ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); +} + TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { srand(time(NULL)); @@ -293,4 +359,70 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } +TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { + srand(time(NULL)); + + // generate random input + index_t batch = 1 + rand() % 10; + index_t channels = 3 + rand() % 50; + index_t height = 103; + index_t width = 113; + + // Construct graph + auto &net = test_net(); + OpDefBuilder("BatchNorm", "BatchNormTest") + .Input("Input") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .AddFloatArg("epsilon", 1e-3) + .Output("Output") + .Finalize(net.NewOperatorDef()); + + // Add input data + net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput("Scale", {channels}); + net.AddRandomInput("Offset", {channels}); + net.AddRandomInput("Mean", {channels}); + net.AddRandomInput("Var", {channels}, true); + + // run cpu + net.RunOp(); + + // Check + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + + // Run on opencl + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + + OpDefBuilder("BatchNorm", "BatchNormTest") + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Input("MeanImage") + .Input("VarImage") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputImage") + .AddIntArg("T", static_cast(DataType::DT_HALF)) + .Finalize(net.NewOperatorDef()); + + // tuning + setenv("MACE_TUNING", "1", 1); + net.RunOp(DeviceType::OPENCL); + unsetenv("MACE_TUNING"); + + // Run on opencl + net.RunOp(DeviceType::OPENCL); + net.Sync(); + + ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); +} }