From e34d6183586b3d1da2b5dd5d410005087036bf84 Mon Sep 17 00:00:00 2001 From: liuqi Date: Fri, 20 Apr 2018 09:38:49 +0800 Subject: [PATCH] Fix BUILD warning and conv test precision. --- mace/core/mace.cc | 8 ++- mace/kernels/BUILD | 18 +++-- mace/ops/BUILD | 27 +++++--- mace/ops/fused_conv_2d_test.cc | 118 +++++++++++++++++---------------- mace/test/BUILD | 5 +- 5 files changed, 98 insertions(+), 78 deletions(-) diff --git a/mace/core/mace.cc b/mace/core/mace.cc index ca926aa5..04f66bac 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -193,9 +193,11 @@ MaceStatus MaceEngine::Impl::Run( input_tensors.push_back(input_tensor); } for (auto &output : *outputs) { - MACE_CHECK(output.second.shape().size() == 4, - "The outputs' shape must be 4-dimension with NHWC format," - " please use 1 to fill missing dimensions"); + if (device_type_ == DeviceType::OPENCL) { + MACE_CHECK(output.second.shape().size() == 4, + "The outputs' shape must be 4-dimension with NHWC format," + " please use 1 to fill missing dimensions"); + } Tensor *output_tensor = ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); output_tensors.push_back(output_tensor); diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index 4eb4b850..50ab5c95 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -28,9 +28,12 @@ cc_library( "opencl/*.h", "arm/*.h", ]), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + + if_android_armv7(["-mfpu=neon"]) + + if_android_armv7(["-mfloat-abi=softfp"]) + + if_android(["-DMACE_ENABLE_OPENCL"]) + + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = if_android(["-lm"]), deps = [ "//mace/core", @@ -48,9 +51,12 @@ cc_test( "opencl/*_test.cc", ], ), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + + if_android_armv7(["-mfpu=neon"]) + + if_android_armv7(["-mfloat-abi=softfp"]) + + if_android(["-DMACE_ENABLE_OPENCL"]) + + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ diff --git a/mace/ops/BUILD b/mace/ops/BUILD index 131beceb..ba39f5af 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -34,9 +34,12 @@ cc_library( ["*.h"], exclude = ["ops_test_util.h"], ), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + + if_android_armv7(["-mfpu=neon"]) + + if_android_armv7(["-mfloat-abi=softfp"]) + + if_android(["-DMACE_ENABLE_OPENCL"]) + + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), deps = [ "//mace/kernels", ], @@ -49,9 +52,12 @@ cc_test( srcs = glob( ["*_test.cc"], ), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + + if_android_armv7(["-mfpu=neon"]) + + if_android_armv7(["-mfloat-abi=softfp"]) + + if_android(["-DMACE_ENABLE_OPENCL"]) + + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ @@ -65,9 +71,12 @@ cc_test( name = "ops_benchmark", testonly = 1, srcs = glob(["*_benchmark.cc"]), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ - "-DMACE_ENABLE_OPENCL", - ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + + if_android_armv7(["-mfpu=neon"]) + + if_android_armv7(["-mfloat-abi=softfp"]) + + if_android(["-DMACE_ENABLE_OPENCL"]) + + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ diff --git a/mace/ops/fused_conv_2d_test.cc b/mace/ops/fused_conv_2d_test.cc index d02953b2..afe889be 100644 --- a/mace/ops/fused_conv_2d_test.cc +++ b/mace/ops/fused_conv_2d_test.cc @@ -375,90 +375,92 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) { namespace { template -void TestHalfComplexConvNxNS12(const std::vector &shape) { +void TestHalfComplexConvNxNS12(const std::vector &shape, + const int kernel, const int stride, + Padding type) { testing::internal::LogToStderr(); - auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, - Padding type) { - // generate random input - static unsigned int seed = time(NULL); - index_t batch = 3 + (rand_r(&seed) % 10); - index_t height = shape[0]; - index_t width = shape[1]; - index_t input_channels = shape[2] + (rand_r(&seed) % 10); - index_t output_channels = shape[3] + (rand_r(&seed) % 10); - // Construct graph - OpsTestNet net; - OpDefBuilder("FusedConv2D", "FusedConv2dTest") + // generate random input + srand(time(NULL)); + index_t batch = 3; + index_t height = shape[0]; + index_t width = shape[1]; + index_t input_channels = shape[2]; + index_t output_channels = shape[3]; + // Construct graph + OpsTestNet net; + OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("Input") .Input("Filter") .Input("Bias") .Output("Output") - .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntsArg("strides", {stride, stride}) .AddIntArg("padding", type) .AddIntsArg("dilations", {1, 1}) .Finalize(net.NewOperatorDef()); - std::vector float_input_data; - GenerateRandomRealTypeData({batch, height, width, input_channels}, - &float_input_data); - std::vector float_filter_data; - GenerateRandomRealTypeData( - {kernel_h, kernel_w, output_channels, input_channels}, + std::vector float_input_data; + GenerateRandomRealTypeData({batch, height, width, input_channels}, + &float_input_data); + std::vector float_filter_data; + GenerateRandomRealTypeData( + {kernel, kernel, output_channels, input_channels}, &float_filter_data); - std::vector float_bias_data; - GenerateRandomRealTypeData({output_channels}, &float_bias_data); - // Add input data - net.AddInputFromArray( + std::vector float_bias_data; + GenerateRandomRealTypeData({output_channels}, &float_bias_data); + // Add input data + net.AddInputFromArray( "Input", {batch, height, width, input_channels}, float_input_data); - net.AddInputFromArray( - "Filter", {kernel_h, kernel_w, output_channels, input_channels}, + net.AddInputFromArray( + "Filter", {kernel, kernel, output_channels, input_channels}, float_filter_data); - net.AddInputFromArray("Bias", {output_channels}, float_bias_data); - - // run on cpu - net.RunOp(); - // Check - Tensor expected; - expected.Copy(*net.GetOutput("Output")); + net.AddInputFromArray("Bias", {output_channels}, float_bias_data); - // run on gpu - BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::CONV2D_FILTER); - BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); - - OpDefBuilder("FusedConv2D", "FusedConv2dTest") + // run on cpu + net.RunOp(); + // Check + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + // run on gpu + BufferToImage(&net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(&net, "Filter", "FilterImage", + kernels::BufferType::CONV2D_FILTER); + BufferToImage(&net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); + + OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") .Input("FilterImage") .Input("BiasImage") .Output("OutputImage") - .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntsArg("strides", {stride, stride}) .AddIntArg("padding", type) .AddIntsArg("dilations", {1, 1}) .AddIntArg("T", static_cast(DataType::DT_HALF)) .Finalize(net.NewOperatorDef()); - // Run on device - net.RunOp(D); + // Run on device + net.RunOp(D); - ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); - - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-2, 1e-1); - }; + ImageToBuffer(&net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); - for (int kernel_size : {1, 3}) { - for (int stride : {1, 2}) { - func(kernel_size, kernel_size, stride, stride, VALID); - } - } + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), + 1e-2, 1e-1); } } // namespace -TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) { - TestHalfComplexConvNxNS12({32, 32, 32, 64}); +TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConv1x1S12) { + TestHalfComplexConvNxNS12({32, 32, 32, 64}, 1, 1, VALID); + TestHalfComplexConvNxNS12({31, 37, 31, 37}, 1, 1, SAME); + TestHalfComplexConvNxNS12({32, 32, 32, 64}, 1, 2, VALID); + TestHalfComplexConvNxNS12({31, 37, 31, 37}, 1, 2, SAME); +} +TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConv3x3S12) { + TestHalfComplexConvNxNS12({32, 32, 32, 64}, 3, 1, VALID); + TestHalfComplexConvNxNS12({31, 37, 31, 37}, 3, 1, SAME); + TestHalfComplexConvNxNS12({32, 32, 32, 64}, 3, 2, VALID); + TestHalfComplexConvNxNS12({31, 37, 31, 37}, 3, 2, SAME); } namespace { diff --git a/mace/test/BUILD b/mace/test/BUILD index e23a1700..f3345cfa 100644 --- a/mace/test/BUILD +++ b/mace/test/BUILD @@ -15,8 +15,9 @@ cc_test( srcs = ["mace_api_test.cc"], copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + - if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + - if_android(["-DMACE_ENABLE_OPENCL",]) + + if_android_armv7(["-mfpu=neon"]) + + if_android_armv7(["-mfloat-abi=softfp"]) + + if_android(["-DMACE_ENABLE_OPENCL"]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, -- GitLab