提交 e34d6183 编写于 作者: L liuqi

Fix BUILD warning and conv test precision.

上级 b0bfe7f6
...@@ -193,9 +193,11 @@ MaceStatus MaceEngine::Impl::Run( ...@@ -193,9 +193,11 @@ MaceStatus MaceEngine::Impl::Run(
input_tensors.push_back(input_tensor); input_tensors.push_back(input_tensor);
} }
for (auto &output : *outputs) { for (auto &output : *outputs) {
MACE_CHECK(output.second.shape().size() == 4, if (device_type_ == DeviceType::OPENCL) {
"The outputs' shape must be 4-dimension with NHWC format," MACE_CHECK(output.second.shape().size() == 4,
" please use 1 to fill missing dimensions"); "The outputs' shape must be 4-dimension with NHWC format,"
" please use 1 to fill missing dimensions");
}
Tensor *output_tensor = Tensor *output_tensor =
ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0"));
output_tensors.push_back(output_tensor); output_tensors.push_back(output_tensor);
......
...@@ -28,9 +28,12 @@ cc_library( ...@@ -28,9 +28,12 @@ cc_library(
"opencl/*.h", "opencl/*.h",
"arm/*.h", "arm/*.h",
]), ]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ copts = if_openmp_enabled(["-fopenmp"]) +
"-DMACE_ENABLE_OPENCL", if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) +
if_android(["-DMACE_ENABLE_OPENCL"]) +
if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = if_android(["-lm"]), linkopts = if_android(["-lm"]),
deps = [ deps = [
"//mace/core", "//mace/core",
...@@ -48,9 +51,12 @@ cc_test( ...@@ -48,9 +51,12 @@ cc_test(
"opencl/*_test.cc", "opencl/*_test.cc",
], ],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ copts = if_openmp_enabled(["-fopenmp"]) +
"-DMACE_ENABLE_OPENCL", if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) +
if_android(["-DMACE_ENABLE_OPENCL"]) +
if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
......
...@@ -34,9 +34,12 @@ cc_library( ...@@ -34,9 +34,12 @@ cc_library(
["*.h"], ["*.h"],
exclude = ["ops_test_util.h"], exclude = ["ops_test_util.h"],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ copts = if_openmp_enabled(["-fopenmp"]) +
"-DMACE_ENABLE_OPENCL", if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) +
if_android(["-DMACE_ENABLE_OPENCL"]) +
if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
deps = [ deps = [
"//mace/kernels", "//mace/kernels",
], ],
...@@ -49,9 +52,12 @@ cc_test( ...@@ -49,9 +52,12 @@ cc_test(
srcs = glob( srcs = glob(
["*_test.cc"], ["*_test.cc"],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ copts = if_openmp_enabled(["-fopenmp"]) +
"-DMACE_ENABLE_OPENCL", if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) +
if_android(["-DMACE_ENABLE_OPENCL"]) +
if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
...@@ -65,9 +71,12 @@ cc_test( ...@@ -65,9 +71,12 @@ cc_test(
name = "ops_benchmark", name = "ops_benchmark",
testonly = 1, testonly = 1,
srcs = glob(["*_benchmark.cc"]), srcs = glob(["*_benchmark.cc"]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ copts = if_openmp_enabled(["-fopenmp"]) +
"-DMACE_ENABLE_OPENCL", if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), if_android_armv7(["-mfpu=neon"]) +
if_android_armv7(["-mfloat-abi=softfp"]) +
if_android(["-DMACE_ENABLE_OPENCL"]) +
if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
......
...@@ -375,90 +375,92 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) { ...@@ -375,90 +375,92 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) {
namespace { namespace {
template<DeviceType D> template<DeviceType D>
void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape,
const int kernel, const int stride,
Padding type) {
testing::internal::LogToStderr(); testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, // generate random input
Padding type) { srand(time(NULL));
// generate random input index_t batch = 3;
static unsigned int seed = time(NULL); index_t height = shape[0];
index_t batch = 3 + (rand_r(&seed) % 10); index_t width = shape[1];
index_t height = shape[0]; index_t input_channels = shape[2];
index_t width = shape[1]; index_t output_channels = shape[3];
index_t input_channels = shape[2] + (rand_r(&seed) % 10); // Construct graph
index_t output_channels = shape[3] + (rand_r(&seed) % 10); OpsTestNet net;
// Construct graph OpDefBuilder("FusedConv2D", "FusedConv2dTest")
OpsTestNet net;
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("Input") .Input("Input")
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.AddIntsArg("strides", {stride_h, stride_w}) .AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", type) .AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1}) .AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
std::vector<float> float_input_data; std::vector<float> float_input_data;
GenerateRandomRealTypeData({batch, height, width, input_channels}, GenerateRandomRealTypeData({batch, height, width, input_channels},
&float_input_data); &float_input_data);
std::vector<float> float_filter_data; std::vector<float> float_filter_data;
GenerateRandomRealTypeData( GenerateRandomRealTypeData(
{kernel_h, kernel_w, output_channels, input_channels}, {kernel, kernel, output_channels, input_channels},
&float_filter_data); &float_filter_data);
std::vector<float> float_bias_data; std::vector<float> float_bias_data;
GenerateRandomRealTypeData({output_channels}, &float_bias_data); GenerateRandomRealTypeData({output_channels}, &float_bias_data);
// Add input data // Add input data
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Input", {batch, height, width, input_channels}, float_input_data); "Input", {batch, height, width, input_channels}, float_input_data);
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {kernel_h, kernel_w, output_channels, input_channels}, "Filter", {kernel, kernel, output_channels, input_channels},
float_filter_data); float_filter_data);
net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data); net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data);
// run on cpu
net.RunOp();
// Check
Tensor expected;
expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on cpu
BufferToImage<D, half>(&net, "Input", "InputImage", net.RunOp();
kernels::BufferType::IN_OUT_CHANNEL); // Check
BufferToImage<D, half>(&net, "Filter", "FilterImage", Tensor expected;
kernels::BufferType::CONV2D_FILTER); expected.Copy(*net.GetOutput("Output"));
BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); // run on gpu
BufferToImage<D, half>(&net, "Input", "InputImage",
OpDefBuilder("FusedConv2D", "FusedConv2dTest") kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
.Input("BiasImage") .Input("BiasImage")
.Output("OutputImage") .Output("OutputImage")
.AddIntsArg("strides", {stride_h, stride_w}) .AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", type) .AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1}) .AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataType::DT_HALF)) .AddIntArg("T", static_cast<int>(DataType::DT_HALF))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
1e-2, 1e-1);
};
for (int kernel_size : {1, 3}) { ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
for (int stride : {1, 2}) { 1e-2, 1e-1);
func(kernel_size, kernel_size, stride, stride, VALID);
}
}
} }
} // namespace } // namespace
TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) { TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConv1x1S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}); TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}, 1, 1, VALID);
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({31, 37, 31, 37}, 1, 1, SAME);
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}, 1, 2, VALID);
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({31, 37, 31, 37}, 1, 2, SAME);
}
TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}, 3, 1, VALID);
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({31, 37, 31, 37}, 3, 1, SAME);
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({32, 32, 32, 64}, 3, 2, VALID);
TestHalfComplexConvNxNS12<DeviceType::OPENCL>({31, 37, 31, 37}, 3, 2, SAME);
} }
namespace { namespace {
......
...@@ -15,8 +15,9 @@ cc_test( ...@@ -15,8 +15,9 @@ cc_test(
srcs = ["mace_api_test.cc"], srcs = ["mace_api_test.cc"],
copts = if_openmp_enabled(["-fopenmp"]) + copts = if_openmp_enabled(["-fopenmp"]) +
if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) +
if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android_armv7(["-mfpu=neon"]) +
if_android(["-DMACE_ENABLE_OPENCL",]) + if_android_armv7(["-mfloat-abi=softfp"]) +
if_android(["-DMACE_ENABLE_OPENCL"]) +
if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册