// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE(review): in this copy of the file every token that would sit between
// angle brackets is absent: the two #include directives below name no header,
// `template` is followed by no parameter list, calls such as
// AddInputFromArray / BufferToImage / CreateTensor / ExpectTensorNear carry no
// explicit template arguments, and static_cast names no target type.
// The code is kept token-for-token as found; restore those pieces from the
// original file before building.
#include
#include

#include "mace/ops/conv_2d.h"
#include "mace/ops/ops_test_util.h"

namespace mace {
namespace ops {
namespace test {

// Fixture used by every TEST_F in this file; it adds nothing to OpsTestBase.
class Conv2dOpTest : public OpsTestBase {};

namespace {

// Runs a 3x3 Conv2D with stride 1, VALID padding and a one-element bias over
// a {1, 3, 3, 2} all-ones input and checks the single output value.
//
// `D` is compared against DeviceType::OPENCL and handed to RunOp; it is
// presumably a template parameter whose declaration was lost (see the note
// at the top of the file).
template
void TestNHWCSimple3x3VALID() {
  OpsTestNet net;
  // Add input data
  net.AddInputFromArray(
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray(
      "Filter", {3, 3, 1, 2},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
  net.AddInputFromArray("Bias", {1}, {0.1f});

  if (D == DeviceType::OPENCL) {
    // On the OPENCL path the op reads the "*Image" tensors produced by
    // BufferToImage and writes "OutputImage", which is converted back to
    // "Output" afterwards.
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());

    net.RunOp(D);

    // Transfer output
    ImageToBuffer(&net, "OutputImage", "Output",
                  kernels::BufferType::IN_OUT_CHANNEL);
  } else {
    // Any other device works directly on the buffers added above.
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }

  // 3 * 3 * 2 products of 1 * 1 give 18; adding the 0.1 bias gives 18.1.
  auto expected = CreateTensor({1, 1, 1, 1}, {18.1f});
  ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5);
}

// Same input, filter and bias as TestNHWCSimple3x3VALID, but with SAME
// padding, so the expected output has shape {1, 3, 3, 1}.
template
void TestNHWCSimple3x3SAME() {
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray(
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray(
      "Filter", {3, 3, 1, 2},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
  net.AddInputFromArray("Bias", {1}, {0.1f});

  if (D == DeviceType::OPENCL) {
    // Image-backed path: convert inputs, run, convert the result back.
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);

    // Transfer output
    ImageToBuffer(&net, "OutputImage", "Output",
                  kernels::BufferType::IN_OUT_CHANNEL);
  } else {
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }

  // The values are consistent with counting the ones covered by the window:
  // 2 channels * (4 at corners, 6 on edges, 9 in the centre), plus 0.1 bias.
  auto expected = CreateTensor(
      {1, 3, 3, 1},
      {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f});
  ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5);
}

}  // namespace

// Runs both simple 3x3 checks. Going by the test name this is the CPU
// variant; the explicit template arguments that would select the device are
// missing from the calls below.
TEST_F(Conv2dOpTest, CPUSimple) {
  TestNHWCSimple3x3VALID();
  TestNHWCSimple3x3SAME();
}
// NOTE(review): in this copy of the file the tokens that would sit between
// angle brackets are absent (template parameter lists, explicit template
// arguments on calls, static_cast target types, std::vector element types).
// The code below is kept token-for-token as found; restore those pieces from
// the original file before building.

// Runs both simple 3x3 checks. Going by the test name this is the OPENCL
// variant; the template arguments that would select it are missing.
TEST_F(Conv2dOpTest, OPENCLSimple) {
  TestNHWCSimple3x3VALID();
  TestNHWCSimple3x3SAME();
}

namespace {

// 3x3 Conv2D, stride 1, VALID padding, with only two op inputs (no "Bias").
// `D` is compared against DeviceType::OPENCL and passed to RunOp; presumably
// a template parameter whose declaration was lost.
template
void TestNHWCSimple3x3WithoutBias() {
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray(
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray(
      "Filter", {3, 3, 1, 2},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});

  if (D == DeviceType::OPENCL) {
    // Image-backed path: convert inputs, run, convert the result back.
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
    // Transfer output
    ImageToBuffer(&net, "OutputImage", "Output",
                  kernels::BufferType::IN_OUT_CHANNEL);
  } else {
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());

    // Run
    net.RunOp(D);
  }

  // Check
  // 3 * 3 * 2 products of 1 * 1, and no bias term: exactly 18.
  auto expected = CreateTensor({1, 1, 1, 1}, {18.0f});

  ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5);
}

}  // namespace

TEST_F(Conv2dOpTest, CPUWithoutBias) {
  TestNHWCSimple3x3WithoutBias();
}

TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
  TestNHWCSimple3x3WithoutBias();
}

namespace {

// 3x3 Conv2D with stride 2 and SAME padding over a {1, 5, 5, 2} all-ones
// input, producing two output channels with different weights and biases.
template
void TestNHWCCombined3x3() {
  // Construct graph
  OpsTestNet net;

  // Add input data
  net.AddInputFromArray(
      "Input", {1, 5, 5, 2},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  // Each group of four values is {1, 1, 0.5, 0.5}. Together with the expected
  // results below this is consistent with the third filter dimension indexing
  // the output channel (weights 1.0 for channel 0, 0.5 for channel 1).
  net.AddInputFromArray(
      "Filter", {3, 3, 2, 2},
      {1.0f, 1.0f, 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f,
       1.0f, 1.0f, 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f,
       1.0f, 1.0f, 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f});
  net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f});

  if (D == DeviceType::OPENCL) {
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);

    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {2, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);

    ImageToBuffer(&net, "OutputImage", "Output",
                  kernels::BufferType::IN_OUT_CHANNEL);
  } else {
    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {2, 2})
        .AddIntArg("padding", Padding::SAME)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }

  // Check
  // Pairs are (channel 0, channel 1): 8/12/18 + 0.1 and 4/6/9 + 0.2.
  auto expected = CreateTensor(
      {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f,
                     9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f});
  ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5);
}

}  // namespace

TEST_F(Conv2dOpTest, CPUStride2) {
  TestNHWCCombined3x3();
}

TEST_F(Conv2dOpTest, OPENCLStride2) {
  TestNHWCCombined3x3();
}

namespace {

// 1x1 Conv2D, stride 1, VALID padding, over a {1, 3, 10, 5} all-ones input
// with two output channels (weights 1 and 2, biases 0.1 and 0.2).
// Unlike the other helpers in this file, no "T" argument is added to the op
// definition here.
template
void TestConv1x1() {
  // Construct graph
  OpsTestNet net;

  // Add input data
  // 1 * 3 * 10 * 5 = 150 ones.
  net.AddInputFromArray(
      "Input", {1, 3, 10, 5},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray(
      "Filter", {1, 1, 2, 5},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f});
  net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f});

  if (D == DeviceType::OPENCL) {
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);

    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);

    ImageToBuffer(&net, "OutputImage", "Output",
                  kernels::BufferType::IN_OUT_CHANNEL);
  } else {
    OpDefBuilder("Conv2D", "Conv2DTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {1, 1})
        .AddIntArg("padding", Padding::VALID)
        .AddIntsArg("dilations", {1, 1})
        .Finalize(net.NewOperatorDef());
    // Run
    net.RunOp(D);
  }

  // Check
  // Every output position holds (5 * 1 + 0.1, 5 * 2 + 0.2); 30 positions.
  auto expected = CreateTensor(
      {1, 3, 10, 2},
      {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f});

  ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5);
}

}  // namespace

TEST_F(Conv2dOpTest, CPUConv1x1) {
  TestConv1x1();
}

TEST_F(Conv2dOpTest, OPENCLConv1x1) {
  TestConv1x1();
}

namespace {

// Cross-checks Conv2D on device `D` against the result of a plain
// net.RunOp() on random data.
//
// shape:  {height, width, base input channels, base output channels}; the two
//         channel counts and the batch size get a random 0..9 added per case.
// stride: used for both stride_h and stride_w.
//
// Each call covers kernel sizes 1, 3 and 7, each with VALID and SAME padding.
template
void TestComplexConvNxNS12(const std::vector &shape, const int stride) {
  testing::internal::LogToStderr();
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
                  Padding type) {
    // generate random input
    // `seed` is static: it is initialised from the clock once and then
    // advanced by rand_r on every subsequent case.
    static unsigned int seed = time(NULL);
    index_t batch = 3 + (rand_r(&seed) % 10);
    index_t height = shape[0];
    index_t width = shape[1];
    index_t input_channels = shape[2] + (rand_r(&seed) % 10);
    index_t output_channels = shape[3] + (rand_r(&seed) % 10);
    // Construct graph
    OpsTestNet net;
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());

    // Add input data
    net.AddRandomInput("Input", {batch, height, width, input_channels});
    net.AddRandomInput(
        "Filter", {kernel_h, kernel_w, output_channels, input_channels});
    net.AddRandomInput("Bias", {output_channels});

    // run on cpu
    net.RunOp();
    // Check
    // Keep a copy of the reference result before the second op definition
    // is added and run.
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

    // run on gpu
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {1, 1})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

    ImageToBuffer(&net, "OutputImage", "OPENCLOutput",
                  kernels::BufferType::IN_OUT_CHANNEL);
    ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4);
  };

  for (int kernel_size : {1, 3, 7}) {
    func(kernel_size, kernel_size, stride, stride, VALID);
    func(kernel_size, kernel_size, stride, stride, SAME);
  }
}

}  // namespace
// NOTE(review): in this copy of the file the tokens that would sit between
// angle brackets are absent (template parameter lists, explicit template
// arguments on calls, static_cast target types, std::vector element types).
// The code below is kept token-for-token as found; restore those pieces from
// the original file before building.

// The three tests below drive TestComplexConvNxNS12 with
// {height, width, input channels, output channels} and a stride.
TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) {
  TestComplexConvNxNS12({32, 16, 16, 32}, 1);
  TestComplexConvNxNS12({32, 16, 16, 32}, 2);
}

TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) {
  TestComplexConvNxNS12({17, 113, 5, 7}, 1);
  TestComplexConvNxNS12({17, 113, 5, 7}, 2);
}

// Strides 3 and 4, despite the "S12" suffix in the helper's name.
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) {
  TestComplexConvNxNS12({31, 113, 13, 17}, 3);
  TestComplexConvNxNS12({32, 32, 13, 17}, 4);
}

namespace {

// Cross-checks a Conv2D run on device `D` with "T" set to DataType::DT_HALF
// against a plain net.RunOp() on the same random data, using looser
// tolerances (1e-2, 1e-1) than the other comparison helpers.
//
// input_shape:  {height, width}; the batch size is fixed at 3.
// filter_shape: {kernel_h, kernel_w, input_channels, output_channels}. Note
//               that the "Filter" tensor itself is created with dimensions
//               {kernel_h, kernel_w, output_channels, input_channels}.
// dilations:    {dilation_h, dilation_w}.
//
// Stride 1 is always exercised; stride 2 only when dilations[0] == 1.
template
void TestHalfComplexConvNxNS12(const std::vector &input_shape,
                               const std::vector &filter_shape,
                               const std::vector &dilations) {
  testing::internal::LogToStderr();
  // NOTE(review): whether GenerateRandomRealTypeData draws from rand() and is
  // therefore affected by this seed cannot be told from this file.
  srand(time(NULL));

  auto func = [&](int stride_h, int stride_w, Padding padding) {
    // generate random input
    index_t batch = 3;
    index_t height = input_shape[0];
    index_t width = input_shape[1];
    index_t kernel_h = filter_shape[0];
    index_t kernel_w = filter_shape[1];
    index_t input_channels = filter_shape[2];
    index_t output_channels = filter_shape[3];
    // Construct graph
    OpsTestNet net;
    // The reference op definition carries no "T" argument.
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", padding)
        .AddIntsArg("dilations", {dilations[0], dilations[1]})
        .Finalize(net.NewOperatorDef());

    std::vector float_input_data;
    GenerateRandomRealTypeData({batch, height, width, input_channels},
                               &float_input_data);
    std::vector float_filter_data;
    GenerateRandomRealTypeData(
        {kernel_h, kernel_w, output_channels, input_channels},
        &float_filter_data);
    std::vector float_bias_data;
    GenerateRandomRealTypeData({output_channels}, &float_bias_data);
    // Add input data
    net.AddInputFromArray(
        "Input", {batch, height, width, input_channels}, float_input_data);
    net.AddInputFromArray(
        "Filter", {kernel_h, kernel_w, output_channels, input_channels},
        float_filter_data);
    net.AddInputFromArray("Bias", {output_channels}, float_bias_data);

    // run on cpu
    net.RunOp();
    // Check
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

    // run on gpu
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", padding)
        .AddIntsArg("dilations", {dilations[0], dilations[1]})
        .AddIntArg("T", static_cast(DataType::DT_HALF))
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

    ImageToBuffer(&net, "OutputImage", "OPENCLOutput",
                  kernels::BufferType::IN_OUT_CHANNEL);

    ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-1);
  };

  func(1, 1, VALID);
  func(1, 1, SAME);
  // Stride 2 is skipped whenever a dilation other than 1 is requested.
  if (dilations[0] == 1) {
    func(2, 2, VALID);
    func(2, 2, SAME);
  }
}

}  // namespace

// Arguments below: {height, width}, {kernel_h, kernel_w, in, out}, dilations.
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x1S12) {
  TestHalfComplexConvNxNS12({32, 32}, {1, 1, 32, 64}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
  TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv15x1S12) {
  TestHalfComplexConvNxNS12({32, 32}, {15, 1, 256, 2}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x15S12) {
  TestHalfComplexConvNxNS12({32, 32}, {1, 15, 256, 2}, {1, 1});
}

// NOTE(review): the name says "7x75" but the kernel is 7x7; looks like a
// typo in the test name. Left as is, since renaming changes the test ID.
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x75S12) {
  TestHalfComplexConvNxNS12({32, 32}, {7, 7, 3, 64}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv1x1S12) {
  TestHalfComplexConvNxNS12({107, 113}, {1, 1, 5, 7}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) {
  TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) {
  TestHalfComplexConvNxNS12({64, 64}, {5, 5, 16, 16}, {2, 2});
}

TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation2) {
  TestHalfComplexConvNxNS12({64, 64}, {7, 7, 16, 16}, {2, 2});
}

TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation4) {
  TestHalfComplexConvNxNS12({63, 67}, {7, 7, 16, 16}, {4, 4});
}

namespace {

// Cross-checks a dilated Conv2D on device `D` against a plain net.RunOp().
//
// shape:         {height, width, input_channels, output_channels}; batch is 1.
// dilation_rate: used for both dilation dimensions.
//
// Only kernel size 3 with stride 1 is exercised, with VALID and SAME padding.
template
void TestDilationConvNxN(const std::vector &shape,
                         const int dilation_rate) {
  testing::internal::LogToStderr();
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
                  Padding type) {
    // Re-seeds rand() on every case.
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = shape[0];
    index_t width = shape[1];
    index_t input_channels = shape[2];
    index_t output_channels = shape[3];
    // Construct graph
    OpsTestNet net;
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {dilation_rate, dilation_rate})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());

    // Add input data
    net.AddRandomInput("Input", {batch, height, width, input_channels});
    net.AddRandomInput(
        "Filter", {kernel_h, kernel_w, output_channels, input_channels});
    net.AddRandomInput("Bias", {output_channels});

    // run on cpu
    net.RunOp();
    // Check
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

    // run on gpu
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntArg("padding", type)
        .AddIntsArg("dilations", {dilation_rate, dilation_rate})
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

    ImageToBuffer(&net, "OutputImage", "OPENCLOutput",
                  kernels::BufferType::IN_OUT_CHANNEL);
    ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4);
  };

  for (int kernel_size : {3}) {
    for (int stride : {1}) {
      func(kernel_size, kernel_size, stride, stride, VALID);
      func(kernel_size, kernel_size, stride, stride, SAME);
    }
  }
}

}  // namespace

TEST_F(Conv2dOpTest, OPENCLAlignedDilation2) {
  TestDilationConvNxN({32, 32, 32, 64}, 2);
}

TEST_F(Conv2dOpTest, OPENCLAligned2Dilation4) {
  TestDilationConvNxN({128, 128, 16, 16}, 4);
}

TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) {
  TestDilationConvNxN({107, 113, 5, 7}, 4);
}

namespace {

// Cross-checks Conv2D on device `D` against a plain net.RunOp() when the
// padding is given through the "padding_values" argument instead of the
// "padding" enum. No "dilations" argument is set.
//
// shape:    {height, width, input_channels, output_channels}; batch is 1.
// paddings: forwarded unchanged as "padding_values".
//
// Covers kernel sizes 3 and 5 combined with strides 2 and 3.
template
void TestArbitraryPadConvNxN(const std::vector &shape,
                             const std::vector &paddings) {
  testing::internal::LogToStderr();
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w) {
    // Re-seeds rand() on every case.
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = shape[0];
    index_t width = shape[1];
    index_t input_channels = shape[2];
    index_t output_channels = shape[3];
    // Construct graph
    OpsTestNet net;
    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntsArg("padding_values", paddings)
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());

    // Add input data
    net.AddRandomInput("Input", {batch, height, width, input_channels});
    net.AddRandomInput(
        "Filter", {kernel_h, kernel_w, output_channels, input_channels});
    net.AddRandomInput("Bias", {output_channels});

    // run on cpu
    net.RunOp();
    // Check
    Tensor expected;
    expected.Copy(*net.GetOutput("Output"));

    // run on gpu
    BufferToImage(&net, "Input", "InputImage",
                  kernels::BufferType::IN_OUT_CHANNEL);
    BufferToImage(&net, "Filter", "FilterImage",
                  kernels::BufferType::CONV2D_FILTER);
    BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);

    OpDefBuilder("Conv2D", "Conv2dTest")
        .Input("InputImage")
        .Input("FilterImage")
        .Input("BiasImage")
        .Output("OutputImage")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntsArg("padding_values", paddings)
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());
    // Run on device
    net.RunOp(D);

    ImageToBuffer(&net, "OutputImage", "OPENCLOutput",
                  kernels::BufferType::IN_OUT_CHANNEL);
    ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4);
  };

  for (int kernel_size : {3, 5}) {
    for (int stride : {2, 3}) {
      func(kernel_size, kernel_size, stride, stride);
    }
  }
}

}  // namespace

TEST_F(Conv2dOpTest, OPENCLAlignedPad1) {
  TestArbitraryPadConvNxN({32, 32, 32, 64}, {1, 1});
}

TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
  TestArbitraryPadConvNxN({128, 128, 16, 16}, {2, 2});
}

TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) {
  TestArbitraryPadConvNxN({107, 113, 5, 7}, {4, 4});
}

// Cross-checks Conv2D run with DeviceType::NEON against a plain net.RunOp(),
// with padding supplied through "padding_values".
//
// shape:    {height, width, input_channels, output_channels}; batch is 1.
// paddings: forwarded unchanged as "padding_values".
//
// The NEON op reads "InputNeon"/"FilterNeon", which are filled from "Input"
// and "Filter" by FillNHWCInputToNCHWInput / FillHWOIInputToOIHWInput
// (presumably layout conversions, going by their names), and shares "Bias".
// Kernel sizes 1, 3, 5 and strides 1, 2 are covered, skipping any pair where
// the stride is not smaller than the kernel size.
static void TestNeonArbitraryPadConvNxN(const std::vector &shape,
                                        const std::vector &paddings) {
  testing::internal::LogToStderr();
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w) {
    // Re-seeds rand() on every case.
    srand(time(NULL));

    // generate random input
    index_t batch = 1;
    index_t height = shape[0];
    index_t width = shape[1];
    index_t input_channels = shape[2];
    index_t output_channels = shape[3];
    // Construct graph
    OpsTestNet net;
    OpDefBuilder("Conv2D", "Conv2dTestCPU")
        .Input("Input")
        .Input("Filter")
        .Input("Bias")
        .Output("Output")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntsArg("padding_values", paddings)
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());

    // Add input data
    net.AddRandomInput("Input", {batch, height, width, input_channels});
    net.AddRandomInput(
        "Filter", {kernel_h, kernel_w, output_channels, input_channels});
    net.AddRandomInput("Bias", {output_channels});

    // run cpu
    net.RunOp();

    // run neon
    OpDefBuilder("Conv2D", "Conv2dTestNEON")
        .Input("InputNeon")
        .Input("FilterNeon")
        .Input("Bias")
        .Output("OutputNeon")
        .AddIntsArg("strides", {stride_h, stride_w})
        .AddIntsArg("padding_values", paddings)
        .AddIntArg("T", static_cast(DataTypeToEnum::value))
        .Finalize(net.NewOperatorDef());

    net.FillNHWCInputToNCHWInput("InputNeon", "Input");
    net.FillHWOIInputToOIHWInput("FilterNeon", "Filter");

    // Run on device
    net.RunOp(DeviceType::NEON);

    // The reference "Output" goes through the same NHWC-to-NCHW helper before
    // being compared with "OutputNeon". The tensor name "OutputExptected" is
    // misspelled but used consistently on both lines.
    net.FillNHWCInputToNCHWInput("OutputExptected", "Output");

    ExpectTensorNear(*net.GetOutput("OutputExptected"),
                     *net.GetOutput("OutputNeon"), 1e-5, 1e-3);
  };

  for (int kernel_size : {1, 3, 5}) {
    for (int stride : {1, 2}) {
      if (stride < kernel_size) {
        func(kernel_size, kernel_size, stride, stride);
      }
    }
  }
}

// Arguments: {height, width, input_channels, output_channels}, paddings.
TEST_F(Conv2dOpTest, NEONTest) {
  TestNeonArbitraryPadConvNxN({32, 34, 32, 64}, {0, 0});
  TestNeonArbitraryPadConvNxN({32, 32, 32, 64}, {1, 1});
  TestNeonArbitraryPadConvNxN({128, 128, 16, 16}, {2, 2});
  TestNeonArbitraryPadConvNxN({107, 113, 5, 7}, {4, 4});
}

}  // namespace test
}  // namespace ops
}  // namespace mace