diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index 98575c819ada9747c4bb8e316051086e53c92398..d54ad7095044f2af2c268f67c19e0e443bea48f7 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -27,15 +27,15 @@ static void Conv2d(int iters, OpsTestNet net; // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); - net.AddRandomInput("Filter", + net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput("Filter", {kernel_h, kernel_w, channels, output_channels}); - net.AddRandomInput("Bias", {output_channels}); + net.AddRandomInput("Bias", {output_channels}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -55,6 +55,7 @@ static void Conv2d(int iters, .AddIntsArg("strides", {stride, stride}) .AddIntArg("padding", padding) .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); } @@ -92,15 +93,15 @@ static void Conv2d(int iters, BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL); // ICNet -BM_CONV_2D(1, 512, 15, 15, 1, 1, 1, VALID, 1024, half); +BM_CONV_2D(1, 512, 15, 15, 1, 1, 1, VALID, 1024, float); // SNPE GPU ExecutionDuration = 448us, % ALU Utilization = 105 -BM_CONV_2D(1, 64, 60, 60, 1, 1, 1, VALID, 128, half); +BM_CONV_2D(1, 64, 60, 60, 1, 1, 1, VALID, 128, float); // SNPE GPU ExecutionDuration = 258us, % ALU Utilization = 108 -BM_CONV_2D(1, 32, 60, 60, 1, 1, 1, VALID, 128, half); +BM_CONV_2D(1, 32, 60, 60, 1, 1, 1, VALID, 128, float); -BM_CONV_2D(1, 128, 60, 60, 3, 3, 1, VALID, 128, half); +BM_CONV_2D(1, 128, 60, 60, 3, 3, 1, VALID, 128, float); // SNPE GPU ExecutionDuration = 506us, % ALU Utilization = 106.8 -BM_CONV_2D(1, 32, 60, 60, 3, 3, 1, SAME, 32, half); +BM_CONV_2D(1, 32, 60, 60, 3, 3, 1, SAME, 32, float); // Test RGB <-> YUV //BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float); diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 1cda801747df6e57284673f00c8e5266723743dd..faaf508c5d2f576da8632c800cadfbfdd4cae389 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -634,9 +634,9 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.2); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.2); }; for (int kernel_size : {1, 3}) {