From a729014252f09d71dcdbaf7afbbd7cbc6a777a6f Mon Sep 17 00:00:00 2001 From: liuqi Date: Thu, 2 Nov 2017 18:07:09 +0800 Subject: [PATCH] Use iter sync when benchmark conv2d --- mace/ops/conv_2d_benchmark.cc | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index d408b05b..dd4f4f7d 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -3,6 +3,7 @@ // #include +#include #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" @@ -13,6 +14,7 @@ namespace mace { template static void Conv2d(int iters, + int iters_to_sync, int batch, int channels, int height, @@ -30,17 +32,15 @@ static void Conv2d(int iters, .Input("Filter") .Input("Bias") .Output("Output") - .Finalize(net.operator_def()); - - // Add args - net.AddIntsArg("strides", {stride, stride}); - net.AddIntArg("padding", padding); - net.AddIntsArg("dilations", {1, 1}); + .AddIntsArg("strides", {stride, stride}) + .AddIntArg("padding", padding) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // Add input data net.AddRandomInput("Input", {batch, channels, height, width}); net.AddRandomInput("Filter", - {output_channels, channels, kernel_h, kernel_w}); + {output_channels, channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); // Warm-up @@ -52,10 +52,17 @@ static void Conv2d(int iters, mace::testing::StartTiming(); while (iters--) { net.RunOp(D); + if (iters % iters_to_sync == 0) { + net.Sync(); + } } - net.Sync(); } +// In common network, there are usually more than 1 layers, this is used to +// approximate the amortized latency. The OpenCL runtime for Mali/Adreno is +// in-order. +constexpr int kItersToSync = 10; + #define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ static void \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ @@ -63,8 +70,8 @@ static void Conv2d(int iters, const int64_t tot = static_cast(iters) * N * C * H * W; \ mace::testing::ItemsProcessed(tot); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - Conv2d(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \ - OC); \ + Conv2d(iters, kItersToSync, N, C, H, W, KH, KW, STRIDE, \ + mace::Padding::P, OC); \ } \ BENCHMARK( \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) -- GitLab