提交 a7290142 编写于 作者: L liuqi

Use iter sync when benchmark conv2d

上级 52cc0540
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// //
#include <algorithm> #include <algorithm>
#include <sstream>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
...@@ -13,6 +14,7 @@ namespace mace { ...@@ -13,6 +14,7 @@ namespace mace {
template <DeviceType D, typename T> template <DeviceType D, typename T>
static void Conv2d(int iters, static void Conv2d(int iters,
int iters_to_sync,
int batch, int batch,
int channels, int channels,
int height, int height,
...@@ -30,17 +32,15 @@ static void Conv2d(int iters, ...@@ -30,17 +32,15 @@ static void Conv2d(int iters,
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {stride, stride}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", padding);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddRandomInput<D, float>("Input", {batch, channels, height, width}); net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
net.AddRandomInput<D, float>("Filter", net.AddRandomInput<D, float>("Filter",
{output_channels, channels, kernel_h, kernel_w}); {output_channels, channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels}); net.AddRandomInput<D, float>("Bias", {output_channels});
// Warm-up // Warm-up
...@@ -52,10 +52,17 @@ static void Conv2d(int iters, ...@@ -52,10 +52,17 @@ static void Conv2d(int iters,
mace::testing::StartTiming(); mace::testing::StartTiming();
while (iters--) { while (iters--) {
net.RunOp(D); net.RunOp(D);
if (iters % iters_to_sync == 0) {
net.Sync();
}
} }
net.Sync();
} }
// Number of benchmark iterations between device synchronizations.
// A typical network has more than one layer, so syncing only once every
// kItersToSync iterations (instead of after every run) is used to approximate
// the amortized latency. The OpenCL runtime for Mali/Adreno is in-order.
constexpr int kItersToSync = 10;
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ #define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void \ static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
...@@ -63,8 +70,8 @@ static void Conv2d(int iters, ...@@ -63,8 +70,8 @@ static void Conv2d(int iters,
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \ mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \ Conv2d<DEVICE, TYPE>(iters, kItersToSync, N, C, H, W, KH, KW, STRIDE, \
OC); \ mace::Padding::P, OC); \
} \ } \
BENCHMARK( \ BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册