diff --git a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc index d7f44af8345b97a8cb2d9b6c5c3f7b71220a4134..97848c97f97869719476fcc0ca60ea31ed243e25 100644 --- a/mace/core/testing/test_benchmark.cc +++ b/mace/core/testing/test_benchmark.cc @@ -17,40 +17,16 @@ namespace mace { namespace testing { static std::vector *all_benchmarks = nullptr; -static std::string label; static int64_t bytes_processed; static int64_t macc_processed; static int64_t accum_time = 0; static int64_t start_time = 0; -Benchmark::Benchmark(const char *name, void (*fn)(int)) - : name_(name), num_args_(0), fn0_(fn) { - args_.push_back(std::make_pair(-1, -1)); +Benchmark::Benchmark(const char *name, void (*benchmark_func)(int)) + : name_(name), benchmark_func_(benchmark_func) { Register(); } -Benchmark::Benchmark(const char *name, void (*fn)(int, int)) - : name_(name), num_args_(1), fn1_(fn) { - Register(); -} - -Benchmark::Benchmark(const char *name, void (*fn)(int, int, int)) - : name_(name), num_args_(2), fn2_(fn) { - Register(); -} - -Benchmark *Benchmark::Arg(int x) { - MACE_CHECK(num_args_ == 1); - args_.push_back(std::make_pair(x, -1)); - return this; -} - -Benchmark *Benchmark::ArgPair(int x, int y) { - MACE_CHECK(num_args_ == 2); - args_.push_back(std::make_pair(x, y)); - return this; -} - // Run all benchmarks void Benchmark::Run() { Run("all"); } @@ -68,17 +44,7 @@ void Benchmark::Run(const char *pattern) { std::smatch match; for (auto b : *all_benchmarks) { if (!std::regex_match(b->name_, match, regex)) continue; - for (auto arg : b->args_) { - strcpy(name, b->name_.c_str()); - if (arg.first >= 0) { - sprintf(name, "%s/%d", name, arg.first); - if (arg.second >= 0) { - sprintf(name, "%s/%d", name, arg.second); - } - } - - width = std::max(width, strlen(name)); - } + width = std::max(width, b->name_.length()); } printf("%-*s %10s %10s %10s %10s\n", width, "Benchmark", "Time(ns)", @@ -86,25 +52,14 @@ void Benchmark::Run(const char *pattern) { printf("%s\n", std::string(width + 44, '-').c_str()); for (auto b : *all_benchmarks) { if (!std::regex_match(b->name_, match, regex)) continue; - for (auto arg : b->args_) { - strcpy(name, b->name_.c_str()); - if (arg.first >= 0) { - sprintf(name, "%s/%d", name, arg.first); - if (arg.second >= 0) { - sprintf(name, "%s/%d", name, arg.second); - } - } - - int iters; - double seconds; - b->Run(arg.first, arg.second, &iters, &seconds); - - float mbps = (bytes_processed * 1e-6) / seconds; - // MACCs or other computations - float gmaccs = (macc_processed * 1e-9) / seconds; - printf("%-*s %10.0f %10d %10.2f %10.2f\n", width, name, - seconds * 1e9 / iters, iters, mbps, gmaccs); - } + int iters; + double seconds; + b->Run(&iters, &seconds); + float mbps = (bytes_processed * 1e-6) / seconds; + // MACCs or other computations + float gmaccs = (macc_processed * 1e-9) / seconds; + printf("%-*s %10.0f %10d %10.2f %10.2f\n", width, b->name_.c_str(), + seconds * 1e9 / iters, iters, mbps, gmaccs); } } @@ -113,24 +68,16 @@ void Benchmark::Register() { all_benchmarks->push_back(this); } -void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) { +void Benchmark::Run(int *run_count, double *run_seconds) { static const int64_t kMinIters = 10; static const int64_t kMaxIters = 1000000000; static const double kMinTime = 0.5; int64_t iters = kMinIters; while (true) { - accum_time = 0; - start_time = NowMicros(); bytes_processed = -1; macc_processed = -1; - label.clear(); - if (fn0_) { - (*fn0_)(iters); - } else if (fn1_) { - (*fn1_)(iters, arg1); - } else { - (*fn2_)(iters, arg1, arg2); - } + RestartTiming(); + (*benchmark_func_)(iters); StopTiming(); const double seconds = accum_time * 1e-6; if (seconds >= kMinTime || iters >= kMaxIters) { @@ -139,20 +86,21 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) { return; } - // Update number of iterations. Overshoot by 40% in an attempt - // to succeed the next time. - double multiplier = 1.4 * kMinTime / std::max(seconds, 1e-9); - multiplier = std::min(10.0, multiplier); - if (multiplier <= 1.0) multiplier *= 2.0; - iters = std::max(multiplier * iters, iters + 1); - iters = std::min(iters, kMaxIters); + // Update number of iterations. + // Overshoot by 100% in an attempt to succeed the next time. + double multiplier = 2.0 * kMinTime / std::max(seconds, 1e-9); + iters = std::min(multiplier * iters, kMaxIters); } } void BytesProcessed(int64_t n) { bytes_processed = n; } void MaccProcessed(int64_t n) { macc_processed = n; } +void RestartTiming() { + accum_time = 0; + start_time = NowMicros(); +} void StartTiming() { - if (start_time == 0) start_time = NowMicros(); + start_time = NowMicros(); } void StopTiming() { if (start_time != 0) { diff --git a/mace/core/testing/test_benchmark.h b/mace/core/testing/test_benchmark.h index 7ecd3ea820b450579b839dd31f3a8d42bb6b7d48..790305932d3e79264d581dc1869fc6c32bdf4c40 100644 --- a/mace/core/testing/test_benchmark.h +++ b/mace/core/testing/test_benchmark.h @@ -20,30 +20,22 @@ namespace testing { class Benchmark { public: - Benchmark(const char *name, void (*fn)(int)); - Benchmark(const char *name, void (*fn)(int, int)); - Benchmark(const char *name, void (*fn)(int, int, int)); - Benchmark *Arg(int x); - Benchmark *ArgPair(int x, int y); + Benchmark(const char *name, void (*benchmark_func)(int)); static void Run(); static void Run(const char *pattern); private: std::string name_; - int num_args_; - std::vector> args_; - void (*fn0_)(int) = nullptr; - void (*fn1_)(int, int) = nullptr; - void (*fn2_)(int, int, int) = nullptr; + void (*benchmark_func_)(int iters) = nullptr; void Register(); - void Run(int arg1, int arg2, int *run_count, double *run_seconds); + void Run(int *run_count, double *run_seconds); }; -void RunBenchmarks(); void BytesProcessed(int64_t); void MaccProcessed(int64_t); +void RestartTiming(); void StartTiming(); void StopTiming(); diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index bcdfcf19f85b278079e57ac0db5b8e7557c5afc4..849a91bc34744db25e5ef384cd4a68962d384634 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -37,15 +37,17 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) { } } -static void BM_CONCAT_Dim0Float(int iters, int dim1) { - ConcatHelper(iters, 0, dim1); -} - -static void BM_CONCAT_Dim1Float(int iters, int dim1) { - ConcatHelper(iters, 1, dim1); -} -BENCHMARK(BM_CONCAT_Dim0Float)->Arg(1000)->Arg(100000); -BENCHMARK(BM_CONCAT_Dim1Float)->Arg(1000)->Arg(100000); +#define BM_CONCAT_CPU_MACRO(DIM0, DIM1) \ + static void BM_CONCAT_CPU_##DIM0##_##DIM1( \ + int iters) { \ + ConcatHelper(iters, DIM0, DIM1); \ + } \ + BENCHMARK(BM_CONCAT_CPU_##DIM0##_##DIM1) + +BM_CONCAT_CPU_MACRO(0, 1000); +BM_CONCAT_CPU_MACRO(0, 100000); +BM_CONCAT_CPU_MACRO(1, 1000); +BM_CONCAT_CPU_MACRO(1, 100000); template static void OpenclConcatHelper(int iters, @@ -88,17 +90,23 @@ static void OpenclConcatHelper(int iters, } } -static void BM_CONCATOPENCLFloat(int iters, int dim1) { - std::vector shape = {3, 32, 32, dim1}; - OpenclConcatHelper(iters, shape, shape, 3); -} -static void BM_CONCATOPENCLHalf(int iters, int dim1) { - std::vector shape = {3, 32, 32, dim1}; - OpenclConcatHelper(iters, shape, shape, 3); -} +#define BM_CONCAT_OPENCL_MACRO(N, C, H, W, TYPE) \ + static void BM_CONCAT_OPENCL_##N##_##C##_##H##_##W##_##TYPE( \ + int iters) { \ + std::vector shape = {N, H, W, C}; \ + OpenclConcatHelper(iters, shape, shape, 3); \ + } \ + BENCHMARK(BM_CONCAT_OPENCL_##N##_##C##_##H##_##W##_##TYPE) + +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 32, float); +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, float); +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, float); +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, float); -BENCHMARK(BM_CONCATOPENCLFloat)->Arg(32)->Arg(64)->Arg(128)->Arg(256); -BENCHMARK(BM_CONCATOPENCLHalf)->Arg(32)->Arg(64)->Arg(128)->Arg(256); +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 32, half); +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half); +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, half); +BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, half); -} // namespace mace \ No newline at end of file +} // namespace mace