提交 3ba98c78 编写于 作者: L Liangliang He

Merge branch 'feature_wuch' into 'master'

modify benchmark

See merge request !254
......@@ -17,40 +17,16 @@ namespace mace {
namespace testing {
// Registry of benchmark objects: Register() appends `this` to it and
// Run(const char *pattern) iterates over it.
static std::vector<Benchmark *> *all_benchmarks = nullptr;
// Cleared before every timed invocation in Benchmark::Run; no other use is
// visible in this listing.
static std::string label;
// Set through BytesProcessed(); reset to -1 before each timed invocation and
// divided by the elapsed seconds to compute the printed `mbps` value.
static int64_t bytes_processed;
// Set through MaccProcessed(); reset to -1 before each timed invocation and
// divided by the elapsed seconds to compute the printed `gmaccs` value.
static int64_t macc_processed;
// Accumulated measured time in microseconds (Run converts it with * 1e-6 to
// seconds).
static int64_t accum_time = 0;
// NowMicros() timestamp of the current timing interval; StopTiming() tests it
// against 0.
static int64_t start_time = 0;
Benchmark::Benchmark(const char *name, void (*fn)(int))
: name_(name), num_args_(0), fn0_(fn) {
args_.push_back(std::make_pair(-1, -1));
Benchmark::Benchmark(const char *name, void (*benchmark_func)(int))
: name_(name), benchmark_func_(benchmark_func) {
Register();
}
Benchmark::Benchmark(const char *name, void (*fn)(int, int))
: name_(name), num_args_(1), fn1_(fn) {
Register();
}
Benchmark::Benchmark(const char *name, void (*fn)(int, int, int))
: name_(name), num_args_(2), fn2_(fn) {
Register();
}
// Queues one argument configuration (x, -1) for this benchmark.
// MACE_CHECK enforces that the benchmark was built with num_args_ == 1.
// Returns `this` so calls can be chained.
Benchmark *Benchmark::Arg(int x) {
  MACE_CHECK(num_args_ == 1);
  args_.emplace_back(x, -1);
  return this;
}
// Queues one argument configuration (x, y) for this benchmark.
// MACE_CHECK enforces that the benchmark was built with num_args_ == 2.
// Returns `this` so calls can be chained.
Benchmark *Benchmark::ArgPair(int x, int y) {
  MACE_CHECK(num_args_ == 2);
  args_.emplace_back(x, y);
  return this;
}
// Runs the registered benchmarks by forwarding the fixed pattern "all" to
// Run(const char *pattern).
void Benchmark::Run() {
  Run("all");
}
......@@ -68,17 +44,7 @@ void Benchmark::Run(const char *pattern) {
std::smatch match;
for (auto b : *all_benchmarks) {
if (!std::regex_match(b->name_, match, regex)) continue;
for (auto arg : b->args_) {
strcpy(name, b->name_.c_str());
if (arg.first >= 0) {
sprintf(name, "%s/%d", name, arg.first);
if (arg.second >= 0) {
sprintf(name, "%s/%d", name, arg.second);
}
}
width = std::max<int>(width, strlen(name));
}
width = std::max<int>(width, b->name_.length());
}
printf("%-*s %10s %10s %10s %10s\n", width, "Benchmark", "Time(ns)",
......@@ -86,25 +52,14 @@ void Benchmark::Run(const char *pattern) {
printf("%s\n", std::string(width + 44, '-').c_str());
for (auto b : *all_benchmarks) {
if (!std::regex_match(b->name_, match, regex)) continue;
for (auto arg : b->args_) {
strcpy(name, b->name_.c_str());
if (arg.first >= 0) {
sprintf(name, "%s/%d", name, arg.first);
if (arg.second >= 0) {
sprintf(name, "%s/%d", name, arg.second);
}
}
int iters;
double seconds;
b->Run(arg.first, arg.second, &iters, &seconds);
float mbps = (bytes_processed * 1e-6) / seconds;
// MACCs or other computations
float gmaccs = (macc_processed * 1e-9) / seconds;
printf("%-*s %10.0f %10d %10.2f %10.2f\n", width, name,
seconds * 1e9 / iters, iters, mbps, gmaccs);
}
int iters;
double seconds;
b->Run(&iters, &seconds);
float mbps = (bytes_processed * 1e-6) / seconds;
// MACCs or other computations
float gmaccs = (macc_processed * 1e-9) / seconds;
printf("%-*s %10.0f %10d %10.2f %10.2f\n", width, b->name_.c_str(),
seconds * 1e9 / iters, iters, mbps, gmaccs);
}
}
......@@ -113,24 +68,16 @@ void Benchmark::Register() {
all_benchmarks->push_back(this);
}
void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
void Benchmark::Run(int *run_count, double *run_seconds) {
static const int64_t kMinIters = 10;
static const int64_t kMaxIters = 1000000000;
static const double kMinTime = 0.5;
int64_t iters = kMinIters;
while (true) {
accum_time = 0;
start_time = NowMicros();
bytes_processed = -1;
macc_processed = -1;
label.clear();
if (fn0_) {
(*fn0_)(iters);
} else if (fn1_) {
(*fn1_)(iters, arg1);
} else {
(*fn2_)(iters, arg1, arg2);
}
RestartTiming();
(*benchmark_func_)(iters);
StopTiming();
const double seconds = accum_time * 1e-6;
if (seconds >= kMinTime || iters >= kMaxIters) {
......@@ -139,20 +86,21 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) {
return;
}
// Update number of iterations. Overshoot by 40% in an attempt
// to succeed the next time.
double multiplier = 1.4 * kMinTime / std::max(seconds, 1e-9);
multiplier = std::min(10.0, multiplier);
if (multiplier <= 1.0) multiplier *= 2.0;
iters = std::max<int64_t>(multiplier * iters, iters + 1);
iters = std::min(iters, kMaxIters);
// Update number of iterations.
// Overshoot by 100% in an attempt to succeed the next time.
double multiplier = 2.0 * kMinTime / std::max(seconds, 1e-9);
iters = std::min<int64_t>(multiplier * iters, kMaxIters);
}
}
// Records `n` as the byte count of the current benchmark invocation.
// Benchmark::Run resets the stored value to -1 before each invocation and
// later divides it by the elapsed seconds to compute `mbps`.
void BytesProcessed(int64_t n) { bytes_processed = n; }
// Records `n` as the MACC (or other computation) count of the current
// benchmark invocation. Benchmark::Run resets the stored value to -1 before
// each invocation and later divides it by the elapsed seconds to compute
// `gmaccs`.
void MaccProcessed(int64_t n) { macc_processed = n; }
// Discards the time accumulated so far and begins a new timing interval at
// the current NowMicros() timestamp. Benchmark::Run calls this right before
// invoking the benchmark function.
void RestartTiming() {
accum_time = 0;
start_time = NowMicros();
}
// Begins a timing interval by recording the current NowMicros() timestamp.
// The previous `if (start_time == 0)` guarded assignment was removed: its
// result was always overwritten by the unconditional assignment that followed
// it, so it only cost an extra clock read.
void StartTiming() {
  start_time = NowMicros();
}
void StopTiming() {
if (start_time != 0) {
......
......@@ -20,30 +20,22 @@ namespace testing {
// A named benchmark that registers itself on construction and can be run
// through the static Run() entry points.
// NOTE(review): this listing appears to interleave pre- and post-merge lines
// of a diff (e.g. two constructors with the same signature, two private Run
// overloads) -- confirm which members belong to the current class.
class Benchmark {
public:
// Benchmark whose function receives only the iteration count.
Benchmark(const char *name, void (*fn)(int));
// Benchmark whose function receives the iteration count and one argument.
Benchmark(const char *name, void (*fn)(int, int));
// Benchmark whose function receives the iteration count and two arguments.
Benchmark(const char *name, void (*fn)(int, int, int));
// Adds an argument value; the definition checks num_args_ == 1.
Benchmark *Arg(int x);
// Adds an argument pair; the definition checks num_args_ == 2.
Benchmark *ArgPair(int x, int y);
// NOTE(review): same signature as the first constructor above; only the
// parameter name differs.
Benchmark(const char *name, void (*benchmark_func)(int));
// Runs benchmarks using the pattern "all".
static void Run();
// Runs every registered benchmark whose name matches `pattern`.
static void Run(const char *pattern);
private:
// Name matched against the run pattern and printed in the result table.
std::string name_;
// Number of extra integer arguments the benchmark function takes.
int num_args_;
// Argument configurations added through Arg()/ArgPair().
std::vector<std::pair<int, int>> args_;
// Benchmark function variants for zero, one and two extra arguments.
void (*fn0_)(int) = nullptr;
void (*fn1_)(int, int) = nullptr;
void (*fn2_)(int, int, int) = nullptr;
// Benchmark function called with the iteration count.
void (*benchmark_func_)(int iters) = nullptr;
// Appends this object to the global benchmark registry.
void Register();
// Times the benchmark with the given arguments and reports the iteration
// count and elapsed seconds through the out-parameters.
void Run(int arg1, int arg2, int *run_count, double *run_seconds);
// Times the benchmark and reports the iteration count and elapsed seconds
// through the out-parameters.
void Run(int *run_count, double *run_seconds);
};
// NOTE(review): no definition of RunBenchmarks is visible in this listing.
void RunBenchmarks();
// Stores the byte count used to compute the reported `mbps` value.
void BytesProcessed(int64_t);
// Stores the MACC count used to compute the reported `gmaccs` value.
void MaccProcessed(int64_t);
// Zeroes the accumulated time and starts a new timing interval.
void RestartTiming();
// Starts a timing interval at the current time.
void StartTiming();
// Ends the current timing interval; only acts when a start timestamp is set
// (body not fully visible in this listing).
void StopTiming();
......
......@@ -37,15 +37,17 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
}
}
// CPU float concat benchmark with concat_dim fixed to 0; `dim1` is forwarded
// to ConcatHelper unchanged.
static void BM_CONCAT_Dim0Float(int iters, int dim1) {
ConcatHelper<DeviceType::CPU, float>(iters, 0, dim1);
}
// CPU float concat benchmark with concat_dim fixed to 1; `dim1` is forwarded
// to ConcatHelper unchanged.
static void BM_CONCAT_Dim1Float(int iters, int dim1) {
ConcatHelper<DeviceType::CPU, float>(iters, 1, dim1);
}
// Register both benchmarks with dim1 = 1000 and dim1 = 100000.
BENCHMARK(BM_CONCAT_Dim0Float)->Arg(1000)->Arg(100000);
BENCHMARK(BM_CONCAT_Dim1Float)->Arg(1000)->Arg(100000);
// Defines a function BM_CONCAT_CPU_<DIM0>_<DIM1>(int iters) that calls
// ConcatHelper<DeviceType::CPU, float>(iters, DIM0, DIM1) and passes it to
// BENCHMARK. Despite its name, DIM0 lands in ConcatHelper's `concat_dim`
// parameter, and DIM1 in its `dim1` parameter.
#define BM_CONCAT_CPU_MACRO(DIM0, DIM1) \
static void BM_CONCAT_CPU_##DIM0##_##DIM1( \
int iters) { \
ConcatHelper<DeviceType::CPU, float>(iters, DIM0, DIM1); \
} \
BENCHMARK(BM_CONCAT_CPU_##DIM0##_##DIM1)
// concat_dim 0 with dim1 = 1000 and 100000.
BM_CONCAT_CPU_MACRO(0, 1000);
BM_CONCAT_CPU_MACRO(0, 100000);
// concat_dim 1 with dim1 = 1000 and 100000.
BM_CONCAT_CPU_MACRO(1, 1000);
BM_CONCAT_CPU_MACRO(1, 100000);
template <typename T>
static void OpenclConcatHelper(int iters,
......@@ -88,17 +90,23 @@ static void OpenclConcatHelper(int iters,
}
}
// OpenCL float concat benchmark: both inputs have shape {3, 32, 32, dim1} and
// 3 is passed as the last argument of OpenclConcatHelper (presumably the
// concat axis -- confirm against the helper's signature).
static void BM_CONCATOPENCLFloat(int iters, int dim1) {
std::vector<index_t> shape = {3, 32, 32, dim1};
OpenclConcatHelper<float>(iters, shape, shape, 3);
}
// Same as BM_CONCATOPENCLFloat but instantiated for `half`.
static void BM_CONCATOPENCLHalf(int iters, int dim1) {
std::vector<index_t> shape = {3, 32, 32, dim1};
OpenclConcatHelper<half>(iters, shape, shape, 3);
}
// Defines a function BM_CONCAT_OPENCL_<N>_<H>_<W>_<C>_<TYPE>(int iters) that
// runs OpenclConcatHelper<TYPE> on two inputs of shape {N, H, W, C} and
// passes that function to BENCHMARK. The trailing 3 is forwarded as the
// helper's last argument (presumably the concat axis -- confirm against the
// helper's signature).
//
// The parameters are declared in the same N, H, W, C order as the shape
// initializer. With the former (N, C, H, W) order an invocation such as
// (3, 32, 32, 256, float) produced shape {3, 32, 256, 32}; it now produces
// {3, 32, 32, 256}, matching the {3, 32, 32, dim1} shape used by
// BM_CONCATOPENCLFloat/Half. Generated function names are unchanged because
// they are built from the arguments positionally.
#define BM_CONCAT_OPENCL_MACRO(N, H, W, C, TYPE) \
  static void BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE( \
      int iters) { \
    std::vector<index_t> shape = {N, H, W, C}; \
    OpenclConcatHelper<TYPE>(iters, shape, shape, 3); \
  } \
  BENCHMARK(BM_CONCAT_OPENCL_##N##_##H##_##W##_##C##_##TYPE)
// float instantiations; the four numbers are passed positionally to
// BM_CONCAT_OPENCL_MACRO and only the fourth one varies (32..256).
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 32, float);
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, float);
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, float);
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, float);
// Arg()-style registrations of the non-macro wrappers with dim1 = 32..256.
BENCHMARK(BM_CONCATOPENCLFloat)->Arg(32)->Arg(64)->Arg(128)->Arg(256);
BENCHMARK(BM_CONCATOPENCLHalf)->Arg(32)->Arg(64)->Arg(128)->Arg(256);
// half instantiations with the same argument values as the float ones.
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 32, half);
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half);
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, half);
BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, half);
} // namespace mace
\ No newline at end of file
} // namespace mace
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册