From 32fc47fecd4a85bdbf9783c01b85f473f3918faa Mon Sep 17 00:00:00 2001 From: yejianwu Date: Thu, 23 Nov 2017 10:21:31 +0800 Subject: [PATCH] remove static for func in OpenCLRuntime, update kernel time in tuner --- mace/core/net.cc | 10 +++++----- mace/core/runtime/opencl/opencl_runtime.cc | 5 ++++- mace/core/runtime/opencl/opencl_runtime.h | 6 +++--- mace/kernels/opencl/addn.cc | 2 +- mace/kernels/opencl/batch_norm_opencl.cc | 2 +- mace/kernels/opencl/conv_2d_opencl_1x1.cc | 4 ++-- mace/kernels/opencl/conv_2d_opencl_3x3.cc | 2 +- mace/kernels/opencl/depthwise_conv_opencl_3x3.cc | 2 +- mace/kernels/opencl/pooling_opencl.cc | 4 ++-- mace/kernels/opencl/relu_opencl.cc | 4 ++-- mace/kernels/opencl/resize_bilinear_opencl.cc | 2 +- mace/kernels/opencl/space_to_batch_opecl.cc | 2 +- mace/ops/batch_norm_benchmark.cc | 6 +++++- mace/utils/tuner.h | 9 +++++---- 14 files changed, 34 insertions(+), 26 deletions(-) diff --git a/mace/core/net.cc b/mace/core/net.cc index 6c1533a2..55a1c830 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -36,7 +36,7 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { VLOG(1) << "Running operator " << op->debug_def().name() << "(" << op->debug_def().type() << ")."; OperatorStats *op_stats = nullptr; - if (device_type_ != DeviceType::OPENCL && run_metadata) { + if (run_metadata && device_type_ != DeviceType::OPENCL) { op_stats = run_metadata->add_op_stats(); op_stats->set_operator_name(op->debug_def().name()); op_stats->set_type(op->debug_def().type()); @@ -57,16 +57,16 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { op_stats->set_type(op->debug_def().type()); op_stats->set_all_start_micros( - OpenCLRuntime::GetEventProfilingStartInfo() / 1000); + OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000); op_stats->set_op_start_rel_micros( - OpenCLRuntime::GetEventProfilingStartInfo() / 1000 - + OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000 - op_stats->all_start_micros()); op_stats->set_op_end_rel_micros( - OpenCLRuntime::GetEventProfilingEndInfo() / 1000 - + OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 - op_stats->all_start_micros()); op_stats->set_all_end_rel_micros( - OpenCLRuntime::GetEventProfilingEndInfo() / 1000 - + OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 - op_stats->all_start_micros()); } else { op_stats->set_op_end_rel_micros(NowInMicroSec() - diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index e925e9fb..ae67c099 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -119,7 +119,10 @@ OpenCLRuntime::OpenCLRuntime(cl::Context context, kernel_path_ = std::string(kernel_path == nullptr ? "" : kernel_path) + "/"; } -OpenCLRuntime::~OpenCLRuntime() { delete profiling_ev_; } +OpenCLRuntime::~OpenCLRuntime() { + if (profiling_ev_) + delete profiling_ev_; +} cl::Context &OpenCLRuntime::context() { return context_; } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 647fe172..88086998 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -19,10 +19,10 @@ class OpenCLRuntime { static OpenCLRuntime *Get(); static void EnableProfiling(); - static cl::Event *GetDefaultEvent(); + cl::Event *GetDefaultEvent(); - static cl_ulong GetEventProfilingStartInfo(); - static cl_ulong GetEventProfilingEndInfo(); + cl_ulong GetEventProfilingStartInfo(); + cl_ulong GetEventProfilingEndInfo(); cl::Context &context(); diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index c6e21010..d1ee123d 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -31,7 +31,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) { addn_kernel, cl::NullRange, cl::NDRange(gws), cl::NDRange(lws), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index 06d2e196..c7997d4e 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -62,7 +62,7 @@ void BatchNormFunctor::operator()( bm_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index 8f234be1..8f019207 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -91,7 +91,7 @@ void Conv1x1V2(const Tensor *input, cl::NDRange(static_cast(batch), static_cast(channel_blocks), static_cast(pixel_blocks)), cl::NDRange(1, 2, kwg_size / 2), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS, error); } @@ -178,7 +178,7 @@ void Conv1x1V3(const Tensor *input, cl::NDRange(static_cast(channel_blocks), static_cast(height), static_cast(width)), cl::NDRange(1, 2, kwg_size / 2), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS, error); } diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index 3078e4b8..c2f6ba7f 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -52,7 +52,7 @@ static void InnerConv2dK3x3S12(const Tensor *input, const Tensor *filter, conv_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc index 01a2fa1b..da581da8 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc @@ -60,7 +60,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input, conv_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index 8c85b78a..c8050782 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -52,7 +52,7 @@ static void Pooling3(const Tensor *input, pooling_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } @@ -101,7 +101,7 @@ static void PoolingN(const Tensor *input, pooling_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/relu_opencl.cc b/mace/kernels/opencl/relu_opencl.cc index 086a653f..60281afa 100644 --- a/mace/kernels/opencl/relu_opencl.cc +++ b/mace/kernels/opencl/relu_opencl.cc @@ -36,7 +36,7 @@ void ReluFunctor::operator()(const Tensor *input, relu_kernel, cl::NullRange, cl::NDRange(gws), cl::NDRange(lws), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } else { auto relu_kernel = runtime->BuildKernel("relu", "relux", built_options); @@ -53,7 +53,7 @@ void ReluFunctor::operator()(const Tensor *input, relu_kernel, cl::NullRange, cl::NDRange(gws), cl::NDRange(lws), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } } diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index 11b6ee01..1e4c2c54 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -50,7 +50,7 @@ void ResizeBilinearFunctor::operator()( static_cast(out_height), static_cast(out_width)), // TODO (heliangliang) tuning and fix when kwg_size < devisor cl::NDRange(1, 16, kwg_size / 16), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS, error); } diff --git a/mace/kernels/opencl/space_to_batch_opecl.cc b/mace/kernels/opencl/space_to_batch_opecl.cc index 52e6de37..dc7058f9 100644 --- a/mace/kernels/opencl/space_to_batch_opecl.cc +++ b/mace/kernels/opencl/space_to_batch_opecl.cc @@ -45,7 +45,7 @@ void SpaceToBatchFunctor::operator()(Tensor *space_te s2b_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 499af6f2..e0d56173 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -3,6 +3,7 @@ // #include "mace/core/operator.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -12,6 +13,9 @@ static void BatchNorm( int iters, int batch, int channels, int height, int width) { mace::testing::StopTiming(); + if ( D == OPENCL ) + OpenCLRuntime::EnableProfiling(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormBM") .Input("Input") @@ -77,4 +81,4 @@ BM_BATCH_NORM(1, 512, 14, 14, float); BM_BATCH_NORM(1, 1024, 7, 7, float); BM_BATCH_NORM(32, 1, 256, 256, float); BM_BATCH_NORM(32, 3, 256, 256, float); -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/utils/tuner.h b/mace/utils/tuner.h index 1d36f7f5..38c29a8f 100644 --- a/mace/utils/tuner.h +++ b/mace/utils/tuner.h @@ -131,13 +131,14 @@ class Tuner { double &time_us) { RetType res; int64_t total_time_us = 0; - const int64_t start_time = NowInMicroSec(); for (int i = 0; i < num_runs; ++i) { res = func(params); + OpenCLRuntime::Get()->command_queue().finish(); + + double start_time = OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000.0; + double end_time = OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000.0; + total_time_us += end_time - start_time; } - OpenCLRuntime::Get()->command_queue().finish(); - const int64_t end_time = NowInMicroSec(); - total_time_us += end_time - start_time; time_us = total_time_us * 1.0 / num_runs; return res; -- GitLab