diff --git a/mace/core/net.cc b/mace/core/net.cc index 6c1533a2bff14e95c0f4c85f055c4df64ef29de1..55a1c830804795f8ab6e3c7e7507d3542a052c87 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -36,7 +36,7 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { VLOG(1) << "Running operator " << op->debug_def().name() << "(" << op->debug_def().type() << ")."; OperatorStats *op_stats = nullptr; - if (device_type_ != DeviceType::OPENCL && run_metadata) { + if (run_metadata && device_type_ != DeviceType::OPENCL) { op_stats = run_metadata->add_op_stats(); op_stats->set_operator_name(op->debug_def().name()); op_stats->set_type(op->debug_def().type()); @@ -57,16 +57,16 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { op_stats->set_type(op->debug_def().type()); op_stats->set_all_start_micros( - OpenCLRuntime::GetEventProfilingStartInfo() / 1000); + OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000); op_stats->set_op_start_rel_micros( - OpenCLRuntime::GetEventProfilingStartInfo() / 1000 - + OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000 - op_stats->all_start_micros()); op_stats->set_op_end_rel_micros( - OpenCLRuntime::GetEventProfilingEndInfo() / 1000 - + OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 - op_stats->all_start_micros()); op_stats->set_all_end_rel_micros( - OpenCLRuntime::GetEventProfilingEndInfo() / 1000 - + OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 - op_stats->all_start_micros()); } else { op_stats->set_op_end_rel_micros(NowInMicroSec() - diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index e925e9fb2a79e79802502758bfa33270a39d8a0a..ae67c09973ca897cd9c35f45f8645e3f468f533c 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -119,7 +119,10 @@ OpenCLRuntime::OpenCLRuntime(cl::Context context, kernel_path_ = std::string(kernel_path == nullptr ? "" : kernel_path) + "/"; } -OpenCLRuntime::~OpenCLRuntime() { delete profiling_ev_; } +OpenCLRuntime::~OpenCLRuntime() { + if (profiling_ev_) + delete profiling_ev_; +} cl::Context &OpenCLRuntime::context() { return context_; } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 647fe172faf0d3062f0027b270badb2ae84326df..88086998d0779d8c7688de28e77d908611ba36ba 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -19,10 +19,10 @@ class OpenCLRuntime { static OpenCLRuntime *Get(); static void EnableProfiling(); - static cl::Event *GetDefaultEvent(); + cl::Event *GetDefaultEvent(); - static cl_ulong GetEventProfilingStartInfo(); - static cl_ulong GetEventProfilingEndInfo(); + cl_ulong GetEventProfilingStartInfo(); + cl_ulong GetEventProfilingEndInfo(); cl::Context &context(); diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index c6e2101042f67ca72d697ec087585848bebd0ebd..d1ee123d9e7b6cffc5ebea5ec605c104232d2eb0 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -31,7 +31,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) { addn_kernel, cl::NullRange, cl::NDRange(gws), cl::NDRange(lws), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index 06d2e196fdb446f44e9642a2c0fdf30482562490..c7997d4e13a571c7831c153c979d257b52b29788 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -62,7 +62,7 @@ void BatchNormFunctor::operator()( bm_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index 8f234be1b98757440b1e1fb0a40211e61891739f..8f019207d2d570b7222acf08b2d1cd0d1e960eda 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -91,7 +91,7 @@ void Conv1x1V2(const Tensor *input, cl::NDRange(static_cast(batch), static_cast(channel_blocks), static_cast(pixel_blocks)), cl::NDRange(1, 2, kwg_size / 2), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS, error); } @@ -178,7 +178,7 @@ void Conv1x1V3(const Tensor *input, cl::NDRange(static_cast(channel_blocks), static_cast(height), static_cast(width)), cl::NDRange(1, 2, kwg_size / 2), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS, error); } diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index 3078e4b8eb1e51687bbe23eb2102c226480170b8..c2f6ba7f20309de689d43a724185440e7fae0917 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -52,7 +52,7 @@ static void InnerConv2dK3x3S12(const Tensor *input, const Tensor *filter, conv_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc index 01a2fa1b0d1b8d8e2d1fb7cee598f8f958e85952..da581da895f4c9f64a34d7179337bcf101fd122a 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc @@ -60,7 +60,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input, conv_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index 8c85b78ad5a808e748045c5c2de40c4ead70b1ec..c805078264acc004b2fa1591257b565876dc5a5c 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -52,7 +52,7 @@ static void Pooling3(const Tensor *input, pooling_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } @@ -101,7 +101,7 @@ static void PoolingN(const Tensor *input, pooling_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/kernels/opencl/relu_opencl.cc b/mace/kernels/opencl/relu_opencl.cc index 086a653fb68bfbb18750781f9722b46b1e2f3432..60281afa65dd2fd041dd13afc03a3bc43a5f1699 100644 --- a/mace/kernels/opencl/relu_opencl.cc +++ b/mace/kernels/opencl/relu_opencl.cc @@ -36,7 +36,7 @@ void ReluFunctor::operator()(const Tensor *input, relu_kernel, cl::NullRange, cl::NDRange(gws), cl::NDRange(lws), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } else { auto relu_kernel = runtime->BuildKernel("relu", "relux", built_options); @@ -53,7 +53,7 @@ void ReluFunctor::operator()(const Tensor *input, relu_kernel, cl::NullRange, cl::NDRange(gws), cl::NDRange(lws), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } } diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index 11b6ee016456f0c4a440b3c344e0d1ba948f7a6a..1e4c2c5461fb4aa2067cf99584f4a0f0b606669f 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -50,7 +50,7 @@ void ResizeBilinearFunctor::operator()( static_cast(out_height), static_cast(out_width)), // TODO (heliangliang) tuning and fix when kwg_size < devisor cl::NDRange(1, 16, kwg_size / 16), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS, error); } diff --git a/mace/kernels/opencl/space_to_batch_opecl.cc b/mace/kernels/opencl/space_to_batch_opecl.cc index 52e6de377ae07f393ec4e803d56ee7c0437341e2..dc7058f981b99643fc38f3601ebb65441b2adf91 100644 --- a/mace/kernels/opencl/space_to_batch_opecl.cc +++ b/mace/kernels/opencl/space_to_batch_opecl.cc @@ -45,7 +45,7 @@ void SpaceToBatchFunctor::operator()(Tensor *space_te s2b_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::GetDefaultEvent()); + NULL, OpenCLRuntime::Get()->GetDefaultEvent()); MACE_CHECK(error == CL_SUCCESS); } diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 499af6f29c5f1918f8233ef1e11ba155e35cc869..e0d56173d20e89799e7c2f1a9df33a90dbca47bd 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -3,6 +3,7 @@ // #include "mace/core/operator.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -12,6 +13,9 @@ static void BatchNorm( int iters, int batch, int channels, int height, int width) { mace::testing::StopTiming(); + if ( D == OPENCL ) + OpenCLRuntime::EnableProfiling(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormBM") .Input("Input") @@ -77,4 +81,4 @@ BM_BATCH_NORM(1, 512, 14, 14, float); BM_BATCH_NORM(1, 1024, 7, 7, float); BM_BATCH_NORM(32, 1, 256, 256, float); BM_BATCH_NORM(32, 3, 256, 256, float); -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/utils/tuner.h b/mace/utils/tuner.h index 1d36f7f5b170fc109bc7596bb556b0e8e3ed6959..38c29a8fe7e81a4ffc72bf048780d306ed1dd578 100644 --- a/mace/utils/tuner.h +++ b/mace/utils/tuner.h @@ -131,13 +131,14 @@ class Tuner { double &time_us) { RetType res; int64_t total_time_us = 0; - const int64_t start_time = NowInMicroSec(); for (int i = 0; i < num_runs; ++i) { res = func(params); + OpenCLRuntime::Get()->command_queue().finish(); + + double start_time = OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000.0; + double end_time = OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000.0; + total_time_us += end_time - start_time; } - OpenCLRuntime::Get()->command_queue().finish(); - const int64_t end_time = NowInMicroSec(); - total_time_us += end_time - start_time; time_us = total_time_us * 1.0 / num_runs; return res;