提交 32fc47fe 编写于 作者: Y yejianwu

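Make the OpenCLRuntime event/profiling accessors (GetDefaultEvent, GetEventProfilingStartInfo, GetEventProfilingEndInfo) non-static member functions called via OpenCLRuntime::Get(), and measure kernel time in the tuner from OpenCL event profiling info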

上级 5bc942ad
......@@ -36,7 +36,7 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
VLOG(1) << "Running operator " << op->debug_def().name() << "("
<< op->debug_def().type() << ").";
OperatorStats *op_stats = nullptr;
if (device_type_ != DeviceType::OPENCL && run_metadata) {
if (run_metadata && device_type_ != DeviceType::OPENCL) {
op_stats = run_metadata->add_op_stats();
op_stats->set_operator_name(op->debug_def().name());
op_stats->set_type(op->debug_def().type());
......@@ -57,16 +57,16 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
op_stats->set_type(op->debug_def().type());
op_stats->set_all_start_micros(
OpenCLRuntime::GetEventProfilingStartInfo() / 1000);
OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000);
op_stats->set_op_start_rel_micros(
OpenCLRuntime::GetEventProfilingStartInfo() / 1000 -
OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000 -
op_stats->all_start_micros());
op_stats->set_op_end_rel_micros(
OpenCLRuntime::GetEventProfilingEndInfo() / 1000 -
OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 -
op_stats->all_start_micros());
op_stats->set_all_end_rel_micros(
OpenCLRuntime::GetEventProfilingEndInfo() / 1000 -
OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 -
op_stats->all_start_micros());
} else {
op_stats->set_op_end_rel_micros(NowInMicroSec() -
......
......@@ -119,7 +119,10 @@ OpenCLRuntime::OpenCLRuntime(cl::Context context,
kernel_path_ = std::string(kernel_path == nullptr ? "" : kernel_path) + "/";
}
OpenCLRuntime::~OpenCLRuntime() { delete profiling_ev_; }
// Destructor: deletes the profiling event object held in profiling_ev_.
// NOTE(review): `delete` applied to a null pointer is a no-op in C++, so the
// explicit null check below is redundant; it is harmless and behaves the same
// as an unconditional `delete profiling_ev_;`.
OpenCLRuntime::~OpenCLRuntime() {
if (profiling_ev_)
delete profiling_ev_;
}
cl::Context &OpenCLRuntime::context() { return context_; }
......
......@@ -19,10 +19,10 @@ class OpenCLRuntime {
static OpenCLRuntime *Get();
static void EnableProfiling();
static cl::Event *GetDefaultEvent();
cl::Event *GetDefaultEvent();
static cl_ulong GetEventProfilingStartInfo();
static cl_ulong GetEventProfilingEndInfo();
cl_ulong GetEventProfilingStartInfo();
cl_ulong GetEventProfilingEndInfo();
cl::Context &context();
......
......@@ -31,7 +31,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) {
addn_kernel, cl::NullRange,
cl::NDRange(gws),
cl::NDRange(lws),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
......
......@@ -62,7 +62,7 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
bm_kernel, cl::NullRange,
cl::NDRange(gws[0], gws[1], gws[2]),
cl::NDRange(params[0], params[1], params[2]),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error;
return error;
......
......@@ -91,7 +91,7 @@ void Conv1x1V2(const Tensor *input,
cl::NDRange(static_cast<int>(batch), static_cast<int>(channel_blocks),
static_cast<int>(pixel_blocks)),
cl::NDRange(1, 2, kwg_size / 2),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS, error);
}
......@@ -178,7 +178,7 @@ void Conv1x1V3(const Tensor *input,
cl::NDRange(static_cast<int>(channel_blocks), static_cast<int>(height),
static_cast<int>(width)),
cl::NDRange(1, 2, kwg_size / 2),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS, error);
}
......
......@@ -52,7 +52,7 @@ static void InnerConv2dK3x3S12(const Tensor *input, const Tensor *filter,
conv_kernel, cl::NullRange,
cl::NDRange(gws[0], gws[1], gws[2]),
cl::NDRange(lws[0], lws[1], lws[2]),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
......
......@@ -60,7 +60,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input,
conv_kernel, cl::NullRange,
cl::NDRange(gws[0], gws[1], gws[2]),
cl::NDRange(lws[0], lws[1], lws[2]),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
......
......@@ -52,7 +52,7 @@ static void Pooling3(const Tensor *input,
pooling_kernel, cl::NullRange,
cl::NDRange(gws[0], gws[1], gws[2]),
cl::NDRange(lws[0], lws[1], lws[2]),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
......@@ -101,7 +101,7 @@ static void PoolingN(const Tensor *input,
pooling_kernel, cl::NullRange,
cl::NDRange(gws[0], gws[1], gws[2]),
cl::NDRange(lws[0], lws[1], lws[2]),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
......
......@@ -36,7 +36,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
relu_kernel, cl::NullRange,
cl::NDRange(gws),
cl::NDRange(lws),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
} else {
auto relu_kernel = runtime->BuildKernel("relu", "relux", built_options);
......@@ -53,7 +53,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
relu_kernel, cl::NullRange,
cl::NDRange(gws),
cl::NDRange(lws),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
}
......
......@@ -50,7 +50,7 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, float>::operator()(
static_cast<int>(out_height), static_cast<int>(out_width)),
// TODO (heliangliang) tuning and fix when kwg_size < devisor
cl::NDRange(1, 16, kwg_size / 16),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS, error);
}
......
......@@ -45,7 +45,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, float>::operator()(Tensor *space_te
s2b_kernel, cl::NullRange,
cl::NDRange(gws[0], gws[1], gws[2]),
cl::NDRange(lws[0], lws[1], lws[2]),
NULL, OpenCLRuntime::GetDefaultEvent());
NULL, OpenCLRuntime::Get()->GetDefaultEvent());
MACE_CHECK(error == CL_SUCCESS);
}
......
......@@ -3,6 +3,7 @@
//
#include "mace/core/operator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
......@@ -12,6 +13,9 @@ static void BatchNorm(
int iters, int batch, int channels, int height, int width) {
mace::testing::StopTiming();
if ( D == OPENCL )
OpenCLRuntime::EnableProfiling();
OpsTestNet net;
OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("Input")
......@@ -77,4 +81,4 @@ BM_BATCH_NORM(1, 512, 14, 14, float);
BM_BATCH_NORM(1, 1024, 7, 7, float);
BM_BATCH_NORM(32, 1, 256, 256, float);
BM_BATCH_NORM(32, 3, 256, 256, float);
} // namespace mace
\ No newline at end of file
} // namespace mace
......@@ -131,13 +131,14 @@ class Tuner {
double &time_us) {
RetType res;
int64_t total_time_us = 0;
const int64_t start_time = NowInMicroSec();
for (int i = 0; i < num_runs; ++i) {
res = func(params);
OpenCLRuntime::Get()->command_queue().finish();
double start_time = OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000.0;
double end_time = OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000.0;
total_time_us += end_time - start_time;
}
OpenCLRuntime::Get()->command_queue().finish();
const int64_t end_time = NowInMicroSec();
total_time_us += end_time - start_time;
time_us = total_time_us * 1.0 / num_runs;
return res;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册