From a1422f7349fd1815a576486f38ece12136ceb3fc Mon Sep 17 00:00:00 2001 From: Yuan Shuai Date: Wed, 20 May 2020 02:18:41 -0500 Subject: [PATCH] [LITE][PROFILE] Enhance Profiler for OpenCL. test=develop (#3668) --- .../opencl/activation_buffer_compute.cc | 48 +++++++++++++------ .../opencl/activation_image_compute.cc | 26 +++++++--- .../opencl/bilinear_interp_image_compute.cc | 18 ++++--- .../kernels/opencl/box_coder_image_compute.cc | 26 +++++++--- lite/kernels/opencl/concat_buffer_compute.cc | 40 ++++++++++------ lite/kernels/opencl/concat_image_compute.cc | 12 +++++ lite/kernels/opencl/conv_buffer_compute.h | 12 +++++ .../opencl/depthwise_conv2d_buffer_compute.cc | 12 +++++ lite/kernels/opencl/dropout_image_compute.cc | 26 +++++++--- .../opencl/elementwise_add_buffer_compute.h | 12 +++++ .../opencl/elementwise_add_image_compute.h | 5 +- .../kernels/opencl/elementwise_mul_compute.cc | 14 +++--- .../opencl/elementwise_mul_image_compute.cc | 18 ++++--- .../opencl/elementwise_sub_image_compute.cc | 9 +++- .../opencl/elementwise_sub_image_compute.h | 12 +++++ lite/kernels/opencl/fc_buffer_compute.cc | 26 +++++++--- .../opencl/grid_sampler_image_compute.cc | 26 +++++++--- .../opencl/instance_norm_image_compute.cc | 40 ++++++++++------ lite/kernels/opencl/lrn_image_compute.cc | 26 +++++++--- lite/kernels/opencl/mul_buffer_compute.cc | 26 +++++++--- .../opencl/nearest_interp_image_compute.cc | 26 +++++++--- lite/kernels/opencl/pad2d_image_compute.cc | 26 +++++++--- lite/kernels/opencl/pool_buffer_compute.cc | 26 +++++++--- lite/kernels/opencl/pool_image_compute.cc | 5 +- lite/kernels/opencl/reshape_image_compute.cc | 5 +- lite/kernels/opencl/scale_image_compute.cc | 26 +++++++--- lite/kernels/opencl/slice_image_compute.cc | 26 +++++++--- 27 files changed, 422 insertions(+), 152 deletions(-) diff --git a/lite/kernels/opencl/activation_buffer_compute.cc b/lite/kernels/opencl/activation_buffer_compute.cc index 1e341952c4..69c9385162 100644 --- 
a/lite/kernels/opencl/activation_buffer_compute.cc +++ b/lite/kernels/opencl/activation_buffer_compute.cc @@ -18,6 +18,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -63,16 +67,24 @@ class ReluCompute auto global_work_size = cl::NDRange{count}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"relu"}; std::string build_options_{"-DCL_DTYPE_float -DRELU"}; @@ -120,16 +132,24 @@ class SigmoidCompute auto global_work_size = cl::NDRange{count}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"sigmoid"}; std::string build_options_{"-DCL_DTYPE_float -DSIGMOID"}; diff --git a/lite/kernels/opencl/activation_image_compute.cc 
b/lite/kernels/opencl/activation_image_compute.cc index da957d8bde..52a0e43a1e 100644 --- a/lite/kernels/opencl/activation_image_compute.cc +++ b/lite/kernels/opencl/activation_image_compute.cc @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -148,16 +152,24 @@ class ActivationComputeImageDefault auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: param_t* act_param_{nullptr}; DDim x_img_shape_ = DDim(std::vector( diff --git a/lite/kernels/opencl/bilinear_interp_image_compute.cc b/lite/kernels/opencl/bilinear_interp_image_compute.cc index 84fd3312c3..f0747b6511 100644 --- a/lite/kernels/opencl/bilinear_interp_image_compute.cc +++ b/lite/kernels/opencl/bilinear_interp_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -142,13 +146,13 @@ class BilinearInterpImageCompute static_cast(default_work_size[1]), static_cast(default_work_size[2])}; - status = 
context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " diff --git a/lite/kernels/opencl/box_coder_image_compute.cc b/lite/kernels/opencl/box_coder_image_compute.cc index 84298b29d4..269d79a18f 100644 --- a/lite/kernels/opencl/box_coder_image_compute.cc +++ b/lite/kernels/opencl/box_coder_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -121,13 +125,13 @@ class BoxCoderComputeImage : public KernelLite(default_work_size[0]), static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG @@ -138,6 +142,14 @@ class BoxCoderComputeImage : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + param_t* boxcoder_param_{nullptr}; std::string kernel_func_name_{}; std::string build_options_{" -DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/concat_buffer_compute.cc b/lite/kernels/opencl/concat_buffer_compute.cc index 5b7c745f31..c9d7fc1cb8 100644 --- a/lite/kernels/opencl/concat_buffer_compute.cc +++ b/lite/kernels/opencl/concat_buffer_compute.cc @@ -18,6 +18,10 @@ #include 
"lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -124,13 +128,13 @@ class ConcatCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } else { auto start = 0; @@ -157,13 +161,13 @@ class ConcatCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); start += size; } @@ -172,6 +176,14 @@ class ConcatCompute : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + int axis_size_ = 1; int post_size_ = 1; int pre_size_ = 1; diff --git a/lite/kernels/opencl/concat_image_compute.cc b/lite/kernels/opencl/concat_image_compute.cc index 5787163dca..25830b6a08 100644 --- a/lite/kernels/opencl/concat_image_compute.cc +++ b/lite/kernels/opencl/concat_image_compute.cc @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -246,6 +250,14 @@ class ConcatComputeImage : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + 
int axis_size_ = 1; int axis_ = 1; int flag_ = 1; diff --git a/lite/kernels/opencl/conv_buffer_compute.h b/lite/kernels/opencl/conv_buffer_compute.h index 80131777c3..f61bf9ac9c 100644 --- a/lite/kernels/opencl/conv_buffer_compute.h +++ b/lite/kernels/opencl/conv_buffer_compute.h @@ -23,6 +23,10 @@ #include "lite/core/tensor.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -39,6 +43,14 @@ class ConvCompute void Run() override; +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_names_[0]; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: void GemmlikeConv2d(); void Conv2d1x1(); diff --git a/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc b/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc index ae03c2a182..8b466be758 100644 --- a/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc +++ b/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -119,6 +123,14 @@ class DepthwiseConv2dCompute CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"depthwise_conv2d"}; std::string build_options_{"-DCL_DTYPE_float"}; diff --git 
a/lite/kernels/opencl/dropout_image_compute.cc b/lite/kernels/opencl/dropout_image_compute.cc index ff9d18430b..c3fdba3c13 100644 --- a/lite/kernels/opencl/dropout_image_compute.cc +++ b/lite/kernels/opencl/dropout_image_compute.cc @@ -21,6 +21,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -89,16 +93,24 @@ class DropoutComputeImage2D : public KernelLite(default_work_size.data()[1]), static_cast(default_work_size.data()[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"dropout"}; std::string build_options_{"-DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/elementwise_add_buffer_compute.h b/lite/kernels/opencl/elementwise_add_buffer_compute.h index c60b8512c3..e451bf9200 100644 --- a/lite/kernels/opencl/elementwise_add_buffer_compute.h +++ b/lite/kernels/opencl/elementwise_add_buffer_compute.h @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -38,6 +42,14 @@ class ElementwiseAddCompute return "ElementwiseAdd using cl::Buffer, kFloat"; } +#ifdef 
LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: void UpdateParams(); diff --git a/lite/kernels/opencl/elementwise_add_image_compute.h b/lite/kernels/opencl/elementwise_add_image_compute.h index 83972d3286..ba87ef4433 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.h +++ b/lite/kernels/opencl/elementwise_add_image_compute.h @@ -17,11 +17,14 @@ #include #include #include "lite/backends/opencl/cl_half.h" -#include "lite/backends/opencl/cl_utility.h" #include "lite/core/kernel.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { diff --git a/lite/kernels/opencl/elementwise_mul_compute.cc b/lite/kernels/opencl/elementwise_mul_compute.cc index 25764f1dc2..d0e8bc92d5 100644 --- a/lite/kernels/opencl/elementwise_mul_compute.cc +++ b/lite/kernels/opencl/elementwise_mul_compute.cc @@ -153,13 +153,13 @@ void ElementwiseMulFloatImageCompute::Run() { auto global_work_size = cl::NDRange{static_cast(x_img_width), static_cast(x_img_height)}; - auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + auto status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); std::string time_stamp_{GetTimeStamp()}; diff --git a/lite/kernels/opencl/elementwise_mul_image_compute.cc b/lite/kernels/opencl/elementwise_mul_image_compute.cc index dcedee86de..1da8554670 100644 --- a/lite/kernels/opencl/elementwise_mul_image_compute.cc +++ 
b/lite/kernels/opencl/elementwise_mul_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -186,13 +190,13 @@ class ElementwiseMulImageCompute cl::NDRange{static_cast(x_img_width), static_cast(x_img_height)}; - auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + auto status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height; diff --git a/lite/kernels/opencl/elementwise_sub_image_compute.cc b/lite/kernels/opencl/elementwise_sub_image_compute.cc index 8a29cde6a4..61d75f4d02 100644 --- a/lite/kernels/opencl/elementwise_sub_image_compute.cc +++ b/lite/kernels/opencl/elementwise_sub_image_compute.cc @@ -138,8 +138,13 @@ void ElementwiseSubImageCompute::Run() { VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height; #endif - auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, nullptr); + auto status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } diff --git a/lite/kernels/opencl/elementwise_sub_image_compute.h b/lite/kernels/opencl/elementwise_sub_image_compute.h index cc1ce505c6..8af4cee730 100644 --- a/lite/kernels/opencl/elementwise_sub_image_compute.h +++ b/lite/kernels/opencl/elementwise_sub_image_compute.h @@ -20,6 +20,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include 
"lite/utils/cp_logging.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -41,6 +45,14 @@ class ElementwiseSubImageCompute return "ElementwiseSub using cl::Image2D, kFP16"; } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* ele_param_{nullptr}; std::string kernel_func_name_{"elementwise_sub"}; diff --git a/lite/kernels/opencl/fc_buffer_compute.cc b/lite/kernels/opencl/fc_buffer_compute.cc index 38ca4fb796..9763faf2f3 100644 --- a/lite/kernels/opencl/fc_buffer_compute.cc +++ b/lite/kernels/opencl/fc_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -124,16 +128,24 @@ class FcCompute auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: int m_, n_, k_; param_t* fc_param_{nullptr}; diff --git a/lite/kernels/opencl/grid_sampler_image_compute.cc b/lite/kernels/opencl/grid_sampler_image_compute.cc index 
e9151e18ef..ff0b2481bf 100644 --- a/lite/kernels/opencl/grid_sampler_image_compute.cc +++ b/lite/kernels/opencl/grid_sampler_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -131,16 +135,24 @@ class GridSamplerImageCompute : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* grid_param_{nullptr}; bool first_epoch_for_reinit_{true}; diff --git a/lite/kernels/opencl/instance_norm_image_compute.cc b/lite/kernels/opencl/instance_norm_image_compute.cc index d014588941..205575cf61 100644 --- a/lite/kernels/opencl/instance_norm_image_compute.cc +++ b/lite/kernels/opencl/instance_norm_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -137,13 +141,13 @@ class InstanceNormImageCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - local_work_size, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + 
cl::NullRange, + global_work_size, + local_work_size, + nullptr, + event_); CL_CHECK_FATAL(status); } @@ -258,17 +262,25 @@ class InstanceNormImageCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - local_work_size, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + local_work_size, + nullptr, + event_); CL_CHECK_FATAL(status); } #endif +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* instance_norm_param_{nullptr}; std::string kernel_func_name_{"instance_norm_onnx"}; diff --git a/lite/kernels/opencl/lrn_image_compute.cc b/lite/kernels/opencl/lrn_image_compute.cc index 8e70189b88..48f20fa79a 100644 --- a/lite/kernels/opencl/lrn_image_compute.cc +++ b/lite/kernels/opencl/lrn_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -128,13 +132,13 @@ class LrnImageCompute : public KernelLite(default_work_size[1]), static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " @@ -142,6 +146,14 @@ class LrnImageCompute : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined 
in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* lrn_param_{nullptr}; int n_{5}; diff --git a/lite/kernels/opencl/mul_buffer_compute.cc b/lite/kernels/opencl/mul_buffer_compute.cc index 7877a7fde6..87249b0079 100644 --- a/lite/kernels/opencl/mul_buffer_compute.cc +++ b/lite/kernels/opencl/mul_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -92,16 +96,24 @@ class MulCompute auto global_work_size = cl::NDRange{static_cast((m_ + 3) / 4), static_cast((n_ + 3) / 4)}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: int m_, n_, k_; std::string kernel_func_name_{"mat_mul"}; diff --git a/lite/kernels/opencl/nearest_interp_image_compute.cc b/lite/kernels/opencl/nearest_interp_image_compute.cc index eb0c84f25d..a3c202bbb4 100644 --- a/lite/kernels/opencl/nearest_interp_image_compute.cc +++ b/lite/kernels/opencl/nearest_interp_image_compute.cc @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -110,16 +114,24 @@ class 
NearestInterpComputeImageDefault static_cast(default_work_size.data()[1]), static_cast(default_work_size.data()[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"nearest_interp"}; std::string build_options_{" -DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/pad2d_image_compute.cc b/lite/kernels/opencl/pad2d_image_compute.cc index 49489ea3b4..dc0590ee47 100644 --- a/lite/kernels/opencl/pad2d_image_compute.cc +++ b/lite/kernels/opencl/pad2d_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -142,13 +146,13 @@ class Pad2dCompute : public KernelLite(default_work_size[1]), static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " @@ -156,6 +160,14 @@ class Pad2dCompute : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after 
kernel::Run + } +#endif + protected: param_t* pad2d_param_{nullptr}; std::string kernel_func_name_{}; diff --git a/lite/kernels/opencl/pool_buffer_compute.cc b/lite/kernels/opencl/pool_buffer_compute.cc index 9be0775d99..5b81d8586c 100644 --- a/lite/kernels/opencl/pool_buffer_compute.cc +++ b/lite/kernels/opencl/pool_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -106,16 +110,24 @@ class PoolCompute CL_CHECK_FATAL(status); auto global_work_size = cl::NDRange(static_cast(numel)); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"pool_"}; std::string build_options_{"-DCL_DTYPE_float"}; diff --git a/lite/kernels/opencl/pool_image_compute.cc b/lite/kernels/opencl/pool_image_compute.cc index db27bf2aed..7a89b33841 100644 --- a/lite/kernels/opencl/pool_image_compute.cc +++ b/lite/kernels/opencl/pool_image_compute.cc @@ -16,13 +16,16 @@ #include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" -#include "lite/backends/opencl/cl_utility.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include 
"lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" #undef LITE_WITH_LOG diff --git a/lite/kernels/opencl/reshape_image_compute.cc b/lite/kernels/opencl/reshape_image_compute.cc index bcaa46ba3d..0ee55d13f8 100644 --- a/lite/kernels/opencl/reshape_image_compute.cc +++ b/lite/kernels/opencl/reshape_image_compute.cc @@ -14,13 +14,16 @@ #include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" -#include "lite/backends/opencl/cl_utility.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" #undef LITE_WITH_LOG diff --git a/lite/kernels/opencl/scale_image_compute.cc b/lite/kernels/opencl/scale_image_compute.cc index bb1d6f8e66..169fd25a83 100644 --- a/lite/kernels/opencl/scale_image_compute.cc +++ b/lite/kernels/opencl/scale_image_compute.cc @@ -21,6 +21,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -93,16 +97,24 @@ class ScaleComputeImage2D : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in 
`kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"scale"}; std::string build_options_{"-DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/slice_image_compute.cc b/lite/kernels/opencl/slice_image_compute.cc index 5bef5bfe09..e9ae7e4a12 100644 --- a/lite/kernels/opencl/slice_image_compute.cc +++ b/lite/kernels/opencl/slice_image_compute.cc @@ -21,6 +21,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -96,16 +100,24 @@ class SliceComputeImage2D : public KernelLite(default_work_size.data()[1]), static_cast(default_work_size.data()[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"slice"}; std::string build_options_{"-DCL_DTYPE_half"}; -- GitLab