diff --git a/lite/kernels/opencl/activation_buffer_compute.cc b/lite/kernels/opencl/activation_buffer_compute.cc index 1e341952c43115e8db62c3398455ac8cbef83724..69c9385162dc3ff59ad76dda4ce61ce2ef72d5a5 100644 --- a/lite/kernels/opencl/activation_buffer_compute.cc +++ b/lite/kernels/opencl/activation_buffer_compute.cc @@ -18,6 +18,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -63,16 +67,24 @@ class ReluCompute auto global_work_size = cl::NDRange{count}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"relu"}; std::string build_options_{"-DCL_DTYPE_float -DRELU"}; @@ -120,16 +132,24 @@ class SigmoidCompute auto global_work_size = cl::NDRange{count}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run 
+ } +#endif + private: std::string kernel_func_name_{"sigmoid"}; std::string build_options_{"-DCL_DTYPE_float -DSIGMOID"}; diff --git a/lite/kernels/opencl/activation_image_compute.cc b/lite/kernels/opencl/activation_image_compute.cc index da957d8bdec8a4689740fb996010968c14d95b16..52a0e43a1ecba2d3d00faa0a597e618ac77c4114 100644 --- a/lite/kernels/opencl/activation_image_compute.cc +++ b/lite/kernels/opencl/activation_image_compute.cc @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -148,16 +152,24 @@ class ActivationComputeImageDefault auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: param_t* act_param_{nullptr}; DDim x_img_shape_ = DDim(std::vector( diff --git a/lite/kernels/opencl/bilinear_interp_image_compute.cc b/lite/kernels/opencl/bilinear_interp_image_compute.cc index 84fd3312c3b965c2856780aaab6d9ecb9122ccfc..f0747b65118a5e5cd8ed407334c6b718a2a7215c 100644 --- a/lite/kernels/opencl/bilinear_interp_image_compute.cc +++ b/lite/kernels/opencl/bilinear_interp_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE 
+#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -142,13 +146,13 @@ class BilinearInterpImageCompute static_cast(default_work_size[1]), static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " diff --git a/lite/kernels/opencl/box_coder_image_compute.cc b/lite/kernels/opencl/box_coder_image_compute.cc index 84298b29d4f8ce99a0bacc2dbb5acf545a49617c..269d79a18f4b8c9d2c64308572fa5e481cde5bab 100644 --- a/lite/kernels/opencl/box_coder_image_compute.cc +++ b/lite/kernels/opencl/box_coder_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -121,13 +125,13 @@ class BoxCoderComputeImage : public KernelLite(default_work_size[0]), static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG @@ -138,6 +142,14 @@ class BoxCoderComputeImage : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + param_t* boxcoder_param_{nullptr}; std::string kernel_func_name_{}; std::string 
build_options_{" -DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/concat_buffer_compute.cc b/lite/kernels/opencl/concat_buffer_compute.cc index 5b7c745f31160e8d561ea07546953827fae4cd96..c9d7fc1cb84f89fe476462dbada773df75fc2c2c 100644 --- a/lite/kernels/opencl/concat_buffer_compute.cc +++ b/lite/kernels/opencl/concat_buffer_compute.cc @@ -18,6 +18,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -124,13 +128,13 @@ class ConcatCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } else { auto start = 0; @@ -157,13 +161,13 @@ class ConcatCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); start += size; } @@ -172,6 +176,14 @@ class ConcatCompute : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + int axis_size_ = 1; int post_size_ = 1; int pre_size_ = 1; diff --git a/lite/kernels/opencl/concat_image_compute.cc b/lite/kernels/opencl/concat_image_compute.cc index 5787163dca42bcb6ccfa8fc872902581d853a627..25830b6a08b9ba96ebb64095a42f0ab53f264da4 100644 --- a/lite/kernels/opencl/concat_image_compute.cc +++ b/lite/kernels/opencl/concat_image_compute.cc @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include 
"lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -246,6 +250,14 @@ class ConcatComputeImage : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + int axis_size_ = 1; int axis_ = 1; int flag_ = 1; diff --git a/lite/kernels/opencl/conv_buffer_compute.h b/lite/kernels/opencl/conv_buffer_compute.h index 80131777c3cf676a78ad318a2f889be983ade0f4..f61bf9ac9cec9b378779d36b2c97fa98ed2232fa 100644 --- a/lite/kernels/opencl/conv_buffer_compute.h +++ b/lite/kernels/opencl/conv_buffer_compute.h @@ -23,6 +23,10 @@ #include "lite/core/tensor.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -39,6 +43,14 @@ class ConvCompute void Run() override; +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_names_[0]; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: void GemmlikeConv2d(); void Conv2d1x1(); diff --git a/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc b/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc index ae03c2a1828a4993d136c30182d25607fea3230b..8b466be7586c1d9cb3a63da3fe47af772628b753 100644 --- a/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc +++ b/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include 
"lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -119,6 +123,14 @@ class DepthwiseConv2dCompute CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"depthwise_conv2d"}; std::string build_options_{"-DCL_DTYPE_float"}; diff --git a/lite/kernels/opencl/dropout_image_compute.cc b/lite/kernels/opencl/dropout_image_compute.cc index ff9d18430bc06f0800086484698cce1405c56167..c3fdba3c1363141b5dec4a73fa86985120a1e48a 100644 --- a/lite/kernels/opencl/dropout_image_compute.cc +++ b/lite/kernels/opencl/dropout_image_compute.cc @@ -21,6 +21,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -89,16 +93,24 @@ class DropoutComputeImage2D : public KernelLite(default_work_size.data()[1]), static_cast(default_work_size.data()[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"dropout"}; std::string build_options_{"-DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/elementwise_add_buffer_compute.h 
b/lite/kernels/opencl/elementwise_add_buffer_compute.h index c60b8512c3ede9e7c1919dc3f140faabe7204544..e451bf920054e41881f33a4fd9d2eeaee2096a3a 100644 --- a/lite/kernels/opencl/elementwise_add_buffer_compute.h +++ b/lite/kernels/opencl/elementwise_add_buffer_compute.h @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -38,6 +42,14 @@ class ElementwiseAddCompute return "ElementwiseAdd using cl::Buffer, kFloat"; } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: void UpdateParams(); diff --git a/lite/kernels/opencl/elementwise_add_image_compute.h b/lite/kernels/opencl/elementwise_add_image_compute.h index 83972d3286b98c456444617ac018049112bc83e8..ba87ef4433fb34dd56043ac266cb272fa9e1739a 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.h +++ b/lite/kernels/opencl/elementwise_add_image_compute.h @@ -17,11 +17,14 @@ #include #include #include "lite/backends/opencl/cl_half.h" -#include "lite/backends/opencl/cl_utility.h" #include "lite/core/kernel.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { diff --git a/lite/kernels/opencl/elementwise_mul_compute.cc b/lite/kernels/opencl/elementwise_mul_compute.cc index 25764f1dc2bc2965f9f0be74bf6b86e9f4266318..d0e8bc92d56dfd5926596ab328b353040e579e65 100644 --- a/lite/kernels/opencl/elementwise_mul_compute.cc +++ 
b/lite/kernels/opencl/elementwise_mul_compute.cc @@ -153,13 +153,13 @@ void ElementwiseMulFloatImageCompute::Run() { auto global_work_size = cl::NDRange{static_cast(x_img_width), static_cast(x_img_height)}; - auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + auto status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); std::string time_stamp_{GetTimeStamp()}; diff --git a/lite/kernels/opencl/elementwise_mul_image_compute.cc b/lite/kernels/opencl/elementwise_mul_image_compute.cc index dcedee86de08d6df46c9e71ec23eddebe4f32376..1da8554670883b00e9695099de81c1c9ec0f7b27 100644 --- a/lite/kernels/opencl/elementwise_mul_image_compute.cc +++ b/lite/kernels/opencl/elementwise_mul_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -186,13 +190,13 @@ class ElementwiseMulImageCompute cl::NDRange{static_cast(x_img_width), static_cast(x_img_height)}; - auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + auto status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height; diff --git a/lite/kernels/opencl/elementwise_sub_image_compute.cc b/lite/kernels/opencl/elementwise_sub_image_compute.cc index 8a29cde6a4bbc1fe56b42e4541936b3ce56df264..61d75f4d029a6123106d8434d02bf1a583a553ab 100644 --- 
a/lite/kernels/opencl/elementwise_sub_image_compute.cc +++ b/lite/kernels/opencl/elementwise_sub_image_compute.cc @@ -138,8 +138,13 @@ void ElementwiseSubImageCompute::Run() { VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height; #endif - auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, nullptr); + auto status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } diff --git a/lite/kernels/opencl/elementwise_sub_image_compute.h b/lite/kernels/opencl/elementwise_sub_image_compute.h index cc1ce505c63b58e92a587f2f45eb9f945ddffeb0..8af4cee73080f6f88761312c358c6586ca376b6e 100644 --- a/lite/kernels/opencl/elementwise_sub_image_compute.h +++ b/lite/kernels/opencl/elementwise_sub_image_compute.h @@ -20,6 +20,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -41,6 +45,14 @@ class ElementwiseSubImageCompute return "ElementwiseSub using cl::Image2D, kFP16"; } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* ele_param_{nullptr}; std::string kernel_func_name_{"elementwise_sub"}; diff --git a/lite/kernels/opencl/fc_buffer_compute.cc b/lite/kernels/opencl/fc_buffer_compute.cc index 38ca4fb7968fb5d0820837077dd3236e588aa129..9763faf2f33f578e6f62b07a8c89390e1b80c159 100644 --- a/lite/kernels/opencl/fc_buffer_compute.cc +++ b/lite/kernels/opencl/fc_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/operators/op_params.h" #include 
"lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -124,16 +128,24 @@ class FcCompute auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: int m_, n_, k_; param_t* fc_param_{nullptr}; diff --git a/lite/kernels/opencl/grid_sampler_image_compute.cc b/lite/kernels/opencl/grid_sampler_image_compute.cc index e9151e18efb6ea24e965aaa81027259ac0beef90..ff0b2481bfecf2b9ca43f6a4ff9c8426892ae1b6 100644 --- a/lite/kernels/opencl/grid_sampler_image_compute.cc +++ b/lite/kernels/opencl/grid_sampler_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -131,16 +135,24 @@ class GridSamplerImageCompute : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef 
LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* grid_param_{nullptr}; bool first_epoch_for_reinit_{true}; diff --git a/lite/kernels/opencl/instance_norm_image_compute.cc b/lite/kernels/opencl/instance_norm_image_compute.cc index d0145889419bb7b8d467d645024d56fe8f872976..205575cf61c87ab5fd2dd2d5198248169296505f 100644 --- a/lite/kernels/opencl/instance_norm_image_compute.cc +++ b/lite/kernels/opencl/instance_norm_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -137,13 +141,13 @@ class InstanceNormImageCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - local_work_size, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + local_work_size, + nullptr, + event_); CL_CHECK_FATAL(status); } @@ -258,17 +262,25 @@ class InstanceNormImageCompute : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - local_work_size, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + local_work_size, + nullptr, + event_); CL_CHECK_FATAL(status); } #endif +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* instance_norm_param_{nullptr}; std::string 
kernel_func_name_{"instance_norm_onnx"}; diff --git a/lite/kernels/opencl/lrn_image_compute.cc b/lite/kernels/opencl/lrn_image_compute.cc index 8e70189b8842045b0e67a5d32b233e8746cf60a2..48f20fa79a3b9de313841787e877d6c046ba53f1 100644 --- a/lite/kernels/opencl/lrn_image_compute.cc +++ b/lite/kernels/opencl/lrn_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -128,13 +132,13 @@ class LrnImageCompute : public KernelLite(default_work_size[1]), static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " @@ -142,6 +146,14 @@ class LrnImageCompute : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* lrn_param_{nullptr}; int n_{5}; diff --git a/lite/kernels/opencl/mul_buffer_compute.cc b/lite/kernels/opencl/mul_buffer_compute.cc index 7877a7fde69d9e8a8e9a7c262736b5b8cd23d1c3..87249b007919d70c00544a6b093591e0cad5366f 100644 --- a/lite/kernels/opencl/mul_buffer_compute.cc +++ b/lite/kernels/opencl/mul_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -92,16 +96,24 @@ class 
MulCompute auto global_work_size = cl::NDRange{static_cast((m_ + 3) / 4), static_cast((n_ + 3) / 4)}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: int m_, n_, k_; std::string kernel_func_name_{"mat_mul"}; diff --git a/lite/kernels/opencl/nearest_interp_image_compute.cc b/lite/kernels/opencl/nearest_interp_image_compute.cc index eb0c84f25d72f5dfcc55a95026ba30617254a902..a3c202bbb458d0fb838cf97baa451fd4c9f0e10e 100644 --- a/lite/kernels/opencl/nearest_interp_image_compute.cc +++ b/lite/kernels/opencl/nearest_interp_image_compute.cc @@ -19,6 +19,10 @@ #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -110,16 +114,24 @@ class NearestInterpComputeImageDefault static_cast(default_work_size.data()[1]), static_cast(default_work_size.data()[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // 
`event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"nearest_interp"}; std::string build_options_{" -DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/pad2d_image_compute.cc b/lite/kernels/opencl/pad2d_image_compute.cc index 49489ea3b40d99c00b89cdda6108b512a9f9b6b9..dc0590ee47ebd6753b788859dbaf6439ac0fbc77 100644 --- a/lite/kernels/opencl/pad2d_image_compute.cc +++ b/lite/kernels/opencl/pad2d_image_compute.cc @@ -23,6 +23,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -142,13 +146,13 @@ class Pad2dCompute : public KernelLite(default_work_size[1]), static_cast(default_work_size[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); #ifdef LITE_WITH_LOG VLOG(4) << "global_work_size:[2D]:" << global_work_size[0] << " " @@ -156,6 +160,14 @@ class Pad2dCompute : public KernelLitekernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + protected: param_t* pad2d_param_{nullptr}; std::string kernel_func_name_{}; diff --git a/lite/kernels/opencl/pool_buffer_compute.cc b/lite/kernels/opencl/pool_buffer_compute.cc index 9be0775d99cbacd5cfefc1e8cd68afc7f2ac229c..5b81d8586ccf5bd6e9dc495b76caa7f5bd7ac088 100644 --- a/lite/kernels/opencl/pool_buffer_compute.cc +++ b/lite/kernels/opencl/pool_buffer_compute.cc @@ -20,6 +20,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef 
LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -106,16 +110,24 @@ class PoolCompute CL_CHECK_FATAL(status); auto global_work_size = cl::NDRange(static_cast(numel)); - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"pool_"}; std::string build_options_{"-DCL_DTYPE_float"}; diff --git a/lite/kernels/opencl/pool_image_compute.cc b/lite/kernels/opencl/pool_image_compute.cc index db27bf2aedc4bbd43691a7c6e312a5d6cb72f2e4..7a89b33841ff6a181d3e59c747620f5711e5eacb 100644 --- a/lite/kernels/opencl/pool_image_compute.cc +++ b/lite/kernels/opencl/pool_image_compute.cc @@ -16,13 +16,16 @@ #include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" -#include "lite/backends/opencl/cl_utility.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" #undef LITE_WITH_LOG diff --git a/lite/kernels/opencl/reshape_image_compute.cc b/lite/kernels/opencl/reshape_image_compute.cc index bcaa46ba3d8c3b5f6679a58937ffd9720b9d1a73..0ee55d13f853ae9e68363a4fd8ef630f23f770f4 100644 --- a/lite/kernels/opencl/reshape_image_compute.cc +++ 
b/lite/kernels/opencl/reshape_image_compute.cc @@ -14,13 +14,16 @@ #include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_include.h" -#include "lite/backends/opencl/cl_utility.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" #include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/logging.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" #undef LITE_WITH_LOG diff --git a/lite/kernels/opencl/scale_image_compute.cc b/lite/kernels/opencl/scale_image_compute.cc index bb1d6f8e66925d3024771d8230297f045c74ffab..169fd25a83f51e4a71c26fb5f597e51827f7e4d9 100644 --- a/lite/kernels/opencl/scale_image_compute.cc +++ b/lite/kernels/opencl/scale_image_compute.cc @@ -21,6 +21,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -93,16 +97,24 @@ class ScaleComputeImage2D : public KernelLiteGetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size_, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size_, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"scale"}; std::string build_options_{"-DCL_DTYPE_half"}; diff --git a/lite/kernels/opencl/slice_image_compute.cc b/lite/kernels/opencl/slice_image_compute.cc index 
5bef5bfe09e62018b47bd081d9f264f49695bbca..e9ae7e4a122d8172c39f7197e368d1b5a265f67f 100644 --- a/lite/kernels/opencl/slice_image_compute.cc +++ b/lite/kernels/opencl/slice_image_compute.cc @@ -21,6 +21,10 @@ #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/profiler.h" +#endif +#include "lite/backends/opencl/cl_utility.h" namespace paddle { namespace lite { @@ -96,16 +100,24 @@ class SliceComputeImage2D : public KernelLite(default_work_size.data()[1]), static_cast(default_work_size.data()[2])}; - status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, - cl::NullRange, - global_work_size, - cl::NullRange, - nullptr, - nullptr); + status = EnqueueNDRangeKernel(context, + kernel, + cl::NullRange, + global_work_size, + cl::NullRange, + nullptr, + event_); CL_CHECK_FATAL(status); } +#ifdef LITE_WITH_PROFILE + void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { + ch->kernel_func_name = kernel_func_name_; + ch->cl_event = + event_; // `event_` defined in `kernel.h`, valid after kernel::Run + } +#endif + private: std::string kernel_func_name_{"slice"}; std::string build_options_{"-DCL_DTYPE_half"};