diff --git a/lite/backends/opencl/cl_context.cc b/lite/backends/opencl/cl_context.cc index 7fa12a5e5e697549c32a2f477119eee2fddfc700..153c0620035377afac065e8049a9ebbc0a6f0c15 100644 --- a/lite/backends/opencl/cl_context.cc +++ b/lite/backends/opencl/cl_context.cc @@ -36,8 +36,10 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, STL::stringstream program_key_ss; program_key_ss << file_name << options; std::string program_key = program_key_ss.str(); - auto it = programs_.find(program_key); - if (it != programs_.end()) { + + auto &programs = CLRuntime::Global()->programs(); + auto it = programs.find(program_key); + if (it != programs.end()) { VLOG(3) << " --- program -> " << program_key << " has been built --- "; return *(it->second); } @@ -48,14 +50,15 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, CLRuntime::Global()->BuildProgram(program.get(), options); VLOG(3) << " --- end build program -> " << program_key << " --- "; - programs_[program_key] = std::move(program); + programs[program_key] = std::move(program); - return *(programs_[program_key]); + return *(programs[program_key]); } void CLContext::AddKernel(const std::string &kernel_name, const std::string &file_name, - const std::string &options) { + const std::string &options, + const std::string &time_stamp) { cl_int status{CL_SUCCESS}; VLOG(3) << " --- to get program " << file_name << " --- "; auto program = GetProgram(file_name, options); @@ -65,25 +68,30 @@ void CLContext::AddKernel(const std::string &kernel_name, new cl::Kernel(program, kernel_name.c_str(), &status)); CL_CHECK_FATAL(status); VLOG(3) << " --- end create kernel --- "; - kernels_.emplace_back(std::move(kernel)); + + auto &kernels = CLRuntime::Global()->kernels(); + auto &kernel_offset_map = CLRuntime::Global()->kernel_offset(); + kernels.emplace_back(std::move(kernel)); STL::stringstream kernel_key; - kernel_key << kernel_name << options; - kernel_offset_[kernel_key.str()] = kernels_.size() - 1; + 
kernel_key << kernel_name << options << time_stamp; + kernel_offset_map[kernel_key.str()] = kernels.size() - 1; } cl::Kernel &CLContext::GetKernel(const int index) { - VLOG(3) << " --- kernel count: " << kernels_.size() << " --- "; - CHECK(static_cast(index) < kernels_.size()) + auto &kernels = CLRuntime::Global()->kernels(); + VLOG(3) << " --- kernel count: " << kernels.size() << " --- "; + CHECK(static_cast(index) < kernels.size()) << "The index must be less than the size of kernels."; - CHECK(kernels_[index] != nullptr) + CHECK(kernels[index] != nullptr) << "The target kernel pointer cannot be null."; - return *(kernels_[index]); + return *(kernels[index]); } cl::Kernel &CLContext::GetKernel(const std::string &name) { - auto it = kernel_offset_.find(name); - CHECK(it != kernel_offset_.end()) << "Cannot find the kernel function: " - << name; + auto &kernel_offset_map = CLRuntime::Global()->kernel_offset(); + auto it = kernel_offset_map.find(name); + CHECK(it != kernel_offset_map.end()) << "Cannot find the kernel function: " + << name; return GetKernel(it->second); } diff --git a/lite/backends/opencl/cl_context.h b/lite/backends/opencl/cl_context.h index 7ffe1bc87d86cf370074091e7adf16c8460d218a..b12473ccf5b4238f4ee95b7848a0842ee5b2ffe0 100644 --- a/lite/backends/opencl/cl_context.h +++ b/lite/backends/opencl/cl_context.h @@ -36,7 +36,8 @@ class CLContext { void AddKernel(const std::string &kernel_name, const std::string &file_name, - const std::string &options = ""); + const std::string &options = "", + const std::string &time_stamp = ""); cl::Kernel &GetKernel(const int index); @@ -45,16 +46,12 @@ class CLContext { cl::NDRange DefaultWorkSize(const CLImage &image); cl::NDRange LocalWorkSize(cl::NDRange global_work_size, size_t max_work_size); + cl::NDRange LocalWorkSizeTurn(cl::NDRange global_work_size, size_t max_work_size, int divitor = 2); // cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size, // size_t max_work_size); - - private: - 
std::unordered_map> programs_; - std::vector> kernels_; - std::map kernel_offset_; }; } // namespace lite diff --git a/lite/backends/opencl/cl_runtime.cc b/lite/backends/opencl/cl_runtime.cc index 63c9954f9181e9252c4d14f57b6ed29107965fe3..8405fc967239e851705feb96f517b3980192ebef 100644 --- a/lite/backends/opencl/cl_runtime.cc +++ b/lite/backends/opencl/cl_runtime.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "lite/backends/opencl/cl_runtime.h" #include +#include #include #include #include "lite/utils/cp_logging.h" @@ -29,10 +30,26 @@ CLRuntime* CLRuntime::Global() { CLRuntime::~CLRuntime() { if (command_queue_ != nullptr) { + command_queue_->flush(); command_queue_->finish(); } - // For controlling the destruction order: + + for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) { + clReleaseKernel(kernels_[kidx]->get()); + kernels_[kidx].reset(); + } + kernels_.clear(); + kernel_offset_.clear(); + + for (auto& p : programs_) { + clReleaseProgram(p.second->get()); + } + programs_.clear(); + + // For controlling the destruction order + command_queue_&& clReleaseCommandQueue(command_queue_->get()); command_queue_.reset(); + context_&& clReleaseContext(context_->get()); context_.reset(); device_.reset(); platform_.reset(); @@ -73,14 +90,14 @@ cl::CommandQueue& CLRuntime::command_queue() { return *command_queue_; } -std::unique_ptr CLRuntime::CreateProgram( +std::shared_ptr CLRuntime::CreateProgram( const cl::Context& context, std::string file_name) { auto cl_file = opencl_kernels_files.find(file_name); std::string content(cl_file->second.begin(), cl_file->second.end()); cl::Program::Sources sources; sources.push_back(content); auto prog = - std::unique_ptr(new cl::Program(context, sources, &status_)); + std::shared_ptr(new cl::Program(context, sources, &status_)); VLOG(4) << "OpenCL kernel file name: " << file_name; VLOG(4) << "Program source size: " << content.size(); CL_CHECK_FATAL(status_); diff --git a/lite/backends/opencl/cl_runtime.h 
b/lite/backends/opencl/cl_runtime.h index 1a5ededeff37d9f6820af6a49dc22c669620734b..36e5d64b906ff5c91b2b5cb5e97855d7dff511c4 100644 --- a/lite/backends/opencl/cl_runtime.h +++ b/lite/backends/opencl/cl_runtime.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include +#include #include #include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_utility.h" @@ -42,7 +43,7 @@ class CLRuntime { cl::CommandQueue& command_queue(); - std::unique_ptr CreateProgram(const cl::Context& context, + std::shared_ptr CreateProgram(const cl::Context& context, std::string file_name); std::unique_ptr CreateEvent(const cl::Context& context); @@ -57,6 +58,12 @@ class CLRuntime { std::map& GetDeviceInfo(); + std::unordered_map>& programs() { + return programs_; + } + std::vector>& kernels() { return kernels_; } + std::map& kernel_offset() { return kernel_offset_; } + private: CLRuntime() = default; @@ -98,6 +105,12 @@ class CLRuntime { std::shared_ptr command_queue_{nullptr}; + std::unordered_map> programs_{}; + + std::vector> kernels_{}; + + std::map kernel_offset_{}; + cl_int status_{CL_SUCCESS}; bool initialized_{false}; diff --git a/lite/kernels/opencl/activation_buffer_compute.cc b/lite/kernels/opencl/activation_buffer_compute.cc index c662aa89fb257aded70119ea14494111398f0529..03ccdac99e5f11e1c056374463f7a8068dbd4f56 100644 --- a/lite/kernels/opencl/activation_buffer_compute.cc +++ b/lite/kernels/opencl/activation_buffer_compute.cc @@ -32,8 +32,10 @@ class ReluCompute std::string doc() const override { return "Relu using cl::Buffer, kFloat"; } void PrepareForRun() override { auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/relu_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/relu_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -46,7 +48,7 @@ class ReluCompute auto* x_buf = param.X->data(); auto* out_buf = 
param.Out->mutable_data(TARGET(kOpenCL)); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); VLOG(4) << TargetToStr(param.X->target()); VLOG(4) << TargetToStr(param.Out->target()); @@ -74,6 +76,7 @@ class ReluCompute private: std::string kernel_func_name_{"relu"}; std::string build_options_{"-DCL_DTYPE_float -DRELU"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; @@ -87,8 +90,10 @@ class SigmoidCompute } void PrepareForRun() override { auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/sigmoid_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/sigmoid_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -101,7 +106,7 @@ class SigmoidCompute auto* x_buf = param.X->data(); auto* out_buf = param.Out->mutable_data(TARGET(kOpenCL)); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); VLOG(4) << TargetToStr(param.X->target()); VLOG(4) << TargetToStr(param.Out->target()); @@ -129,6 +134,7 @@ class SigmoidCompute private: std::string kernel_func_name_{"sigmoid"}; std::string build_options_{"-DCL_DTYPE_float -DSIGMOID"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/activation_image_compute.cc b/lite/kernels/opencl/activation_image_compute.cc index 0e0613237ba4d53bfab15ac267d12efc8451f88d..a99e588eccd79eb35a5e7c0f3da73471849ab581 100644 --- a/lite/kernels/opencl/activation_image_compute.cc +++ b/lite/kernels/opencl/activation_image_compute.cc @@ -77,11 +77,13 @@ class ActivationComputeImageDefault #endif auto& context = ctx_->As(); - 
context.cl_context()->AddKernel( - kernel_func_name_, "image/activation_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/activation_kernel.cl", + build_options_, + time_stamp_); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; kernel_ = context.cl_context()->GetKernel(kernel_key.str()); } @@ -171,6 +173,7 @@ class ActivationComputeImageDefault cl::NDRange global_work_size_ = cl::NDRange{ static_cast(1), static_cast(1), static_cast(1)}; std::string build_options_{"-DCL_DTYPE_half"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; } // namespace opencl diff --git a/lite/kernels/opencl/bilinear_interp_image_compute.cc b/lite/kernels/opencl/bilinear_interp_image_compute.cc index 7e32010c0b5ff5cedad8b0da7ce7233fbf73da6f..53f260789e12a94dc39f785df12a8e988d08bcbe 100644 --- a/lite/kernels/opencl/bilinear_interp_image_compute.cc +++ b/lite/kernels/opencl/bilinear_interp_image_compute.cc @@ -43,8 +43,10 @@ class BilinearInterpImageCompute bilinear_interp_param_ = param_.get_mutable(); auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/bilinear_interp_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/bilinear_interp_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; } @@ -103,7 +105,7 @@ class BilinearInterpImageCompute #endif STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); int arg_idx = 0; @@ -159,6 +161,7 @@ class BilinearInterpImageCompute param_t* bilinear_interp_param_{nullptr}; std::string kernel_func_name_{"bilinear_interp"}; std::string build_options_{"-DCL_DTYPE_half"}; + std::string 
time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/concat_buffer_compute.cc b/lite/kernels/opencl/concat_buffer_compute.cc index 010e7726170ab1f40adc2fcb56a66835ac7d2bd2..414f62ff0c4f86f29756b933817de2a7682ecd4c 100644 --- a/lite/kernels/opencl/concat_buffer_compute.cc +++ b/lite/kernels/opencl/concat_buffer_compute.cc @@ -38,8 +38,10 @@ class ConcatCompute : public KernelLiteAddKernel( - kernel_func_name_, "buffer/concat_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/concat_kernel.cl", + build_options_, + time_stamp_); auto axis = concat_param_->axis; auto inputs = concat_param_->x; @@ -88,7 +90,7 @@ class ConcatCompute : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto inputs = param.x; int arg_idx = 0; @@ -177,6 +179,7 @@ class ConcatCompute : public KernelLite event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/concat_image_compute.cc b/lite/kernels/opencl/concat_image_compute.cc index 95e64025662a4b87cd68c211ccc0b0fb7b84a9f2..60d1ac628ab1474d7e82f1861067bca838548569 100644 --- a/lite/kernels/opencl/concat_image_compute.cc +++ b/lite/kernels/opencl/concat_image_compute.cc @@ -40,8 +40,10 @@ class ConcatComputeImage : public KernelLiteAddKernel( - kernel_func_name_, "image/concat_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/concat_kernel.cl", + build_options_, + time_stamp_); auto axis = concat_param_->axis; auto inputs = concat_param_->x; @@ -117,7 +119,7 @@ class ConcatComputeImage : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto inputs = param.x; int arg_idx = 0; @@ -251,6 
+253,7 @@ class ConcatComputeImage : public KernelLite event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/conv_buffer_compute.cc b/lite/kernels/opencl/conv_buffer_compute.cc index 65477e89c7d00408bf4d639138dea936a61a3d70..4c118e1263c0d3c23eb223b01b98a8d9a53bac0e 100644 --- a/lite/kernels/opencl/conv_buffer_compute.cc +++ b/lite/kernels/opencl/conv_buffer_compute.cc @@ -114,8 +114,10 @@ void ConvCompute::PrepareForRun() { } for (size_t i = 0; i < kernel_func_names_.size(); i++) { - context.cl_context()->AddKernel( - kernel_func_names_[i], kernel_func_paths_[i], build_options_[i]); + context.cl_context()->AddKernel(kernel_func_names_[i], + kernel_func_paths_[i], + build_options_[i], + time_stamp_); } } @@ -153,7 +155,7 @@ void ConvCompute::GemmlikeConv2d() { auto& context = ctx_->As(); std::stringstream kernel_key; - kernel_key << kernel_func_names_[0] << build_options_[0]; + kernel_key << kernel_func_names_[0] << build_options_[0] << time_stamp_; auto img2col_kernel = context.cl_context()->GetKernel(kernel_key.str()); int n_threads = c_in * h_out * w_out; @@ -218,7 +220,7 @@ void ConvCompute::GemmlikeConv2d() { int n = h_out * w_out; VLOG(4) << "m = " << m << " n = " << n << " k = " << k; kernel_key.str(""); - kernel_key << kernel_func_names_[1] << build_options_[1]; + kernel_key << kernel_func_names_[1] << build_options_[1] << time_stamp_; auto gemm_kernel = context.cl_context()->GetKernel(kernel_key.str()); GemmBatched( gemm_kernel, col_buf, filter_buf, bias_buf, output_buf, bs, m, n, k); @@ -249,7 +251,8 @@ void ConvCompute::Conv2d1x1() { auto& context = ctx_->As(); std::stringstream kernel_key; - kernel_key << kernel_func_names_.front() << build_options_.front(); + kernel_key << kernel_func_names_.front() << build_options_.front() + << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); GemmBatched(kernel, x_d, filter_d, bias_d, output_d, batch_size, m, n, k); diff --git a/lite/kernels/opencl/conv_buffer_compute.h 
b/lite/kernels/opencl/conv_buffer_compute.h index 44ada55d92352edf3c64cd653e832b26718cdd2f..3dabe906f128ef96fb03dfa82ab3847febaeeed5 100644 --- a/lite/kernels/opencl/conv_buffer_compute.h +++ b/lite/kernels/opencl/conv_buffer_compute.h @@ -21,6 +21,7 @@ #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/tensor.h" +#include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" namespace paddle { @@ -55,6 +56,7 @@ class ConvCompute std::vector kernel_func_names_{}; std::vector kernel_func_paths_{}; std::vector build_options_{}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc index 40ffd697c2ac66223430441bae9800d06a743387..aadd7010cca2ec03ea417e3b486d8c946d80fcab 100644 --- a/lite/kernels/opencl/conv_image_compute.cc +++ b/lite/kernels/opencl/conv_image_compute.cc @@ -369,15 +369,17 @@ void ConvImageCompute::PrepareForRun() { build_options_.push_back(build_options_single); for (size_t i = 0; i < kernel_func_names_.size(); i++) { - context.cl_context()->AddKernel( - kernel_func_names_[i], kernel_func_paths_[i], build_options_[i]); + context.cl_context()->AddKernel(kernel_func_names_[i], + kernel_func_paths_[i], + build_options_[i], + time_stamp_); } VLOG(4) << "global_work_size_[3D]: {" << global_work_size_[0] << "," << global_work_size_[1] << "," << global_work_size_[2] << "}"; std::stringstream kernel_key; - kernel_key << kernel_func_names_[0] << build_options_[0]; + kernel_key << kernel_func_names_[0] << build_options_[0] << time_stamp_; kernel_ = context.cl_context()->GetKernel(kernel_key.str()); VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel ready ... 
" << kernel_key.str(); diff --git a/lite/kernels/opencl/conv_image_compute.h b/lite/kernels/opencl/conv_image_compute.h index 31a14a0c5b94f357e78df8eb35f6823ec6f57998..6f293a0d7dd90e55bedd63c214ba38799a591080 100644 --- a/lite/kernels/opencl/conv_image_compute.h +++ b/lite/kernels/opencl/conv_image_compute.h @@ -22,6 +22,7 @@ #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/tensor.h" +#include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" namespace paddle { @@ -56,6 +57,7 @@ class ConvImageCompute : public KernelLite kernel_func_names_{}; std::vector kernel_func_paths_{}; std::vector build_options_{}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; Tensor filter_gpu_image_; Tensor bias_gpu_image_; diff --git a/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc b/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc index 0c88509926041411eddac66bea08b5d3a08d6a3c..afe2aa1c66c04d2bdf180a77362e5d6f1271c1f6 100644 --- a/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc +++ b/lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc @@ -44,8 +44,10 @@ class DepthwiseConv2dCompute build_options_ += " -DRELU6"; } auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/depthwise_conv2d_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/depthwise_conv2d_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -67,7 +69,7 @@ class DepthwiseConv2dCompute param.output->mutable_data(TARGET(kOpenCL)); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); cl_int status; @@ -120,6 +122,7 @@ class DepthwiseConv2dCompute private: std::string kernel_func_name_{"depthwise_conv2d"}; std::string 
build_options_{"-DCL_DTYPE_float"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/dropout_image_compute.cc b/lite/kernels/opencl/dropout_image_compute.cc index 490e34a8868a3f625591a1c621aa297bb0639576..2be5af2ef0bf3e30d1c586d57ed6c3d40d625b14 100644 --- a/lite/kernels/opencl/dropout_image_compute.cc +++ b/lite/kernels/opencl/dropout_image_compute.cc @@ -40,8 +40,10 @@ class DropoutComputeImage2D : public KernelLiteAs(); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; - context.cl_context()->AddKernel( - kernel_func_name_, "image/dropout_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/dropout_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -63,7 +65,7 @@ class DropoutComputeImage2D : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); cl_int status; @@ -101,6 +103,7 @@ class DropoutComputeImage2D : public KernelLite event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/elementwise_add_buffer_compute.cc b/lite/kernels/opencl/elementwise_add_buffer_compute.cc index 3961ac7583917fdcd761614558c493e6917d3294..b70f7d1ee017566e399ac86d35df56bd4ba4d383 100644 --- a/lite/kernels/opencl/elementwise_add_buffer_compute.cc +++ b/lite/kernels/opencl/elementwise_add_buffer_compute.cc @@ -25,8 +25,10 @@ namespace opencl { void ElementwiseAddCompute::PrepareForRun() { auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/elementwise_add_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/elementwise_add_kernel.cl", + build_options_, + time_stamp_); ele_param_ = param_.get_mutable(); UpdateParams(); } @@ -39,7 +41,7 @@ void 
ElementwiseAddCompute::Run() { auto* out_buf = ele_param_->Out->template mutable_data( TARGET(kOpenCL)); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); #ifndef LITE_SHUTDOWN_LOG VLOG(4) << TargetToStr(ele_param_->X->target()); diff --git a/lite/kernels/opencl/elementwise_add_buffer_compute.h b/lite/kernels/opencl/elementwise_add_buffer_compute.h index 5a9266ee69b81416d5f4dea9a3eb38aaed7b4165..7dbe5d0e8d5172386418d547812bf4e6c269f043 100644 --- a/lite/kernels/opencl/elementwise_add_buffer_compute.h +++ b/lite/kernels/opencl/elementwise_add_buffer_compute.h @@ -16,6 +16,7 @@ #include #include #include "lite/core/kernel.h" +#include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" @@ -46,6 +47,7 @@ class ElementwiseAddCompute param_t* ele_param_{nullptr}; std::string kernel_func_name_{"elementwise_add"}; std::string build_options_{"-DCL_DTYPE_float"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/elementwise_add_image_compute.cc b/lite/kernels/opencl/elementwise_add_image_compute.cc index b13326056682cbe81077e248dd477a8f698ca602..51d488d51b72dd9af8225b45a7ee56063312d055 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.cc +++ b/lite/kernels/opencl/elementwise_add_image_compute.cc @@ -59,11 +59,13 @@ void ElementwiseAddImageCompute::ReInitWhenNeeded() { VLOG(1) << "kernel_func_name_:" << kernel_func_name_; auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/elementwise_add_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/elementwise_add_kernel.cl", + build_options_, + time_stamp_); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << 
kernel_func_name_ << build_options_ << time_stamp_; kernel_ = context.cl_context()->GetKernel(kernel_key.str()); // compute image shape diff --git a/lite/kernels/opencl/elementwise_add_image_compute.h b/lite/kernels/opencl/elementwise_add_image_compute.h index 7e38f300430b6faf199976088ad0cef69f94b789..a92a1b448176628381a3c65b838f6bba529eb4e0 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.h +++ b/lite/kernels/opencl/elementwise_add_image_compute.h @@ -18,6 +18,7 @@ #include #include "lite/backends/opencl/cl_half.h" #include "lite/core/kernel.h" +#include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" @@ -54,8 +55,10 @@ class ElementwiseAddImageCompute {static_cast(1), static_cast(1)})); DDim out_img_shape_ = DDim(std::vector( {static_cast(1), static_cast(1)})); + std::string kernel_func_name_{"elementwise_add"}; std::string build_options_{"-DCL_DTYPE_half"}; + std::string time_stamp_{GetTimeStamp()}; bool first_epoch_for_reinit_{true}; cl::Kernel kernel_; cl::NDRange global_work_size_ = cl::NDRange{ diff --git a/lite/kernels/opencl/elementwise_mul_image_compute.cc b/lite/kernels/opencl/elementwise_mul_image_compute.cc index aa6af2a29bfdedfb5fdd3114693514b6fad13a64..96dc2de1affba7c36be6c9c0e952b85be726fca8 100644 --- a/lite/kernels/opencl/elementwise_mul_image_compute.cc +++ b/lite/kernels/opencl/elementwise_mul_image_compute.cc @@ -71,8 +71,10 @@ class ElementwiseMulImageCompute VLOG(4) << "bias_dims.size():" << bias_dims.size(); auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/elementwise_mul_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/elementwise_mul_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -114,7 +116,7 @@ class ElementwiseMulImageCompute #endif STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << 
build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto bias_dims = y->dims(); @@ -201,6 +203,7 @@ class ElementwiseMulImageCompute param_t* ele_param_{nullptr}; std::string kernel_func_name_{"elementwise_mul"}; std::string build_options_{"-DCL_DTYPE_half"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/elementwise_sub_image_compute.cc b/lite/kernels/opencl/elementwise_sub_image_compute.cc index 0bc867d7f124582660b7a0a9a95d026d910fc2d3..b93167b99c064a2f9eb2256291adad99f3912baf 100644 --- a/lite/kernels/opencl/elementwise_sub_image_compute.cc +++ b/lite/kernels/opencl/elementwise_sub_image_compute.cc @@ -49,8 +49,10 @@ void ElementwiseSubImageCompute::PrepareForRun() { VLOG(1) << "kernel_func_name_:" << kernel_func_name_; auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/elementwise_sub_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/elementwise_sub_kernel.cl", + build_options_, + time_stamp_); } void ElementwiseSubImageCompute::Run() { @@ -93,7 +95,7 @@ void ElementwiseSubImageCompute::Run() { #endif STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); int arg_idx = 0; diff --git a/lite/kernels/opencl/elementwise_sub_image_compute.h b/lite/kernels/opencl/elementwise_sub_image_compute.h index 48386b083e5375f8943c04afb1da70a2ed207dbf..db3e1db9813bffd985a41abbac14e5c89e574397 100644 --- a/lite/kernels/opencl/elementwise_sub_image_compute.h +++ b/lite/kernels/opencl/elementwise_sub_image_compute.h @@ -17,6 +17,7 @@ #include #include "lite/backends/opencl/cl_half.h" #include "lite/core/kernel.h" +#include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/cp_logging.h" 
@@ -44,6 +45,7 @@ class ElementwiseSubImageCompute param_t* ele_param_{nullptr}; std::string kernel_func_name_{"elementwise_sub"}; std::string build_options_{"-DCL_DTYPE_half"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/fc_buffer_compute.cc b/lite/kernels/opencl/fc_buffer_compute.cc index d486e97bb0ca799dcaf671ad55d0cb76c6fac389..0fb83db2fe76e27baf7a096395369cb92b995072 100644 --- a/lite/kernels/opencl/fc_buffer_compute.cc +++ b/lite/kernels/opencl/fc_buffer_compute.cc @@ -16,6 +16,7 @@ #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" +#include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" @@ -74,10 +75,12 @@ class FcCompute } auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/fc_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/fc_kernel.cl", + build_options_, + time_stamp_); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; kernel_ = context.cl_context()->GetKernel(kernel_key.str()); // compute global work size @@ -136,6 +139,7 @@ class FcCompute param_t* fc_param_{nullptr}; std::string kernel_func_name_{}; std::string build_options_{"-DCL_DTYPE_float "}; + std::string time_stamp_{GetTimeStamp()}; bool first_epoch_for_reinit_{true}; DDim last_x_dims_; cl::NDRange global_work_size_; diff --git a/lite/kernels/opencl/fusion_elementwise_add_activation_buffer_compute.cc b/lite/kernels/opencl/fusion_elementwise_add_activation_buffer_compute.cc index d76e00fa85d4ebb6da9d779e9c2b220a2fd731d9..730b70525e818512aea11e1f42c1282b125aae54 100644 --- a/lite/kernels/opencl/fusion_elementwise_add_activation_buffer_compute.cc +++ 
b/lite/kernels/opencl/fusion_elementwise_add_activation_buffer_compute.cc @@ -28,8 +28,10 @@ class FusionElementwiseAddActivationCompute : public ElementwiseAddCompute { void PrepareForRun() override { build_options_ += " -DRELU"; auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/elementwise_add_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/elementwise_add_kernel.cl", + build_options_, + time_stamp_); ele_param_ = param_.get_mutable(); UpdateParams(); auto act_t = static_cast(ele_param_)->act_type; diff --git a/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc b/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc index e5c0e29bddf5cd6c25ccf98f05aa7cb091a4be7e..8e687340943dcb0f1b68e4c9495cbab1ad703645 100644 --- a/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc +++ b/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc @@ -16,6 +16,7 @@ #include "lite/backends/opencl/cl_include.h" #include "lite/core/op_registry.h" #include "lite/kernels/opencl/elementwise_add_image_compute.h" +#include "lite/kernels/opencl/image_helper.h" namespace paddle { namespace lite { @@ -30,8 +31,10 @@ class FusionElementwiseAddActivationImageCompute void PrepareForRun() override { build_options_ += " -DRELU"; auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/elementwise_add_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/elementwise_add_kernel.cl", + build_options_, + time_stamp_); ele_param_ = param_.get_mutable(); auto act_t = static_cast(ele_param_)->act_type; VLOG(4) << "act: " << act_t; diff --git a/lite/kernels/opencl/grid_sampler_image_compute.cc b/lite/kernels/opencl/grid_sampler_image_compute.cc index 8edfff8a5952680ae559e0bf78b798c0abc365f1..4fb13a61181ba282f7005ea158768ee18b94b7a0 100644 --- 
a/lite/kernels/opencl/grid_sampler_image_compute.cc +++ b/lite/kernels/opencl/grid_sampler_image_compute.cc @@ -40,12 +40,14 @@ class GridSamplerImageCompute : public KernelLiteAs(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/grid_sampler_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/grid_sampler_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; kernel_ = context.cl_context()->GetKernel(kernel_key.str()); VLOG(4) << "kernel_key: " << kernel_key.str(); } @@ -150,6 +152,7 @@ class GridSamplerImageCompute : public KernelLite(1), static_cast(1), static_cast(1)}; std::string build_options_{"-DCL_DTYPE_half"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/image_helper.h b/lite/kernels/opencl/image_helper.h index d0d282250d1c5658bc8f684b52b4b0d140895833..81d38bc683eb355b1d85a307d35839b4e3e8ef45 100644 --- a/lite/kernels/opencl/image_helper.h +++ b/lite/kernels/opencl/image_helper.h @@ -74,6 +74,17 @@ static std::vector DefaultWorkSize(const DDim& image_dim, LOG(FATAL) << " not support this dim, need imp "; } +// Builds the per-instance suffix that OpenCL kernels append to their kernel +// lookup key. Seconds are included as well as microseconds because tv_usec +// alone wraps every second and could repeat across kernel instances. +static const std::string GetTimeStamp() { + struct timeval time; + gettimeofday(&time, NULL); + std::string stamp = std::to_string(time.tv_sec); + stamp.append("_").append(std::to_string(time.tv_usec)); + return stamp; +} + } // namespace opencl } // namespace kernels } // namespace lite diff --git a/lite/kernels/opencl/instance_norm_image_compute.cc b/lite/kernels/opencl/instance_norm_image_compute.cc index 6bdec0ca6cdfd16219becf704de4d5701aad3197..c5e02ae0ed4ae9facf36747d99ee825e6eab6515 100644 --- a/lite/kernels/opencl/instance_norm_image_compute.cc +++ b/lite/kernels/opencl/instance_norm_image_compute.cc @@ -60,8 +60,10 @@ class InstanceNormImageCompute : public KernelLiteAs(); - context.cl_context()->AddKernel( -
kernel_func_name_, "image/instance_norm_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/instance_norm_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; } @@ -115,7 +117,7 @@ class InstanceNormImageCompute : public KernelLiteGetKernel(kernel_key.str()); cl_int status = kernel.setArg(0, out_w); @@ -180,8 +182,10 @@ class InstanceNormImageCompute : public KernelLite( scale_img_size[0], scale_img_size[1], bias_img.data()); auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/instance_norm_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/instance_norm_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; } @@ -234,7 +238,7 @@ class InstanceNormImageCompute : public KernelLiteGetKernel(kernel_key.str()); auto* scale_img = scale_image_.data(); auto* bias_img = bias_image_.data(); @@ -271,6 +275,7 @@ class InstanceNormImageCompute : public KernelLite event_{new cl::Event}; Tensor scale_image_; Tensor bias_image_; diff --git a/lite/kernels/opencl/lrn_image_compute.cc b/lite/kernels/opencl/lrn_image_compute.cc index edce0368ddc9cda54fdab44b472fcd0e771413ae..0e01bdc107c4fcb4a0caf943cfb1b768557dd671 100644 --- a/lite/kernels/opencl/lrn_image_compute.cc +++ b/lite/kernels/opencl/lrn_image_compute.cc @@ -48,7 +48,7 @@ class LrnImageCompute : public KernelLitebeta; norm_region_ = lrn_param_->norm_region; context.cl_context()->AddKernel( - kernel_func_name_, "image/lrn_kernel.cl", build_options_); + kernel_func_name_, "image/lrn_kernel.cl", build_options_, time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; } @@ -91,7 +91,7 @@ class LrnImageCompute : public KernelLiteGetKernel(kernel_key.str()); int arg_idx = 0; @@ -152,6 +152,7 @@ class LrnImageCompute : public KernelLite event_{new cl::Event}; }; diff --git 
a/lite/kernels/opencl/mul_buffer_compute.cc b/lite/kernels/opencl/mul_buffer_compute.cc index 4c46da67da9877fb37b214b6d738b3dd3da3e5bb..e8edb359898fb47cf47919a25e521ca9f8353104 100644 --- a/lite/kernels/opencl/mul_buffer_compute.cc +++ b/lite/kernels/opencl/mul_buffer_compute.cc @@ -16,6 +16,7 @@ #include "lite/backends/opencl/cl_include.h" #include "lite/core/kernel.h" #include "lite/core/op_registry.h" +#include "lite/kernels/opencl/image_helper.h" #include "lite/operators/op_params.h" #include "lite/utils/replace_stl/stream.h" #include "lite/utils/string.h" @@ -32,8 +33,10 @@ class MulCompute void PrepareForRun() override { auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/mat_mul_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/mat_mul_kernel.cl", + build_options_, + time_stamp_); const auto& param = *param_.get_mutable(); const auto* x_data = param.x->data(); const auto* y_data = param.y->data(); @@ -68,7 +71,7 @@ class MulCompute param.output->mutable_data(TARGET(kOpenCL)); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); cl_int status; @@ -103,6 +106,7 @@ class MulCompute int m_, n_, k_; std::string kernel_func_name_{"mat_mul"}; std::string build_options_{"-DCL_DTYPE_float"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/nearest_interp_image_compute.cc b/lite/kernels/opencl/nearest_interp_image_compute.cc index 082f21ab1ae792ae33e9e2a368073274258b8884..17637e2569556d1eeb8b6002c0073223345ac7ec 100644 --- a/lite/kernels/opencl/nearest_interp_image_compute.cc +++ b/lite/kernels/opencl/nearest_interp_image_compute.cc @@ -38,8 +38,10 @@ class NearestInterpComputeImageDefault void PrepareForRun() override { auto& context = ctx_->As(); - 
context.cl_context()->AddKernel( - kernel_func_name_, "image/nearest_interp_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/nearest_interp_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; } @@ -66,7 +68,7 @@ class NearestInterpComputeImageDefault auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); int arg_idx = 0; @@ -121,6 +123,7 @@ class NearestInterpComputeImageDefault private: std::string kernel_func_name_{"nearest_interp"}; std::string build_options_{" -DCL_DTYPE_half"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/pad2d_image_compute.cc b/lite/kernels/opencl/pad2d_image_compute.cc index 1be4729ee1b24ac77383de4d7c111e9d37d29d6b..f16642d449d29c2afd3db7097432945c73d107e3 100644 --- a/lite/kernels/opencl/pad2d_image_compute.cc +++ b/lite/kernels/opencl/pad2d_image_compute.cc @@ -52,8 +52,10 @@ class Pad2dCompute : public KernelLiteAs(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/pad2d_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/pad2d_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; } @@ -93,7 +95,7 @@ class Pad2dCompute : public KernelLiteGetKernel(kernel_key.str()); int arg_idx = 0; @@ -159,6 +161,7 @@ class Pad2dCompute : public KernelLite event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/pool_buffer_compute.cc b/lite/kernels/opencl/pool_buffer_compute.cc index 3f491afb86d4e4d5144522b6fb028c225c9a97e4..aeba4bcd2ea1d9b1f14ac86509ab9dbec2509ad0 100644 --- a/lite/kernels/opencl/pool_buffer_compute.cc +++ b/lite/kernels/opencl/pool_buffer_compute.cc 
@@ -37,8 +37,10 @@ class PoolCompute const auto& param = *param_.get_mutable(); kernel_func_name_ += param.pooling_type; auto& context = ctx_->As(); - context.cl_context()->AddKernel( - kernel_func_name_, "buffer/pool_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "buffer/pool_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -69,7 +71,7 @@ class PoolCompute auto* output_buf = param.output->mutable_data(TARGET(kOpenCL)); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); cl_int status; auto numel = out_dims.production(); @@ -117,6 +119,7 @@ class PoolCompute private: std::string kernel_func_name_{"pool_"}; std::string build_options_{"-DCL_DTYPE_float"}; + std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/pool_image_compute.cc b/lite/kernels/opencl/pool_image_compute.cc index 39da325ebb10c85f153e349173aa833bbf5e1f6e..34524122c8e475df63db02eae32b7d100abfa2d9 100644 --- a/lite/kernels/opencl/pool_image_compute.cc +++ b/lite/kernels/opencl/pool_image_compute.cc @@ -47,7 +47,7 @@ class PoolComputeImage2D : public KernelLiteAs(); context.cl_context()->AddKernel( - kernel_func_name_, "image/pool_kernel.cl", build_options_); + kernel_func_name_, "image/pool_kernel.cl", build_options_, time_stamp_); } void Run() override { @@ -112,7 +112,7 @@ class PoolComputeImage2D : public KernelLiteGetKernel(kernel_key.str()); int c_block = (out_dims[1] + 3) / 4; @@ -164,6 +164,7 @@ class PoolComputeImage2D : public KernelLite event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/reshape_image_compute.cc b/lite/kernels/opencl/reshape_image_compute.cc index 376add226216a57a0868c9c52497b784929a207e..febb1c33d9c4df2cb58580a03bda1eff93ed4da7 100644 --- a/lite/kernels/opencl/reshape_image_compute.cc 
+++ b/lite/kernels/opencl/reshape_image_compute.cc @@ -36,8 +36,10 @@ class ReshapeComputeFloatImage : public KernelLiteAs(); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; - context.cl_context()->AddKernel( - kernel_func_name_, "image/reshape_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/reshape_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -110,7 +112,7 @@ class ReshapeComputeFloatImage : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); #ifndef LITE_SHUTDOWN_LOG @@ -166,6 +168,7 @@ class ReshapeComputeFloatImage : public KernelLite event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/scale_image_compute.cc b/lite/kernels/opencl/scale_image_compute.cc index 3535b4f8030ec8681320d85333c3d5b0cc7d4805..97b56e68d47fcdf1647433f5e267c264fb36c5c2 100644 --- a/lite/kernels/opencl/scale_image_compute.cc +++ b/lite/kernels/opencl/scale_image_compute.cc @@ -37,12 +37,14 @@ class ScaleComputeImage2D : public KernelLiteAs(); - context.cl_context()->AddKernel( - kernel_func_name_, "image/scale_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/scale_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; kernel_ = context.cl_context()->GetKernel(kernel_key.str()); } @@ -105,6 +107,7 @@ class ScaleComputeImage2D : public KernelLite event_{new cl::Event}; param_t* scale_param_{nullptr}; diff --git a/lite/kernels/opencl/slice_image_compute.cc b/lite/kernels/opencl/slice_image_compute.cc index 
149ef35afe3d49ca8793769ee7ad366292462296..dd231ec8647ba88ab0f953661af47bc36c948e8b 100644 --- a/lite/kernels/opencl/slice_image_compute.cc +++ b/lite/kernels/opencl/slice_image_compute.cc @@ -38,8 +38,10 @@ class SliceComputeImage2D : public KernelLiteAs(); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; - context.cl_context()->AddKernel( - kernel_func_name_, "image/slice_kernel.cl", build_options_); + context.cl_context()->AddKernel(kernel_func_name_, + "image/slice_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -68,7 +70,7 @@ class SliceComputeImage2D : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_; + kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto kernel = context.cl_context()->GetKernel(kernel_key.str()); cl_int status; @@ -108,6 +110,7 @@ class SliceComputeImage2D : public KernelLite event_{new cl::Event}; }; diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh index 703da69fa59f3aa99bad9fb04c0decb591486058..a5dc2b741d2d3d5fdd2f08d13b7dc483a3065b0e 100755 --- a/lite/tools/ci_build.sh +++ b/lite/tools/ci_build.sh @@ -192,6 +192,7 @@ function build_opencl { cmake_opencl ${os} ${abi} ${lang} make opencl_clhpp -j$NUM_CORES_FOR_COMPILE + make publish_inference -j$NUM_CORES_FOR_COMPILE build $TESTS_FILE }