diff --git a/lite/kernels/opencl/activation_image_compute.cc b/lite/kernels/opencl/activation_image_compute.cc index a99e588eccd79eb35a5e7c0f3da73471849ab581..e85126d597b44fa465df9bb6be13b8a3bcfc2816 100644 --- a/lite/kernels/opencl/activation_image_compute.cc +++ b/lite/kernels/opencl/activation_image_compute.cc @@ -77,14 +77,10 @@ class ActivationComputeImageDefault #endif auto& context = ctx_->As(); - context.cl_context()->AddKernel(kernel_func_name_, - "image/activation_kernel.cl", - build_options_, - time_stamp_); - - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; - kernel_ = context.cl_context()->GetKernel(kernel_key.str()); + kernel_ = context.cl_context()->CreateKernel(kernel_func_name_, + "image/activation_kernel.cl", + build_options_, + time_stamp_); } void ReInitWhenNeeded() override { @@ -118,15 +114,14 @@ class ActivationComputeImageDefault auto* out_img = act_param_->Out->mutable_data( out_img_shape_[0], out_img_shape_[1]); - auto kernel = kernel_; cl_int status; - status = kernel.setArg(0, *x_img); + status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *out_img); + status = kernel_->setArg(1, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, threshold_); + status = kernel_->setArg(2, threshold_); CL_CHECK_FATAL(status); - status = kernel.setArg(3, scale_); + status = kernel_->setArg(3, scale_); CL_CHECK_FATAL(status); #ifndef LITE_SHUTDOWN_LOG @@ -148,7 +143,7 @@ class ActivationComputeImageDefault auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, @@ -168,7 +163,7 @@ class ActivationComputeImageDefault std::string kernel_func_name_{}; float threshold_{6.f}; float scale_{1.f}; - cl::Kernel kernel_; + std::shared_ptr kernel_; bool first_epoch_for_reinit_{true}; cl::NDRange global_work_size_ = cl::NDRange{ static_cast(1), static_cast(1), static_cast(1)}; diff --git a/lite/kernels/opencl/concat_image_compute.cc b/lite/kernels/opencl/concat_image_compute.cc index 60d1ac628ab1474d7e82f1861067bca838548569..fe1e70d0a1fd6e0dbe6aad2d16eb7c3c82e2de39 100644 --- a/lite/kernels/opencl/concat_image_compute.cc +++ b/lite/kernels/opencl/concat_image_compute.cc @@ -40,10 +40,10 @@ class ConcatComputeImage : public KernelLiteAddKernel(kernel_func_name_, - "image/concat_kernel.cl", - build_options_, - time_stamp_); + kernel_ = context.cl_context()->CreateKernel(kernel_func_name_, + "image/concat_kernel.cl", + build_options_, + time_stamp_); auto axis = concat_param_->axis; auto inputs = concat_param_->x; @@ -118,8 +118,6 @@ class ConcatComputeImage : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; auto inputs = param.x; int arg_idx = 0; @@ -164,31 +162,30 @@ class ConcatComputeImage : public KernelLiteGetKernel(kernel_key.str()); int out_w = x_dims[x_dims.size() - 1]; int out_c = x_dims[1]; if (inputs.size() == 2) { auto* x_buf0 = inputs[0]->data(); auto* x_buf1 = inputs[1]->data(); - cl_int status = kernel.setArg(arg_idx, *x_buf0); + cl_int status = kernel_->setArg(arg_idx, *x_buf0); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *x_buf1); + status = kernel_->setArg(++arg_idx, *x_buf1); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *out_buf); + status = kernel_->setArg(++arg_idx, *out_buf); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, flag_); + status = kernel_->setArg(++arg_idx, flag_); CL_CHECK_FATAL(status); - status = - kernel.setArg(++arg_idx, static_cast(inputs[0]->dims()[axis_])); + status = kernel_->setArg(++arg_idx, + static_cast(inputs[0]->dims()[axis_])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_c); + status = kernel_->setArg(++arg_idx, out_c); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_w); + status = kernel_->setArg(++arg_idx, out_w); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, width_); + status = kernel_->setArg(++arg_idx, width_); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size, cl::NullRange, @@ -213,25 +210,25 @@ class ConcatComputeImage : public KernelLite(image_shape["width"] / in_dims[in_dims.size() - 1]), static_cast(image_shape["height"])}; - cl_int status = kernel.setArg(arg_idx, *x_buf); + cl_int status = kernel_->setArg(arg_idx, *x_buf); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *out_buf); + status = kernel_->setArg(++arg_idx, *out_buf); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, flag_); + status = kernel_->setArg(++arg_idx, flag_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, start); + status = kernel_->setArg(++arg_idx, start); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_c); + status = kernel_->setArg(++arg_idx, out_c); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_w); + status = kernel_->setArg(++arg_idx, out_w); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, in_w); + status = kernel_->setArg(++arg_idx, in_w); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, width_); + status = kernel_->setArg(++arg_idx, width_); CL_CHECK_FATAL(status); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size, cl::NullRange, @@ -255,6 +252,7 @@ class ConcatComputeImage : public KernelLite event_{new cl::Event}; + std::shared_ptr kernel_; }; } // namespace opencl diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc index aadd7010cca2ec03ea417e3b486d8c946d80fcab..ff5af4cdbe9f4122bb35d9b99c58dbb81fcec1e3 100644 --- a/lite/kernels/opencl/conv_image_compute.cc +++ b/lite/kernels/opencl/conv_image_compute.cc @@ -368,25 +368,17 @@ void ConvImageCompute::PrepareForRun() { build_options_.push_back(build_options_single); - for (size_t i = 0; i < kernel_func_names_.size(); i++) { - context.cl_context()->AddKernel(kernel_func_names_[i], - kernel_func_paths_[i], - build_options_[i], - time_stamp_); - } + kernel_ = context.cl_context()->CreateKernel(kernel_func_names_[0], + kernel_func_paths_[0], + build_options_[0], + time_stamp_); VLOG(4) << "global_work_size_[3D]: {" << global_work_size_[0] << "," << global_work_size_[1] << "," << global_work_size_[2] << "}"; - - std::stringstream kernel_key; - kernel_key << kernel_func_names_[0] << build_options_[0] << time_stamp_; - kernel_ = context.cl_context()->GetKernel(kernel_key.str()); - VLOG(4) << "kernel_key: " << kernel_key.str(); - VLOG(4) << "kernel ready ... " << kernel_key.str(); size_t max_work_group_size = 0; - kernel_.getWorkGroupInfo(CLRuntime::Global()->device(), - CL_KERNEL_WORK_GROUP_SIZE, - &max_work_group_size); + kernel_->getWorkGroupInfo(CLRuntime::Global()->device(), + CL_KERNEL_WORK_GROUP_SIZE, + &max_work_group_size); VLOG(4) << "max_work_group_size: " << max_work_group_size; @@ -501,49 +493,48 @@ void ConvImageCompute::Conv2d1x1opt(bool is_turn) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, offset); + status = kernel_->setArg(++arg_idx, offset); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_c_block); + status = kernel_->setArg(++arg_idx, input_c_block); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_c); + status = kernel_->setArg(++arg_idx, input_c); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, default_w_blk_); + status = kernel_->setArg(++arg_idx, default_w_blk_); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, local_work_size_, @@ -649,56 +640,55 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) { if (has_bias) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "set bias_image: "; #endif - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, offset); + status = kernel_->setArg(++arg_idx, offset); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_c_block); + status = kernel_->setArg(++arg_idx, input_c_block); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_channel); + status = kernel_->setArg(++arg_idx, output_channel); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, filter_channel); + status = kernel_->setArg(++arg_idx, filter_channel); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, filter_width); + status = kernel_->setArg(++arg_idx, filter_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, filter_height); + status = kernel_->setArg(++arg_idx, filter_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, new_groups); + status = kernel_->setArg(++arg_idx, new_groups); CL_CHECK_FATAL(status); #ifndef LITE_SHUTDOWN_LOG @@ -708,7 +698,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) { #endif status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, @@ -784,48 +774,46 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; - cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "set bias_image: "; #endif - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, paddings[0]); + status = kernel_->setArg(++arg_idx, paddings[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, batch); + status = kernel_->setArg(++arg_idx, batch); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_channel); + status = kernel_->setArg(++arg_idx, input_channel); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); #ifndef LITE_SHUTDOWN_LOG @@ -835,7 +823,7 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) { #endif status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, local_work_size_, @@ -917,46 +905,44 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; - cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "set bias_image: "; #endif - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, offset); + status = kernel_->setArg(++arg_idx, offset); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_c_block); + status = kernel_->setArg(++arg_idx, input_c_block); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); #ifndef LITE_SHUTDOWN_LOG @@ -966,7 +952,7 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) { #endif status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, @@ -1042,50 +1028,49 @@ void ConvImageCompute::Conv2d5x5opt(bool is_turn) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, paddings[0]); + status = kernel_->setArg(++arg_idx, paddings[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, batch); + status = kernel_->setArg(++arg_idx, batch); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_channel); + status = kernel_->setArg(++arg_idx, input_channel); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); // VLOG(4) << "out_image: " << out_image; status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, local_work_size_, @@ -1167,46 +1152,44 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; - cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "set bias_image: "; #endif - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, offset); + status = kernel_->setArg(++arg_idx, offset); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_c_block); + status = kernel_->setArg(++arg_idx, input_c_block); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); #ifndef LITE_SHUTDOWN_LOG @@ -1216,7 +1199,7 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) { #endif status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, @@ -1290,49 +1273,47 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; - cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, paddings[0]); + status = kernel_->setArg(++arg_idx, paddings[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, batch); + status = kernel_->setArg(++arg_idx, batch); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_channel); + status = kernel_->setArg(++arg_idx, input_channel); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, local_work_size_, @@ -1369,19 +1350,17 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) { auto* output_img = param.output->mutable_data( image_shape["width"], image_shape["height"]); - auto kernel = kernel_; - cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_img); + status = kernel_->setArg(++arg_idx, *input_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_img); + status = kernel_->setArg(++arg_idx, *filter_img); CL_CHECK_FATAL(status); const bool has_bias = param.bias != nullptr; @@ -1393,30 +1372,30 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "set bias_image: "; #endif - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *output_img); + status = kernel_->setArg(++arg_idx, *output_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(strides[0])); + status = kernel_->setArg(++arg_idx, static_cast(strides[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(paddings[0])); + status = kernel_->setArg(++arg_idx, static_cast(paddings[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(dilations[0])); + status = kernel_->setArg(++arg_idx, static_cast(dilations[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(x_dims[1])); + status = kernel_->setArg(++arg_idx, static_cast(x_dims[1])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(x_dims[3])); + status = kernel_->setArg(++arg_idx, static_cast(x_dims[3])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(x_dims[2])); + status = kernel_->setArg(++arg_idx, static_cast(x_dims[2])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(output_dims[3])); + status = kernel_->setArg(++arg_idx, static_cast(output_dims[3])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(output_dims[2])); + status = kernel_->setArg(++arg_idx, static_cast(output_dims[2])); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, local_work_size_, @@ -1456,8 +1435,6 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) { auto* output_img = param.output->mutable_data( image_shape["width"], image_shape["height"]); - auto kernel = kernel_; - #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "setArg"; VLOG(4) << "strides = " << strides[0]; @@ -1472,15 +1449,15 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) { cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_img); + status = kernel_->setArg(++arg_idx, *input_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_img); + status = kernel_->setArg(++arg_idx, *filter_img); CL_CHECK_FATAL(status); const bool has_bias = param.bias != nullptr; const bool is_element_wise_bias = @@ -1491,30 +1468,30 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "set bias_image: "; #endif - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *output_img); + status = kernel_->setArg(++arg_idx, *output_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(strides[0])); + status = kernel_->setArg(++arg_idx, static_cast(strides[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(offset)); + status = kernel_->setArg(++arg_idx, static_cast(offset)); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(dilations[0])); + status = kernel_->setArg(++arg_idx, static_cast(dilations[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(input_c_block)); + status = kernel_->setArg(++arg_idx, static_cast(input_c_block)); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(x_dims[3])); + status = kernel_->setArg(++arg_idx, static_cast(x_dims[3])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(x_dims[2])); + status = kernel_->setArg(++arg_idx, static_cast(x_dims[2])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(output_dims[3])); + status = kernel_->setArg(++arg_idx, static_cast(output_dims[3])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(output_dims[2])); + status = kernel_->setArg(++arg_idx, static_cast(output_dims[2])); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, @@ -1598,50 +1575,48 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) { bias_image = bias_gpu_image_.data(); } - auto kernel = kernel_; - cl_int status; int arg_idx = 0; - status = kernel.setArg(arg_idx, c_blk_); + status = kernel_->setArg(arg_idx, c_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, w_blk_); + status = kernel_->setArg(++arg_idx, w_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, nh_blk_); + status = kernel_->setArg(++arg_idx, nh_blk_); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *input_image); + status = kernel_->setArg(++arg_idx, *input_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *filter_image); + status = kernel_->setArg(++arg_idx, *filter_image); CL_CHECK_FATAL(status); if (has_bias) { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "set bias_image: "; #endif - status = kernel.setArg(++arg_idx, *bias_image); + status = kernel_->setArg(++arg_idx, *bias_image); CL_CHECK_FATAL(status); } - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(++arg_idx, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, strides[0]); + status = kernel_->setArg(++arg_idx, strides[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, offset); + status = kernel_->setArg(++arg_idx, offset); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_c_block); + status = kernel_->setArg(++arg_idx, input_c_block); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, dilations[0]); + status = kernel_->setArg(++arg_idx, dilations[0]); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_width); + status = kernel_->setArg(++arg_idx, input_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, input_height); + status = kernel_->setArg(++arg_idx, input_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_width); + status = kernel_->setArg(++arg_idx, output_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, output_height); + status = kernel_->setArg(++arg_idx, output_height); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, filter_width); + status = kernel_->setArg(++arg_idx, filter_width); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, filter_height); + status = kernel_->setArg(++arg_idx, filter_height); CL_CHECK_FATAL(status); #ifndef LITE_SHUTDOWN_LOG @@ -1651,7 +1626,7 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) { #endif status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, diff --git a/lite/kernels/opencl/conv_image_compute.h b/lite/kernels/opencl/conv_image_compute.h index 6f293a0d7dd90e55bedd63c214ba38799a591080..41004bea0261c4b9f14eb0449bb8ea950e6da9de 100644 --- a/lite/kernels/opencl/conv_image_compute.h +++ b/lite/kernels/opencl/conv_image_compute.h @@ -71,7 +71,7 @@ class ConvImageCompute : public KernelLite kernel_; cl::NDRange local_work_size_ = cl::NDRange{ static_cast(1), static_cast(1), static_cast(1)}; bool use_lws_{true}; diff --git a/lite/kernels/opencl/elementwise_add_image_compute.cc b/lite/kernels/opencl/elementwise_add_image_compute.cc index 51d488d51b72dd9af8225b45a7ee56063312d055..787cb9a2eaa23fabc16fe2125e197572ae3b6582 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.cc +++ b/lite/kernels/opencl/elementwise_add_image_compute.cc @@ -59,14 +59,11 @@ void ElementwiseAddImageCompute::ReInitWhenNeeded() { VLOG(1) << "kernel_func_name_:" << kernel_func_name_; auto& context = ctx_->As(); - context.cl_context()->AddKernel(kernel_func_name_, - "image/elementwise_add_kernel.cl", - build_options_, - time_stamp_); - - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; - kernel_ = context.cl_context()->GetKernel(kernel_key.str()); + kernel_ = + context.cl_context()->CreateKernel(kernel_func_name_, + "image/elementwise_add_kernel.cl", + build_options_, + time_stamp_); // compute image shape paddle::lite::CLImageConverterDefault default_convertor; @@ -118,13 +115,12 @@ void ElementwiseAddImageCompute::Run() { #endif cl_int status; - auto kernel = kernel_; if (y_dims.size() == 4) { - status = kernel.setArg(0, *x_img); + status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *y_img); + status = kernel_->setArg(1, *y_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *out_img); + status = kernel_->setArg(2, *out_img); CL_CHECK_FATAL(status); } else if (y_dims.size() == 1) { if (axis == x_dims.size() - 1 || axis == x_dims.size() - 3) { @@ -132,13 +128,13 @@ void ElementwiseAddImageCompute::Run() { #ifndef LITE_SHUTDOWN_LOG VLOG(4) << "tensor_w:" << tensor_w; #endif - status = kernel.setArg(0, *x_img); + status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *y_img); + status = kernel_->setArg(1, *y_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *out_img); + status = kernel_->setArg(2, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(3, tensor_w); + status = kernel_->setArg(3, tensor_w); CL_CHECK_FATAL(status); } else { LOG(FATAL) << "ElementwiseAddImage doesn't support axis:" << axis @@ -154,7 +150,7 @@ void ElementwiseAddImageCompute::Run() { auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, diff --git a/lite/kernels/opencl/elementwise_add_image_compute.h b/lite/kernels/opencl/elementwise_add_image_compute.h index a92a1b448176628381a3c65b838f6bba529eb4e0..56b837d4f70965210b77043fb2765e55d73dca53 100644 --- a/lite/kernels/opencl/elementwise_add_image_compute.h +++ b/lite/kernels/opencl/elementwise_add_image_compute.h @@ -60,7 +60,7 @@ class ElementwiseAddImageCompute std::string build_options_{"-DCL_DTYPE_half"}; std::string time_stamp_{GetTimeStamp()}; bool first_epoch_for_reinit_{true}; - cl::Kernel kernel_; + std::shared_ptr kernel_; cl::NDRange global_work_size_ = cl::NDRange{ static_cast(1), static_cast(1), static_cast(1)}; std::shared_ptr event_{new cl::Event}; diff --git a/lite/kernels/opencl/elementwise_mul_image_compute.cc b/lite/kernels/opencl/elementwise_mul_image_compute.cc index 96dc2de1affba7c36be6c9c0e952b85be726fca8..f24bb573f503a99518194092f9d2213273cb680d 100644 --- a/lite/kernels/opencl/elementwise_mul_image_compute.cc +++ b/lite/kernels/opencl/elementwise_mul_image_compute.cc @@ -71,10 +71,11 @@ class ElementwiseMulImageCompute VLOG(4) << "bias_dims.size():" << bias_dims.size(); auto& context = ctx_->As(); - context.cl_context()->AddKernel(kernel_func_name_, - "image/elementwise_mul_kernel.cl", - build_options_, - time_stamp_); + kernel_ = + context.cl_context()->CreateKernel(kernel_func_name_, + "image/elementwise_mul_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -115,66 +116,61 @@ class ElementwiseMulImageCompute << out_img_shape[1]; #endif - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; - auto kernel = context.cl_context()->GetKernel(kernel_key.str()); - auto bias_dims = y->dims(); auto x_dims = x->dims(); - if (bias_dims == x_dims) { // kernel_func_name_ = "elementwise_mul"; - cl_int status = kernel.setArg(0, *x_img); + cl_int status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *y_img); + status = kernel_->setArg(1, *y_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *out_img); + status = kernel_->setArg(2, *out_img); CL_CHECK_FATAL(status); } else { const int bias_dim_size = bias_dims.size(); if (bias_dim_size == 1) { // kernel_func_name_ = "channel_mul_d1"; const int tensor_w = x_dims[x_dims.size() - 1]; - cl_int status = kernel.setArg(0, *x_img); + cl_int status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *y_img); + status = kernel_->setArg(1, *y_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *out_img); + status = kernel_->setArg(2, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(3, tensor_w); + status = kernel_->setArg(3, tensor_w); CL_CHECK_FATAL(status); } else if (bias_dim_size == 2) { // kernel_func_name_ = "channel_mul_d2"; const int tensor_w = x_dims[x_dims.size() - 1]; - cl_int status = kernel.setArg(0, *x_img); + cl_int status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *y_img); + status = kernel_->setArg(1, *y_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *out_img); + status = kernel_->setArg(2, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(3, tensor_w); + status = kernel_->setArg(3, tensor_w); CL_CHECK_FATAL(status); } else if (bias_dim_size == 3) { // kernel_func_name_ = "channel_mul_d3"; const int tensor_w = x_dims[x_dims.size() - 1]; - cl_int status = kernel.setArg(0, *x_img); + cl_int status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *y_img); + status = kernel_->setArg(1, *y_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *out_img); + status = kernel_->setArg(2, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(3, tensor_w); + status = kernel_->setArg(3, tensor_w); CL_CHECK_FATAL(status); } else if (bias_dim_size == 4) { // kernel_func_name_ = "channel_mul_d4"; const int tensor_w = x_dims[x_dims.size() - 1]; - cl_int status = kernel.setArg(0, *x_img); + cl_int status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *y_img); + status = kernel_->setArg(1, *y_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *out_img); + status = kernel_->setArg(2, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(3, tensor_w); + status = kernel_->setArg(3, tensor_w); CL_CHECK_FATAL(status); } else { LOG(FATAL) << "Unsupported ElementwiseMul with x_dims:" << x_dims @@ -186,7 +182,7 @@ class ElementwiseMulImageCompute cl::NDRange{static_cast(x_img_width), static_cast(x_img_height)}; auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size, cl::NullRange, @@ -205,6 +201,7 @@ class ElementwiseMulImageCompute std::string build_options_{"-DCL_DTYPE_half"}; std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; + std::shared_ptr kernel_; }; } // namespace opencl diff --git a/lite/kernels/opencl/fc_buffer_compute.cc b/lite/kernels/opencl/fc_buffer_compute.cc index 0fb83db2fe76e27baf7a096395369cb92b995072..afd6ad190a39e96c558712596ead4c8923576e36 100644 --- a/lite/kernels/opencl/fc_buffer_compute.cc +++ b/lite/kernels/opencl/fc_buffer_compute.cc @@ -75,13 +75,10 @@ class FcCompute } auto& context = ctx_->As(); - context.cl_context()->AddKernel(kernel_func_name_, - "buffer/fc_kernel.cl", - build_options_, - time_stamp_); - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; - kernel_ = context.cl_context()->GetKernel(kernel_key.str()); + kernel_ = context.cl_context()->CreateKernel(kernel_func_name_, + "buffer/fc_kernel.cl", + build_options_, + time_stamp_); // compute global work size GetGlobalWorkSize(); @@ -106,25 +103,25 @@ class FcCompute auto kernel = kernel_; cl_int status; - status = kernel.setArg(0, *x_buf); + status = kernel_->setArg(0, *x_buf); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *w_buf); + status = kernel_->setArg(1, *w_buf); CL_CHECK_FATAL(status); - status = kernel.setArg(2, *bias_buf); + status = kernel_->setArg(2, *bias_buf); CL_CHECK_FATAL(status); - status = kernel.setArg(3, *out_buf); + status = kernel_->setArg(3, *out_buf); CL_CHECK_FATAL(status); - status = kernel.setArg(4, static_cast(m_)); + status = kernel_->setArg(4, static_cast(m_)); CL_CHECK_FATAL(status); - status = kernel.setArg(5, static_cast(n_)); + status = kernel_->setArg(5, static_cast(n_)); CL_CHECK_FATAL(status); - status = kernel.setArg(6, static_cast(k_)); + status = kernel_->setArg(6, static_cast(k_)); CL_CHECK_FATAL(status); auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel.get()), cl::NullRange, global_work_size_, cl::NullRange, @@ -143,7 +140,7 @@ class FcCompute bool first_epoch_for_reinit_{true}; DDim last_x_dims_; cl::NDRange global_work_size_; - cl::Kernel kernel_; + std::shared_ptr kernel_; std::shared_ptr event_{new cl::Event}; }; diff --git a/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc b/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc index 8e687340943dcb0f1b68e4c9495cbab1ad703645..1ea272e2a6124af10e83e2d121be0a2ceb2888e0 100644 --- a/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc +++ b/lite/kernels/opencl/fusion_elementwise_add_activation_image_compute.cc @@ -31,10 +31,11 @@ class FusionElementwiseAddActivationImageCompute void PrepareForRun() override { build_options_ += " -DRELU"; auto& context = ctx_->As(); - context.cl_context()->AddKernel(kernel_func_name_, - "image/elementwise_add_kernel.cl", - build_options_, - time_stamp_); + kernel_ = + context.cl_context()->CreateKernel(kernel_func_name_, + "image/elementwise_add_kernel.cl", + build_options_, + time_stamp_); ele_param_ = param_.get_mutable(); auto act_t = static_cast(ele_param_)->act_type; VLOG(4) << "act: " << act_t; diff --git a/lite/kernels/opencl/nearest_interp_image_compute.cc b/lite/kernels/opencl/nearest_interp_image_compute.cc index 17637e2569556d1eeb8b6002c0073223345ac7ec..a5f934650f5c8b0ed1ccb2e313034d3383b487cb 100644 --- a/lite/kernels/opencl/nearest_interp_image_compute.cc +++ b/lite/kernels/opencl/nearest_interp_image_compute.cc @@ -38,10 +38,11 @@ class NearestInterpComputeImageDefault void PrepareForRun() override { auto& context = ctx_->As(); - context.cl_context()->AddKernel(kernel_func_name_, - "image/nearest_interp_kernel.cl", - build_options_, - time_stamp_); + kernel_ = + context.cl_context()->CreateKernel(kernel_func_name_, + "image/nearest_interp_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; } @@ -67,26 +68,23 @@ class NearestInterpComputeImageDefault auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; - auto kernel = context.cl_context()->GetKernel(kernel_key.str()); - int arg_idx = 0; - cl_int status = kernel.setArg(arg_idx, *x_img); + cl_int status; + status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *out_img); + status = kernel_->setArg(1, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(scale_h)); + status = kernel_->setArg(2, static_cast(scale_h)); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(scale_w)); + status = kernel_->setArg(3, static_cast(scale_w)); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(in_dims_h)); + status = kernel_->setArg(4, static_cast(in_dims_h)); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(out_dims_h)); + status = kernel_->setArg(5, static_cast(out_dims_h)); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(in_dims_w)); + status = kernel_->setArg(6, static_cast(in_dims_w)); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(out_dims_w)); + status = kernel_->setArg(7, static_cast(out_dims_w)); CL_CHECK_FATAL(status); #ifndef LITE_SHUTDOWN_LOG @@ -110,7 +108,7 @@ class NearestInterpComputeImageDefault static_cast(default_work_size.data()[1]), static_cast(default_work_size.data()[2])}; status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size, cl::NullRange, @@ -125,6 +123,7 @@ class NearestInterpComputeImageDefault std::string build_options_{" -DCL_DTYPE_half"}; std::string time_stamp_{GetTimeStamp()}; std::shared_ptr event_{new cl::Event}; + std::shared_ptr kernel_; }; } // namespace opencl diff --git a/lite/kernels/opencl/pool_image_compute.cc b/lite/kernels/opencl/pool_image_compute.cc index 34524122c8e475df63db02eae32b7d100abfa2d9..602c823679ffeaf3e258645a6781accb3e233710 100644 --- a/lite/kernels/opencl/pool_image_compute.cc +++ b/lite/kernels/opencl/pool_image_compute.cc @@ -46,7 +46,7 @@ class PoolComputeImage2D : public KernelLiteAs(); - context.cl_context()->AddKernel( + kernel_ = context.cl_context()->CreateKernel( kernel_func_name_, "image/pool_kernel.cl", build_options_, time_stamp_); } @@ -111,10 +111,6 @@ class PoolComputeImage2D : public KernelLiteGetKernel(kernel_key.str()); - int c_block = (out_dims[1] + 3) / 4; int w = out_dims[3]; int nh = out_dims[0] * out_dims[2]; @@ -124,34 +120,33 @@ class PoolComputeImage2D : public KernelLitesetArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *out_img); + status = kernel_->setArg(1, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(in_dims[2])); + status = kernel_->setArg(2, static_cast(in_dims[2])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(in_dims[3])); + status = kernel_->setArg(3, static_cast(in_dims[3])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(out_dims[2])); + status = kernel_->setArg(4, static_cast(out_dims[2])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(out_dims[3])); + status = kernel_->setArg(5, static_cast(out_dims[3])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(ksize[0])); + status = kernel_->setArg(6, static_cast(ksize[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(ksize[1])); + status = kernel_->setArg(7, static_cast(ksize[1])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(strides[0])); + status = kernel_->setArg(8, static_cast(strides[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(strides[1])); + status = kernel_->setArg(9, static_cast(strides[1])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(paddings[2])); + status = kernel_->setArg(10, static_cast(paddings[2])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(paddings[0])); + status = kernel_->setArg(11, static_cast(paddings[0])); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size, cl::NullRange, @@ -162,6 +157,7 @@ class PoolComputeImage2D : public KernelLite kernel_; std::string kernel_func_name_{"pool_"}; std::string build_options_{"-DCL_DTYPE_half"}; std::string time_stamp_{GetTimeStamp()}; diff --git a/lite/kernels/opencl/reshape_image_compute.cc b/lite/kernels/opencl/reshape_image_compute.cc index febb1c33d9c4df2cb58580a03bda1eff93ed4da7..7876aad8b26cc063fca931c3e6c5523ef9ae778e 100644 --- a/lite/kernels/opencl/reshape_image_compute.cc +++ b/lite/kernels/opencl/reshape_image_compute.cc @@ -36,10 +36,10 @@ class ReshapeComputeFloatImage : public KernelLiteAs(); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; - context.cl_context()->AddKernel(kernel_func_name_, - "image/reshape_kernel.cl", - build_options_, - time_stamp_); + kernel_ = context.cl_context()->CreateKernel(kernel_func_name_, + "image/reshape_kernel.cl", + build_options_, + time_stamp_); } void Run() override { @@ -111,42 +111,38 @@ class ReshapeComputeFloatImage : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; - auto kernel = context.cl_context()->GetKernel(kernel_key.str()); #ifndef LITE_SHUTDOWN_LOG VLOG(4) << TargetToStr(x->target()); VLOG(4) << TargetToStr(param.output->target()); #endif - int arg_idx = 0; cl_int status; - status = kernel.setArg(arg_idx, *x_image); + status = kernel_->setArg(0, *x_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, *out_image); + status = kernel_->setArg(1, *out_image); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_C); + status = kernel_->setArg(2, out_C); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_H); + status = kernel_->setArg(3, out_H); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_W); + status = kernel_->setArg(4, out_W); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, in_W); + status = kernel_->setArg(5, in_W); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, in_H); + status = kernel_->setArg(6, in_H); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, in_Stride0); + status = kernel_->setArg(7, in_Stride0); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, in_Stride1); + status = kernel_->setArg(8, in_Stride1); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, in_Stride2); + status = kernel_->setArg(9, in_Stride2); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_Stride0); + status = kernel_->setArg(10, out_Stride0); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_Stride1); + status = kernel_->setArg(11, out_Stride1); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, out_Stride2); + status = kernel_->setArg(12, out_Stride2); CL_CHECK_FATAL(status); auto global_work_size = @@ -155,7 +151,7 @@ class ReshapeComputeFloatImage : public KernelLite(default_work_size.data()[2])}; status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size, cl::NullRange, @@ -170,6 +166,7 @@ class ReshapeComputeFloatImage : public KernelLite event_{new cl::Event}; + std::shared_ptr kernel_; }; } // namespace opencl diff --git a/lite/kernels/opencl/scale_image_compute.cc b/lite/kernels/opencl/scale_image_compute.cc index 97b56e68d47fcdf1647433f5e267c264fb36c5c2..9982bd68b3328fc4df4a67f6f610356ec2a54213 100644 --- a/lite/kernels/opencl/scale_image_compute.cc +++ b/lite/kernels/opencl/scale_image_compute.cc @@ -37,15 +37,11 @@ class ScaleComputeImage2D : public KernelLiteAs(); - context.cl_context()->AddKernel(kernel_func_name_, - "image/scale_kernel.cl", - build_options_, - time_stamp_); + kernel_ = context.cl_context()->CreateKernel(kernel_func_name_, + "image/scale_kernel.cl", + build_options_, + time_stamp_); VLOG(1) << "kernel_func_name_:" << kernel_func_name_; - - STL::stringstream kernel_key; - kernel_key << kernel_func_name_ << build_options_ << time_stamp_; - kernel_ = context.cl_context()->GetKernel(kernel_key.str()); } void ReInitWhenNeeded() override { @@ -82,19 +78,18 @@ class ScaleComputeImage2D : public KernelLiteAs(); CHECK(context.cl_context() != nullptr); - auto kernel = kernel_; cl_int status; - status = kernel.setArg(0, *x_img); + status = kernel_->setArg(0, *x_img); CL_CHECK_FATAL(status); - status = kernel.setArg(1, *out_img); + status = kernel_->setArg(1, *out_img); CL_CHECK_FATAL(status); - status = kernel.setArg(2, scale); + status = kernel_->setArg(2, scale); CL_CHECK_FATAL(status); - status = kernel.setArg(3, bias); + status = kernel_->setArg(3, bias); CL_CHECK_FATAL(status); status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel( - kernel, + *(kernel_.get()), cl::NullRange, global_work_size_, cl::NullRange, @@ -111,7 +106,7 @@ class ScaleComputeImage2D : public KernelLite event_{new cl::Event}; param_t* scale_param_{nullptr}; - cl::Kernel kernel_; + std::shared_ptr kernel_; bool first_epoch_for_reinit_{true}; DDim last_x_dims_; DDim out_img_shape_ = DDim(std::vector(