未验证 提交 0bd68c1b 编写于 作者: X xiebaiyuan 提交者: GitHub

[LITE][OPENCL][Image] lazy init cl::event (#3393)

* [LITE][OPENCL][Image] lazy init cl::event, test=develop

* [LITE][OPENCL][Image] lazy init cl::event, test=develop
上级 0d178c59
......@@ -62,6 +62,7 @@ class ReluCompute
CL_CHECK_FATAL(status);
auto global_work_size = cl::NDRange{count};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -77,7 +78,7 @@ class ReluCompute
std::string kernel_func_name_{"relu"};
std::string build_options_{"-DCL_DTYPE_float -DRELU"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
class SigmoidCompute
......@@ -120,6 +121,7 @@ class SigmoidCompute
CL_CHECK_FATAL(status);
auto global_work_size = cl::NDRange{count};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -135,7 +137,7 @@ class SigmoidCompute
std::string kernel_func_name_{"sigmoid"};
std::string build_options_{"-DCL_DTYPE_float -DSIGMOID"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -147,6 +147,7 @@ class ActivationComputeImageDefault
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -174,7 +175,7 @@ class ActivationComputeImageDefault
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
} // namespace kernels
......
......@@ -142,6 +142,7 @@ class BilinearInterpImageCompute
static_cast<cl::size_type>(default_work_size[1]),
static_cast<cl::size_type>(default_work_size[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -162,7 +163,7 @@ class BilinearInterpImageCompute
std::string kernel_func_name_{"bilinear_interp"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -120,6 +120,7 @@ class BoxCoderComputeImage : public KernelLite<TARGET(kOpenCL),
cl::NDRange{static_cast<cl::size_type>(default_work_size[0]),
static_cast<cl::size_type>(default_work_size[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -141,7 +142,7 @@ class BoxCoderComputeImage : public KernelLite<TARGET(kOpenCL),
param_t* boxcoder_param_{nullptr};
std::string kernel_func_name_{};
std::string build_options_{" -DCL_DTYPE_half"};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -123,6 +123,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, total1);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -156,6 +157,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, total0);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -180,7 +182,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -187,6 +187,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, width_);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -230,6 +231,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
status = kernel.setArg(++arg_idx, width_);
CL_CHECK_FATAL(status);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -254,7 +256,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{};
std::string build_options_{" -DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -205,6 +205,7 @@ void ConvCompute::GemmlikeConv2d() {
CL_CHECK_FATAL(status);
auto global_work_size = cl::NDRange{static_cast<size_t>(out_stride)};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
img2col_kernel,
cl::NullRange,
......@@ -300,6 +301,7 @@ void ConvCompute::GemmBatched(cl::Kernel& kernel,
status = kernel.setArg(++arg_idx, batch_size);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......
......@@ -57,7 +57,7 @@ class ConvCompute
std::vector<std::string> kernel_func_paths_{};
std::vector<std::string> build_options_{};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -544,6 +544,7 @@ void ConvImageCompute::Conv2d1x1opt(bool is_turn) {
status = kernel.setArg(++arg_idx, default_w_blk_);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -709,6 +710,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) {
<< global_work_size_[1] << "," << global_work_size_[2] << "}";
#endif
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -836,6 +838,7 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) {
<< global_work_size_[1] << "," << global_work_size_[2] << "}";
#endif
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -967,6 +970,7 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) {
<< global_work_size_[1] << "," << global_work_size_[2] << "}";
#endif
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -1086,6 +1090,7 @@ void ConvImageCompute::Conv2d5x5opt(bool is_turn) {
// VLOG(4) << "out_image: " << out_image;
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -1217,6 +1222,7 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) {
<< global_work_size_[1] << "," << global_work_size_[2] << "}";
#endif
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -1333,6 +1339,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) {
status = kernel.setArg(++arg_idx, output_height);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -1417,6 +1424,7 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) {
status = kernel.setArg(++arg_idx, static_cast<const int>(output_dims[2]));
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -1515,6 +1523,7 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) {
status = kernel.setArg(++arg_idx, static_cast<const int>(output_dims[2]));
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -1652,6 +1661,7 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) {
<< global_work_size_[1] << "," << global_work_size_[2] << "}";
#endif
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......
......@@ -58,7 +58,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
std::vector<std::string> kernel_func_paths_{};
std::vector<std::string> build_options_{};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
std::unique_ptr<Tensor> filter_gpu_image_{nullptr};
std::unique_ptr<Tensor> bias_gpu_image_{nullptr};
cl::NDRange global_work_size_ = cl::NDRange{
......
......@@ -108,6 +108,7 @@ class DepthwiseConv2dCompute
status = kernel.setArg(++arg_idx, *bias_buf);
CL_CHECK_FATAL(status);
auto global_work_size = cl::NDRange(static_cast<size_t>(numel));
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -123,7 +124,7 @@ class DepthwiseConv2dCompute
std::string kernel_func_name_{"depthwise_conv2d"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -89,6 +89,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
static_cast<cl::size_type>(default_work_size.data()[1]),
static_cast<cl::size_type>(default_work_size.data()[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -104,7 +105,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{"dropout"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -63,6 +63,7 @@ void ElementwiseAddCompute::Run() {
CL_CHECK_FATAL(status);
auto global_work_size = cl::NDRange{channels_, batch_};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......
......@@ -48,7 +48,7 @@ class ElementwiseAddCompute
std::string kernel_func_name_{"elementwise_add"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -153,6 +153,7 @@ void ElementwiseAddImageCompute::Run() {
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......
......@@ -63,7 +63,7 @@ class ElementwiseAddImageCompute
cl::Kernel kernel_;
cl::NDRange global_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -150,7 +150,8 @@ void ElementwiseMulFloatImageCompute::Run() {
auto global_work_size = cl::NDRange{static_cast<cl::size_type>(x_img_width),
static_cast<cl::size_type>(x_img_height)};
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
event_ = std::shared_ptr<cl::Event>(new cl::Event);
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
global_work_size,
......
......@@ -185,6 +185,7 @@ class ElementwiseMulImageCompute
auto global_work_size =
cl::NDRange{static_cast<cl::size_type>(x_img_width),
static_cast<cl::size_type>(x_img_height)};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -204,7 +205,7 @@ class ElementwiseMulImageCompute
std::string kernel_func_name_{"elementwise_mul"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -138,6 +138,7 @@ void ElementwiseSubImageCompute::Run() {
VLOG(4) << "global_work_size:[2D]:" << x_img_width << " " << x_img_height;
#endif
event_ = std::shared_ptr<cl::Event>(new cl::Event);
auto status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......
......@@ -46,7 +46,7 @@ class ElementwiseSubImageCompute
std::string kernel_func_name_{"elementwise_sub"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -123,6 +123,7 @@ class FcCompute
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -144,7 +145,7 @@ class FcCompute
DDim last_x_dims_;
cl::NDRange global_work_size_;
cl::Kernel kernel_;
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -130,6 +130,7 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -153,7 +154,7 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -137,6 +137,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
status = kernel.setArg(7, *out_img);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -259,6 +260,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
status = kernel.setArg(arg_idx++, in_w);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -276,7 +278,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{"instance_norm_onnx"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
Tensor scale_image_;
Tensor bias_image_;
};
......
......@@ -122,6 +122,7 @@ class LayoutComputeBufferChwToImageDefault
cl::NDRange{static_cast<cl::size_type>((new_dims[1] + 3) / 4),
static_cast<cl::size_type>(new_dims[3]),
static_cast<cl::size_type>(new_dims[0] * new_dims[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -141,7 +142,7 @@ class LayoutComputeBufferChwToImageDefault
private:
std::string kernel_func_name_{"buffer_to_image2d"};
std::string build_options_{"-DCL_DTYPE_float"};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
// [ImageDefault] -> [NCHW]
......@@ -229,6 +230,7 @@ class LayoutComputeImageDefaultToBufferChw
cl::NDRange{static_cast<cl::size_type>((new_dims[1] + 3) / 4),
static_cast<cl::size_type>(new_dims[3]),
static_cast<cl::size_type>(new_dims[0] * new_dims[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -248,7 +250,7 @@ class LayoutComputeImageDefaultToBufferChw
private:
std::string kernel_func_name_{"image2d_to_buffer"};
std::string build_options_{"-DCL_DTYPE_float"};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
// [NCHW] -> [ImageDW]
......@@ -323,6 +325,7 @@ class LayoutComputeBufferChwToImage2DNw
cl::NDRange{static_cast<cl::size_type>((out_N + 3) / 4), // N blocks
static_cast<cl::size_type>(out_W), // w
static_cast<cl::size_type>(out_C * out_H)}; // ch
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -341,7 +344,7 @@ class LayoutComputeBufferChwToImage2DNw
private:
std::string kernel_func_name_{"buffer_to_image2d_nw"};
std::string build_options_{"-DCL_DTYPE_float "};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -128,6 +128,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
static_cast<cl::size_type>(default_work_size[1]),
static_cast<cl::size_type>(default_work_size[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -153,7 +154,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{"lrn"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -91,6 +91,7 @@ class MulCompute
auto global_work_size = cl::NDRange{static_cast<size_t>((m_ + 3) / 4),
static_cast<size_t>((n_ + 3) / 4)};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -107,7 +108,7 @@ class MulCompute
std::string kernel_func_name_{"mat_mul"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -109,6 +109,7 @@ class NearestInterpComputeImageDefault
cl::NDRange{static_cast<cl::size_type>(default_work_size.data()[0]),
static_cast<cl::size_type>(default_work_size.data()[1]),
static_cast<cl::size_type>(default_work_size.data()[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -124,7 +125,7 @@ class NearestInterpComputeImageDefault
std::string kernel_func_name_{"nearest_interp"};
std::string build_options_{" -DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -142,6 +142,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
static_cast<cl::size_type>(default_work_size[1]),
static_cast<cl::size_type>(default_work_size[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -162,7 +163,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -105,6 +105,7 @@ class PoolCompute
status = kernel.setArg(++arg_idx, *output_buf);
CL_CHECK_FATAL(status);
auto global_work_size = cl::NDRange(static_cast<size_t>(numel));
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -120,7 +121,7 @@ class PoolCompute
std::string kernel_func_name_{"pool_"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -150,6 +150,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[0]));
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -165,7 +166,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{"pool_"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -154,6 +154,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
static_cast<size_t>(default_work_size.data()[1]),
static_cast<size_t>(default_work_size.data()[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -169,7 +170,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{"reshape"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
......@@ -93,6 +93,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
status = kernel.setArg(3, bias);
CL_CHECK_FATAL(status);
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -108,7 +109,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{"scale"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
param_t* scale_param_{nullptr};
cl::Kernel kernel_;
......
......@@ -96,6 +96,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
static_cast<cl::size_type>(default_work_size.data()[1]),
static_cast<cl::size_type>(default_work_size.data()[2])};
event_ = std::shared_ptr<cl::Event>(new cl::Event);
status = context.cl_context()->GetCommandQueue().enqueueNDRangeKernel(
kernel,
cl::NullRange,
......@@ -111,7 +112,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
std::string kernel_func_name_{"slice"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
std::shared_ptr<cl::Event> event_{nullptr};
};
} // namespace opencl
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册