diff --git a/src/operators/kernel/cl/relu_kernel.cpp b/src/operators/kernel/cl/relu_kernel.cpp index 1871e8ae728b353f6819a51bfcb96b7fe5a942ba..6ad5ba56da2609d444d5edf7647a1d2941776b79 100644 --- a/src/operators/kernel/cl/relu_kernel.cpp +++ b/src/operators/kernel/cl/relu_kernel.cpp @@ -50,9 +50,12 @@ void ReluKernel::Compute(const ReluParam& param) { // clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage); const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, + cl_event out_event = param.Out()->GetClEvent(); + cl_event wait_event = param.InputX()->GetClEvent(); + + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, - work_size, NULL, 0, NULL, NULL); + work_size, NULL, 1, &wait_event, &out_event); // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, // NULL, // work_size, NULL, 0, NULL, NULL); diff --git a/src/operators/kernel/cl/reshape_kernel.cpp b/src/operators/kernel/cl/reshape_kernel.cpp index 98ac780dbd477742815aacabcd21ad21d82f0ac6..d82d810cc2e9bf9e94b67d948f87e090cc45e152 100644 --- a/src/operators/kernel/cl/reshape_kernel.cpp +++ b/src/operators/kernel/cl/reshape_kernel.cpp @@ -55,8 +55,11 @@ void ReshapeKernel::Compute(const ReshapeParam &param) { clSetKernelArg(kernel, 9, sizeof(cl_int), &odims[1]); const size_t work_size[2] = {output->ImageWidth(), output->ImageHeight()}; + cl_event out_event = param.Out()->GetClEvent(); + cl_event wait_event = param.InputX()->GetClEvent(); + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, - work_size, NULL, 0, NULL, NULL); + work_size, NULL, 1, &wait_event, &out_event); } template class ReshapeKernel; diff --git a/src/operators/kernel/cl/softmax_kernel.cpp b/src/operators/kernel/cl/softmax_kernel.cpp index c3384d956765663f10099dd0a95e32059a0ac753..7d99ad9e1c979dfabeb85f6e5bfbb076dd896ac7 100644 --- a/src/operators/kernel/cl/softmax_kernel.cpp +++ 
b/src/operators/kernel/cl/softmax_kernel.cpp @@ -55,8 +55,12 @@ void SoftmaxKernel::Compute(const SoftmaxParam &param) { // clSetKernelArg(kernel, 4, sizeof(int), &dims[2]); // clSetKernelArg(kernel, 5, sizeof(int), &dims[3]); + cl_event out_event = param.Out()->GetClEvent(); + cl_event wait_event = param.InputX()->GetClEvent(); + status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, - default_work_size.data(), NULL, 0, NULL, NULL); + default_work_size.data(), NULL, 1, &wait_event, &out_event); + CL_CHECK_ERRORS(status); }