diff --git a/src/operators/kernel/cl/batchnorm_kernel.cpp b/src/operators/kernel/cl/batchnorm_kernel.cpp index b3ef2027825d2f02ba27d6b3ad9d4364e4eccdb1..2da70637968b0da10fec301e454bf359a6f5bb56 100644 --- a/src/operators/kernel/cl/batchnorm_kernel.cpp +++ b/src/operators/kernel/cl/batchnorm_kernel.cpp @@ -86,8 +86,11 @@ void BatchNormKernel::Compute( clSetKernelArg(kernel, 4, sizeof(cl_mem), &new_bias); clSetKernelArg(kernel, 5, sizeof(cl_mem), &out); + cl_event out_event = param.OutputY()->GetClEvent(); + cl_event wait_event = param.InputX()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 0, NULL, NULL); + default_work_size.data(), NULL, 1, &wait_event, + &out_event); } template class BatchNormKernel; diff --git a/src/operators/kernel/cl/fetch_kernel.cpp b/src/operators/kernel/cl/fetch_kernel.cpp index 23c5e8de4616c0fb014bd8c696c8eeb1b8ea06c1..c6f8e78361842008fbb841b1751bdfea1ca2d18b 100644 --- a/src/operators/kernel/cl/fetch_kernel.cpp +++ b/src/operators/kernel/cl/fetch_kernel.cpp @@ -73,8 +73,9 @@ void FetchKernel::Compute(const FetchParam &param) { clSetKernelArg(kernel, 6, sizeof(int), &size_batch); } + cl_event wait_event = param.InputX()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 0, NULL, NULL); + default_work_size.data(), NULL, 1, &wait_event, NULL); memcpy(out->data(), out_cl_tensor.Data(), out->memory_size()); } diff --git a/src/operators/kernel/cl/pool_kernel.cpp b/src/operators/kernel/cl/pool_kernel.cpp index 802de26e6147aa0bf5d9467c6c6cab0f0148fe59..904bb0336a4de506fe4ac1dd4b915b6a125e6715 100644 --- a/src/operators/kernel/cl/pool_kernel.cpp +++ b/src/operators/kernel/cl/pool_kernel.cpp @@ -63,8 +63,10 @@ void PoolKernel::Compute(const PoolParam &param) { clSetKernelArg(kernel, 10, sizeof(cl_mem), &input); clSetKernelArg(kernel, 11, sizeof(cl_mem), &out); + cl_event out_event = 
param.Output()->GetClEvent(); + cl_event wait_event = param.Input()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 0, NULL, NULL); + default_work_size.data(), NULL, 1, &wait_event, &out_event); } template class PoolKernel;