Commit 1bb55cbb authored by Z zhaojiaying01

add wait event for kernel

Parent 56ce4aaf
@@ -86,8 +86,11 @@ void BatchNormKernel<GPU_CL, float>::Compute(
   clSetKernelArg(kernel, 4, sizeof(cl_mem), &new_bias);
   clSetKernelArg(kernel, 5, sizeof(cl_mem), &out);
+  cl_event out_event = param.OutputY()->GetClEvent();
+  cl_event wait_event = param.InputX()->GetClEvent();
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 0, NULL, NULL);
+                         default_work_size.data(), NULL, 1, &wait_event,
+                         &out_event);
 }
 template class BatchNormKernel<GPU_CL, float>;
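The change above is the pattern repeated in every hunk of this commit: the launch now waits on the input tensor's event and records a completion event on the output tensor, so downstream kernels are ordered through OpenCL events rather than only through in-order queue execution. A minimal, self-contained sketch of that wait-list usage (producer, consumer, queue and gws are illustrative names, not identifiers from this repository):

#include <CL/cl.h>

// Sketch: chain two launches through OpenCL events; identifiers are illustrative.
void enqueue_chain(cl_command_queue queue, cl_kernel producer, cl_kernel consumer,
                   const size_t gws[3]) {
  cl_event produced = NULL;
  // The producer records its completion event; its own wait list is empty (0, NULL).
  clEnqueueNDRangeKernel(queue, producer, 3, NULL, gws, NULL, 0, NULL, &produced);

  cl_event consumed = NULL;
  // The consumer declares a dependency on the producer via the wait list (1, &produced).
  clEnqueueNDRangeKernel(queue, consumer, 3, NULL, gws, NULL, 1, &produced, &consumed);

  // The runtime keeps its own reference while the commands are pending,
  // so the caller can release its event handles right away.
  clReleaseEvent(produced);
  clReleaseEvent(consumed);
}

On a single in-order command queue the explicit wait list is redundant for ordering, but it documents the data dependency and keeps the code correct if the queue is ever made out-of-order or the kernels are split across queues.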
@@ -73,8 +73,9 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
     clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
   }
+  cl_event wait_event = param.InputX()->GetClEvent();
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 0, NULL, NULL);
+                         default_work_size.data(), NULL, 1, &wait_event, NULL);
   memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
 }
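FetchKernel passes a wait event but records no output event, and the host-side memcpy runs immediately after the enqueue, so correctness here depends on out_cl_tensor.Data<float>() performing a blocking map or read, which this diff does not show. A common way to make such a host copy explicitly safe is a blocking read that itself waits on the kernel's event; a sketch with illustrative names, not the repo's actual API:

#include <CL/cl.h>

// Sketch: launch after the input is ready, then do a blocking read ordered after the
// kernel, so host_dst is valid when the call returns. All names are illustrative.
void fetch_to_host(cl_command_queue queue, cl_kernel kernel, const size_t gws[3],
                   cl_event input_ready, cl_mem out_buf, float *host_dst, size_t nbytes) {
  cl_event kernel_done = NULL;
  clEnqueueNDRangeKernel(queue, kernel, 3, NULL, gws, NULL, 1, &input_ready, &kernel_done);
  // CL_TRUE makes the read blocking; the wait list orders it after the kernel.
  clEnqueueReadBuffer(queue, out_buf, CL_TRUE, 0, nbytes, host_dst,
                      1, &kernel_done, NULL);
  clReleaseEvent(kernel_done);
}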
@@ -63,8 +63,10 @@ void PoolKernel<GPU_CL, float>::Compute(const PoolParam<GPU_CL> &param) {
   clSetKernelArg(kernel, 10, sizeof(cl_mem), &input);
   clSetKernelArg(kernel, 11, sizeof(cl_mem), &out);
+  cl_event out_event = param.Output()->GetClEvent();
+  cl_event wait_event = param.Input()->GetClEvent();
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 0, NULL, NULL);
+                         default_work_size.data(), NULL, 1, &wait_event, &out_event);
 }
 template class PoolKernel<GPU_CL, float>;
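All three enqueues discard the cl_int status returned by clEnqueueNDRangeKernel; checking it would surface invalid work sizes or stale events at the call site rather than later. A sketch, again with illustrative names rather than the repo's helpers:

#include <CL/cl.h>
#include <stdio.h>

// Sketch: keep the event-driven enqueue but surface launch failures immediately.
void enqueue_checked(cl_command_queue queue, cl_kernel kernel, const size_t gws[3],
                     cl_event wait_event, cl_event *out_event) {
  cl_int status = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, gws, NULL,
                                         1, &wait_event, out_event);
  if (status != CL_SUCCESS) {
    fprintf(stderr, "clEnqueueNDRangeKernel failed with error %d\n", status);
  }
}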